#!/usr/bin/perl -w
# vim: set sw=4 ts=4 si et nowrap:
# Copyright: GPL
# Author: Guido Socher, guido@bearix.oche.de
#
use strict;
use vars qw($opt_o $opt_T $opt_C $opt_t $opt_k $opt_l $opt_h $opt_v);
use Getopt::Std;
use IO::Handle;
require 5.004;
# prototypes:
sub main::help();
sub main::today();
sub main::check_for_lfparsercfg($);
sub main::htmlumlaute($);
sub main::parse($);
sub main::printlf_format();
#
# You may change this line if you want another default language:
my $lang="en";
#
my %config; # will contain values from ~/.lfparsercfg
#
my $ver ="2.28";
#
my %validcat=("Forum"=>1,"Applications"=>1,"Hardware"=>1,'Webdesign'=>1,
'SystemAdministration'=>1,'SoftwareDevelopment'=>1,'Graphics'=>1,
'Community'=>1,'UNIXBasics'=>1,'KernelCorner'=>1,'Interviews'=>1,
'Games'=>1
);
# use english in the talkback instead of the article language
# for these languages:
my %talkbacklang=("ar"=>"en","de"=>"en","tr"=>"en","it"=>"en",
"il"=>"en","nl"=>"en","ru"=>"en","ko"=>"en",'pl'=>'en');
my $talkbacklang;
my $lfcomment="http://cgi.linuxfocus.org/cgi-bin/lfcomment";
my $lftalkback="http://cgi.linuxfocus.org/cgi-bin/lftalkback";
#
# Note: missing keys are filled in automatically. Only 'chset' is mandatory;
# if a key does not exist for a language then it is taken from English (en).
my %intdat=(
'ar'=>{'chset'=>"windows-1256",'doct'=>'AR','abstract'=>'äÈÐÉ ãÎÊÕÑÉ','content'=>'ÇáÝåÑÓ','wwwresp'=>'ÇáÕÝÍÇÊ ÈÑÚÇíÉ ØÇÞã áíäßÓ ÝæßõÓ','aboutauthor'=>'äÈÐÉ Úä ÇáßÇÊÈ','auth'=>'ÇáãÄáÝ','transinfo'=>'ãÚáæãÇÊ Úä ÇáÊÑÌãÉ','home'=>'ÇáÃæáì','map'=>'ÇáÎÇÑØÉ','index'=>'ÝåÑÓ','search'=>'ÈÍË','news'=>'ÃÎÈÇÑ','archives'=>'ãÍÝæÙÇÊ','links'=>'ÑæÇÈØ','aboutus'=>'Úä ÇáãÌáÉ','topmap'=>'Topbar-ar.gif','botmap'=>'Bottombar-ar.gif','alttop'=>'[Top bar]','altbot'=>'[Bottom bar]','lfcomment'=>'ÇÖÛØ åäÇ ááÊäÈíå Úä ÎØÃ Ãæ áÇÑÓÇá ãáÇÍÙÇÊß Åáì áíäßÓ ÝæßõÓ', 'lftalkback'=>'ÊÚÞíÈß Úáì åÐÇ ÇáãÞÇá','talkbacktext'=>'áßá ãÞÇá ÕÝÍÉ ÎÇÕÉ ÈÇáÊÚÞíÈÇÊ¡ ÃÑÓá ÊÚÞíÈÇ Ãæ ÇØáÚ Úáì ÊÚÞíÈÇÊ ÇáÂÎÑíä. ','goto_talkback'=>'ÕÝÍÉ ÇáÊÚÞíÈÇÊ','TranslatedToThisLangBy'=>'ÊÑÌãå Åáì ÇáÚÑÈíÉ'},
'de'=>{'chset'=>"iso-8859-1",'doct'=>'DE','abstract'=>'Zusammenfassung','content'=>'Inhalt','wwwresp'=>'Der LinuxFocus Redaktion schreiben','aboutauthorfemale'=>'Über die Autorin','aboutauthormplural'=>'Über die Autoren','aboutauthor'=>'Über den Autor','auth'=>'von','home'=>'Home','map'=>'Plan','index'=>'Index','search'=>'Suchen','news'=>'Nachrichten','archives'=>'Archiv','links'=>'Links','aboutus'=>'Über uns','transinfo'=>'Autoren und Übersetzer','lftalkback'=>'Talkback für diesen Artikel','talkbacktext'=>'Jeder Artikel hat seine eigene Seite für Kommentare und Rückmeldungen. Auf dieser Seite kann jeder eigene Kommentare abgeben und die Kommentare anderer Leser sehen:','goto_talkback'=>'Talkback Seite','topmap'=>'Topbar-de.gif','botmap'=>'Bottombar-de.gif','lfcomment'=>'Einen Fehler melden oder einen Kommentar an LinuxFocus schicken','TranslatedToThisLangBy'=>'Übersetzt ins Deutsche von'},
'en'=>{'chset'=>"iso-8859-1",'doct'=>'EN','abstract'=>'Abstract','content'=>'Content','wwwresp'=>'Webpages maintained by the LinuxFocus Editor team','aboutauthorfemale'=>'About the author','aboutauthormplural'=>'About the authors','aboutauthorfplural'=>'About the authors','aboutauthor'=>'About the author','auth'=>'by','home'=>'Home','map'=>'Map','index'=>'Index','search'=>'Search','news'=>'News','archives'=>'Archives','links'=>'Links','aboutus'=>'About LF','transinfo'=>'Translation information','topmap'=>'Topbar-en.gif','botmap'=>'Bottombar-en.gif','alttop'=>'[Top bar]','altbot'=>'[Bottom bar]','lfcomment'=>'Click here to report a fault or send a comment to LinuxFocus', 'lftalkback'=>'Talkback form for this article','talkbacktext'=>'Every article has its own talkback page. On this page you can submit a comment or look at comments from other readers:','goto_talkback'=>'talkback page','TranslatedToThisLangBy'=>'Translated to English by','proofread'=>'proof read'},
'it'=>{'chset'=>"iso-8859-1",'doct'=>'IT','abstract'=>'Premessa','content'=>'Contenuto','auth'=>' ','aboutauthor'=>'L\'autore','home'=>'Home','map'=>'Mappa','index'=>'Indice','search'=>'Cerca','news'=>'News','archives'=>'Archivo','links'=>'Link','aboutus'=>'Cose LF','lftalkback'=>'Discussioni su quest\'articolo','talkbacktext'=>'ogni articolo possiede una sua pagina di discussione, da questa pagina puoi inviare un commento o leggere quelli degli altri lettori:','goto_talkback'=>'pagina di discussione','TranslatedToThisLangBy'=>'Tradotto in Italiano da'},
'es'=>{'chset'=>"iso-8859-1",'doct'=>'ES','abstract'=>'Resumen','content'=>'Contenidos','wwwresp'=>'Contactar con el equipo de LinuFocus','aboutauthor'=>'Sobre el autor','auth'=>'por','transinfo'=>'Información sobre la traducción','home'=>'Hogar','map'=>'Mapa','index'=>'Indice','search'=>'Busqueda','news'=>'Noticias','archives'=>'Arca','links'=>'Enlaces','aboutus'=>'Sobre LF','topmap'=>'Topbar-es.gif','botmap'=>'Bottombar-es.gif','lfcomment'=>'Pinchar aquí para informar de algún problema o enviar comentarios a LinuxFocus','lftalkback'=>'Formulario de "talkback" para este artículo','talkbacktext'=>'Cada artículo tiene su propia página de "talkback". A través de esa página puedes enviar un comentario o consultar los comentarios de otros lectores','goto_talkback'=>'Ir a la página de "talkback"','TranslatedToThisLangBy'=>'Taducido al español por'},
'fr'=>{'chset'=>"iso-8859-1",'doct'=>'FR','abstract'=>'Résumé','content'=>'Sommaire','wwwresp'=>'Site Web maintenu par l´équipe d´édition LinuxFocus','aboutauthor'=>'L´auteur','auth'=>'par','home'=>'Sommaire','map'=>'Carte','index'=>'Index','search'=>'Recherche','news'=>'Nouvelles','archives'=>'Archives','links'=>'Liens','aboutus'=>'A propos','topmap'=>'Topbar-fr.gif','botmap'=>'Bottombar-fr.gif','alttop'=>'[Barre Superieure]','altbot'=>'[Barre Inferieure]','TranslatedToThisLangBy'=>'Traduit en Français par'},
'nl'=>{'chset'=>"iso-8859-1",'doct'=>'NL','abstract'=>'Kort','content'=>'Inhoud','wwwresp'=>'Site onderhouden door het LinuxFocus editors team','aboutauthor'=>'Over de auteur','auth'=>'door', 'transinfo'=>'Vertaling info','home'=>'Home','map'=>'Map','index'=>'Index','search'=>'Zoek','news'=>'Nieuws','archives'=>'Archieven','links'=>'Links','aboutus'=>'Over LF','topmap'=>'Topbar-nl.gif','botmap'=>'Bottombar-nl.gif','alttop'=>'[Hoofd-balk]','altbot'=>'[Voet-balk]','lfcomment'=>'Klik hier om een fout te melden of commentaar te geven', 'lftalkback'=>'Talkback voor dit artikel','talkbacktext'=>'Elk artikel heeft zijn eigen talkback pagina. Daar kan je commentaar geven of commentaar van anderen lezen:','goto_talkback'=>'talkback pagina','TranslatedToThisLangBy'=>'Vertaald naar het Nederlands door'},
'gb'=>{'chset'=>"gb2312",'doct'=>'ZH', 'abstract'=>'ÕªÒª', 'content'=>'Ŀ¼', 'wwwresp'=>'Ö÷Ò³ÓÉLinuxFocus±à¼×éά»¤', 'aboutauthor'=>'¹ØÓÚ×÷Õß', 'auth'=>'by', 'home'=>'Ê×Ò³', 'map'=>'Õ¾µãµØÍ¼', 'index'=>'Ë÷Òý', 'search'=>'ËÑË÷', 'news'=>'ÐÂÎÅ', 'archives'=>'¹ýÍùÆÚ¿¯', 'links'=>'Á´½Ó', 'aboutus'=>'¹ØÓÚLF', 'transinfo'=>'·ÒëÐÅÏ¢', 'lfcomment'=>'µã»÷ÕâÀïÏòLinuxFocus±¨¸æ´íÎó»òÌá³öÒâ¼û', 'lftalkback'=>'¶ÔÕâÆªÎÄÕ·¢±íÆÀÂÛ', 'talkbacktext'=>'ÿƪÎÄÕ¶¼Óи÷×Եķ´À¡Ò³Ãæ¡£ÔÚÕâ¸öÒ³ÃæÀÄú¿ÉÒÔÌá½»ÆÀÂÛ£¬Ò²¿ÉÒԲ鿴ÆäËû¶ÁÕߵįÀÂÛ£º', 'goto_talkback'=>'·´À¡Ò³Ãæ'},
'jp'=>{'chset'=>"ISO-2022-JP",'TranslatedToThisLangBy'=>'Translated to Japanese by'},
'ko'=>{'chset'=>"EUC-KR",'doct'=>'KO','abstract'=>'¿ä¾à','content'=>'Â÷·Ê','wwwresp'=>'º» À¥»çÀÌÆ®´Â ¸®´ª½ºÆ÷Ä¿½º ÆíÁýÆÀ¿¡¼ °ü¸®ÇÕ´Ï´Ù','aboutauthor'=>'±Û¾´ÀÌ ¼Ò°³','auth'=>'Á¤º¸','home'=>'Ãʱâȸé','map'=>'Áöµµ','index'=>'Â÷·Ê','search'=>'ã¾Æº¸±â','news'=>'»õ¼Ò½Ä','archives'=>'Áö³±â»ç ¸ðÀ½','links'=>'Ãßõ»çÀÌÆ®','aboutus'=>'LF¿¡ ´ëÇÏ¿©','topmap'=>'Topbar-kr.gif','botmap'=>'Bottombar-kr.gif','alttop'=>'»óÀ§¸Þ´º','altbot'=>'ÇÏÀ§¸Þ´º','lfcomment'=>'¿ÀÀÚ¸¦ ãÀ¸¼Ì°Å³ª ÀǰßÀÌ ÀÖÀ¸½Ã¸é LinuxFocus·Î ¾Ë·ÁÁÖ¼¼¿ä','lftalkback'=>'ÀÌ ±â»ç¿¡ ´ëÇÑ ÀǰßÀÌ ÀÖ½À´Ï´Ù','talkbacktext'=>'°¢ ±â»ç´Â Çǵå¹é ÆäÀÌÁö°¡ µé¾î ÀÖ½À´Ï´Ù. Çǵå¹éÀ» ÅëÇÏ¿© ¿©·¯ºÐÀº ÀúÀÚ¿¡°Ô ÀǰßÀ» º¸³»°Å³ª ´Ù¸¥ µ¶ÀÚÀÇ ÀǰßÀ» º¸½Ç ¼ö ÀÖ½À´Ï´Ù.:','goto_talkback'=>'Çǵå¹é ÆäÀÌÁö','TranslatedToThisLangBy'=>'Translated to Korean by'},
'ru'=>{'chset'=>"koi8-r",'doct'=>'RU','abstract'=>'òÅÚÀÍÅ','content'=>'óÏÄÅÒÖÁÎÉÅ','aboutauthor'=>'ï Á×ÔÏÒÅ','auth'=>'Á×ÔÏÒ','home'=>'äÏÍÏÊ','map'=>'ëÁÒÔÁ','index'=>'éÎÄÅËÓ','search'=>'ðÏÉÓË','news'=>'îÏ×ÏÓÔÉ','archives'=>'áÒÈÉ×Ù','links'=>'óÓÙÌËÉ', 'aboutus'=>'ðÒÏ LF','TranslatedToThisLangBy'=>'ðÅÒÅ×ÏÄ ÎÁ òÕÓÓËÉÊ'},
'tr'=>{'chset'=>"iso-8859-9",'doct'=>'TR','abstract'=>'Özet','content'=>'Ýçerik','wwwresp'=>'Görselyöre sayfalarýnýn bakýmý, LinuxFocus Editörleri tarafýndan yapýlmaktadýr','auth'=>'tarafýndan','home'=>'Ev','map'=>'Eriþimdüzeni','index'=>'Ýçindekiler','search'=>'Arama','news'=>'Duyumlar','archives'=>'Belgelikler','links'=>'Baðlantýlar', 'topmap'=>'Topbar-tr.gif','botmap'=>'Bottombar-tr.gif','aboutus'=>'LF Nedir','aboutauthor'=>'Yazar hakkýnda','transinfo'=>'Çeviri bilgisi','lftalkback'=>'Bu yazý için görüþ bildiriminde bulunabilirsiniz','talkbacktext'=>'Her yazý kendi görüþ bildirim sayfasýna sahiptir. Bu sayfaya yorumlarýnýzý yazabilir ve diðer okuyucularýn yorumlarýna bakabilirsiniz.','lfcomment'=>'Burayý klikleyerek hatalarý rapor edebilir ya da yorumlarýnýzý LinuxFocus\'a gönderebilirsiniz','TranslatedToThisLangBy'=>'Türkçe\'ye çeviri'},
'cn'=>{'chset'=>"Big-5",'TranslatedToThisLangBy'=>'Translated to Chinese by'},
'pt'=>{'chset'=>"iso-8859-1",'doct'=>'pt_BR', 'abstract'=>'Abstrato', 'content'=>'Conteúdo', 'wwwresp'=>'Páginas Web mantidas pelo time de Editores LinuxFocus', 'aboutauthor'=>'Sobre o autor', 'auth'=>'por', 'home'=>'Início', 'map'=>'Mapa', 'index'=>'Índice', 'search'=>'Procura', 'news'=>'Novidades', 'archives'=>'Arquivos', 'links'=>'Links', 'aboutus'=>'Sobre LF', 'transinfo'=>'Informação sobre tradução', 'lfcomment'=>'Clique aqui para reportar uma falha ou para enviar um comentário para LinuxFocus', 'lftalkback'=>'Forma de respostas para este artigo', 'talkbacktext'=>'Every article has its own talkback page. On this page you can submit a comment or look at comments from other readers:', 'talkbacktext'=>'Todo artigo tem sua própria página de respostas. Nesta página você pode enviar um comentário ou ver os comentários de outros leitores:', 'goto_talkback'=>'página de respostas','TranslatedToThisLangBy'=>'Traduzido para Português por'},
'il'=>{'chset'=>"iso-8859-8",'ignore_chset'=>"windows-1255",'doct'=>'IL', 'abstract'=>'úîöéú', 'aboutauthor'=> 'òì äîçáø', 'auth'=> 'òì-éãé', 'home'=> 'ãó áéú', 'map'=> 'ìåç', 'index'=> 'àéðã÷ñ', 'search'=> 'çéôåù', 'news'=> 'îä çãù', 'archives'=> 'àøëéåï', 'links'=> '÷éùåøéí', 'aboutus'=> 'LF òì', 'content'=>'úåëï','transinfo'=>'äñáø òì äúøâåí', 'lfcomment'=>'ìçõ ëàï áëãé ìãååç òì áòéä áàúø', 'lftalkback'=>'îòøëú úâåáåú ìîàîø äð"ì', 'talkbacktext'=>'ìëì îàîø éù îòøëú úâåáåú îùìå. áãó æä úåëì ìäåñéó äòøä àå ôùåè ìäñúëì áäòøåú ùì ÷åøàéí àçøéí', 'goto_talkback'=>'ìîòøëú úâåáåú', 'TranslatedToThisLangBy'=>'úåøâí ìòáøéú ò"é', 'proofread'=>'÷øéàú äâää'},
'pl'=>{'chset'=>"iso-8859-1",'doct'=>'PL', 'abstract'=>'abstrakcyjne', 'content'=>'Zawartosc', 'wwwresp'=>'Strona prowadzona przez LinuxFocus Editor ', 'aboutauthor'=>'o autorze', 'auth'=>'przez', 'home'=>'Home', 'map'=>'Mapa', 'index'=>'Index', 'search'=>'Szukaj', 'news'=>'Nowosci', 'archives'=>'Archiwy', 'links'=>'Linki', 'aboutus'=>'O nas', 'transinfo'=>'informacje tlumaczeniowe', 'lfcomment'=>'kliknij tutaj zeby wyslac komentarz albo poinformowac nas o blendze', 'lftalkback'=>'Diskusja do tego artykolu', 'talkbacktext'=>'Komentarze do dyskusji:', 'goto_talkback'=>'Strona dyskusyjna'},
);
#
# languages which get the convert to palm:
my %palm=('en'=>1,'de'=>1,'fr'=>1,'nl'=>1,'pt'=>1,'es'=>1,'it'=>1,'tr'=>1,'pl'=>1);
# enforce html Umlaute for latin1
my %islatin=('en'=>1,'de'=>1,'pt'=>1,'fr'=>1,'nl'=>1,'es'=>1,'it'=>1,'pl'=>1,);
#
# global data:
my $today;
my $parsestate=0;
my @parsedtypes;
my @parseddata;
#
my $articlename;
my $articlenumber;
my $articlecategory;
my $articletitle;
my $articleauthorimg;
my $articleauthor;
my $articleauthorgender="";
my $articleauthorname;
my @articletransinfo=();
my @articleaboutauthor;
my @articleabstract;
my @articleindex;
my $articleimage;
my $articlebody;
my ($fd_out,$infile,$tmpline,$linelen,$prestate);
#
#
my $text;
#
# --- command line and configuration handling ---
# getopts() fills the $opt_X globals declared with "use vars" above.
getopts("voTCkl:ht") or die "ERROR: No such option. -h for help.\n";
if ($opt_h){
    help();
}
if ($opt_v){
    print "lfparser version $ver\n";
    exit 0;
}
$today=today();
check_for_lfparsercfg(\%config);
#
# -k: print the known categories (four per output line) followed by the
# keyword headings, then stop.
if ($opt_k){
    my $printed=0;
    print "Valid categories are:\n";
    for my $category (sort keys %validcat){
        print " $category,";
        $printed++;
        print "\n" unless ($printed % 4);
    }
    print "\nValid keyword headings are:
ArticleCategory:
AuthorImage:
TranslationInfo:
or AuthorName:
AboutTheAuthor:
Abstract:
ArticleIllustration:
ArticleBody:
\n";
    exit(0);
}
# Language: the config file changes the default, -l on the command line
# is evaluated afterwards and therefore wins. A language is valid when it
# has a 'chset' entry in %intdat.
if ($config{'lang'}){
    my $cfglang=$config{'lang'};
    if (!$intdat{$cfglang}{'chset'}){
        die "ERROR: invalid language in configfile ~/.lfparsercfg\n";
    }
    $lang=$cfglang;
}
if ($opt_l){
    if (!$intdat{$opt_l}{'chset'}){
        die "ERROR: invalid language specifier\n";
    }
    $lang=$opt_l;
}
# style=1 in the config file has the same effect as -o
$opt_o=1 if ($config{'style'} && $config{'style'} eq "1");
# every key without a (true) value in the selected language falls back to
# the English text:
for my $key (keys %{$intdat{'en'}}){
    next if ($intdat{$lang}{$key});
    $intdat{$lang}{$key}=$intdat{'en'}{$key};
}
#
# --- select the input file and the output handle ---
# The single argument is either the name of a meta file (the article is
# then written to stdout) or just a number NUM, which is a shortcut for
# reading articleNUM.meta.shtml and writing articleNUM.shtml.
#
# The three-argument open used below keeps special characters in a file
# name ('>', '|', surrounding blanks) from being read as an open mode.
require 5.006; # three-argument open
help() unless ($ARGV[0]);
$infile=$ARGV[0];
$fd_out=IO::Handle->new;
if (! -f $infile && $infile=~/^(\d+)$/){
    # only a number given. The file name is articleNUM.meta.shtml
    my $metafile="article$1.meta.shtml";
    my $outfile="article$1.shtml";
    die "ERROR: no such file $metafile\n" unless(-f $metafile);
    open(OUTFD,'>',$outfile)||die "ERROR: can not write $outfile: $!\n";
    $infile=$metafile;
    $fd_out->fdopen(fileno(OUTFD),"w")||die "ERROR: can not write $outfile: $!\n";
    print STDERR "Language: $lang, Reading $infile .... writing $outfile ...\n";
}else{
    $fd_out->fdopen(fileno(STDOUT),"w")||die "ERROR: can not write to stdout\n";
}
# The article name is the input name without "meta." and without any
# directory part; the first run of digits in it is the article number.
$articlename=$infile;
$articlename=~s/meta\.//;
# basename:
$articlename=~s=^.*/==;
if ($articlename=~/(\d+)/){
    $articlenumber=$1;
}else{
    $articlenumber=0;
}
open(FF,'<',$infile)||die "ERROR: can not read file $infile: $!\n";
$text="";
# --- read the meta file line by line into $text ---
# While reading we check that all the 7 key word headings on level h4 are
# present and come in the expected order (AuthorName and TranslationInfo
# share position 3), and we warn about over-long lines inside <pre>.
my $headcheck=0;
my %valhead=('ArticleCategory'=>1,'AuthorImage'=>2,'AuthorName'=>3,
    'TranslationInfo'=>3,'AboutTheAuthor'=>4,'Abstract'=>5,
    'ArticleIllustration'=>6,'ArticleBody'=>7);
my %missingheading=(1=>'ArticleCategory',2=>'AuthorImage',3=>'TranslationInfo',
    4=>'AboutTheAuthor',5=>'Abstract',6=>'ArticleIllustration',7=>'ArticleBody');
my $ArticleBody=0;
my $l=0; # current line number, used in messages
# The loop has to read from FF (opened just above, closed right after the
# loop); with an empty condition it would spin forever on an undefined $_.
while(<FF>){
    $l++;
    chomp;
    # Only h4 headings are key word headings. Without the <h4> anchor any
    # line containing a word would be taken for a heading.
    # NOTE(review): pattern reconstructed from the "level h4" description;
    # confirm it matches the exact markup used in the meta files.
    if ($headcheck < 7 && /<h4>\s*(\w+)/i){
        my $heading=$1;
        $headcheck++;
        $ArticleBody=1 if ($headcheck==7);
        if ($valhead{$heading}){
            # it's a valid heading
            unless($valhead{$heading}==$headcheck){
                die "ERROR: before line $l, I was expecting key word heading $missingheading{$headcheck}, but I found already $heading\n";
            }
        }else{
            die "ERROR: line $l, key word heading not valid. The only valid
headings are:
ArticleCategory:
AuthorImage:
TranslationInfo:
or AuthorName:
AboutTheAuthor:
Abstract:
ArticleIllustration:
ArticleBody:
They must come in this order and with the exact spelling as above.
One of the headings is missing or has wrong spelling.\n";
        }
    }
    # NOTE(review): the replacement ends in a closing font tag without a
    # visible opening one; the opening tag looks lost. Left unchanged.
    s/_LF_/LinuxFocus<\/font>/g;
    s/\s+$//g; # kill trailing space
    #
    # track whether we are inside a <pre> section
    if (/<pre>/i){
        $prestate=1;
    }
    if (/<\/pre>/i){
        $prestate=0;
    }
    if ($prestate){
        # check line length
        $tmpline=$_;
        # count things like &amp; &uuml; as one character only:
        $tmpline=~s/&\w+;/x/g;
        $linelen=length($tmpline) - 81;
        # up to 82 should be ok:
        if ($linelen > 1){
            print STDERR "$infile:${l}: Warning line inside  too long. This causes problems when printing the article. Try to make this line $linelen characters shorter.\n";
        }
    }
    $text.="$_\n"; # write in one long variable
}
close FF;
# $ArticleBody is only set once the 7th heading has been seen
unless ($ArticleBody){
    die "ERROR: key word heading ArticleBody:
not found\n";
}
# --- run the conversion on the text collected above ---
# For the languages listed in %islatin the whole text is first passed
# through htmlumlaute().
if ($islatin{$lang}){
htmlumlaute(\$text);
}
# split the text into typed chunks (@parsedtypes / @parseddata)
parse(\$text);
# interpret the chunks and fill the $article.../@article... globals
evalarticle();
# NOTE(review): as written this substitution replaces every "@" with itself
# and therefore changes nothing; the intended replacement text is not
# visible here -- confirm against the original source.
$articleauthor=~s/\@/@/g; # make life harder for spammers
# write the finished article to $fd_out
printlf_format();
#-----
# Read the optional per-user configuration file ~/.lfparsercfg.
#
# Argument: reference to a hash. Every "key = value" line of the file is
#           stored in it as $hashref->{key}=value.
# Returns:  1 if there is no readable config file (the hash is untouched),
#           0 after the file has been read.
# Dies if the file is readable but can not be opened.
#
# File syntax: lines starting with '#' are skipped, a '#' later in a line
# starts a comment, and all white space is removed before a line is
# matched, so "lang = de" and "lang=de" are equivalent.
sub check_for_lfparsercfg($){
    my $cfghashref=shift;
    # home directory of the effective user
    my $home=(getpwuid($>))[7];
    # no passwd entry or no home directory: there is nothing to read
    return 1 unless(defined $home);
    my $cfgfile="$home/.lfparsercfg";
    return 1 unless( -r $cfgfile);
    open(CFG,"< $cfgfile")||die "ERROR: can not read $cfgfile: $!\n";
    local $_; # do not clobber the caller's $_
    # the loop must read from CFG; an empty condition never terminates
    while(<CFG>){
        next if (/^\s*#/);
        s/#.*//;
        s/\s+//g;
        if (/(\w+)=(\S+)/){
            $cfghashref->{$1}=$2;
        }
    }
    close CFG;
    return 0;
}
#-----
# Take the global data and print an article in LF format.
#
# Everything is written to the global handle $fd_out. The sub reads the
# $article.../@article... globals filled by evalarticle(), the text table
# %intdat for the selected $lang and the options $opt_t, $opt_o, $opt_T and
# $opt_C. It takes no arguments and its return value is not meaningful.
#
# NOTE(review): many of the literal strings printed below look like the
# remains of HTML markup (empty strings, lone "|" and blanks). They are
# reproduced exactly as found; only the three "homepage" link statements,
# which did not compile, have been repaired.
sub printlf_format(){
    my $tmp;
    my $base="";
    # NOTE(review): the help text says -t inserts something into the
    # article, but the value assigned here is empty -- confirm.
    if ($opt_t){
        $base="";
    }
    $fd_out->print("\n");
    if ($lang eq "ar"){
        $fd_out->print("\n");
    }else{
        $fd_out->print("\n");
    }
    $fd_out->print("
lf$articlenumber, ${articlecategory}: $articletitle
$base
");
    # -o selects the old style header
    if ($opt_o){
        $fd_out->print("

");
    }else{
        $fd_out->print("
 |
");
    }
    $fd_out->print("
");
    if ($palm{$lang}){
        $fd_out->print("
");
    }
    $fd_out->print("
<$articleauthorimg>
$intdat{$lang}{auth} $articleauthor
\n");
    if (@articleaboutauthor){
        # the heading depends on the gender attribute found by evalarticle()
        $fd_out->print("".$intdat{$lang}{'aboutauthor'.$articleauthorgender}.": \n");
        $fd_out->print(join "", @articleaboutauthor);
        $fd_out->print("\n");
    }
    # some times we want it in english:
    if ($talkbacklang{$lang}){
        $talkbacklang=$talkbacklang{$lang};
    }else{
        $talkbacklang=$lang;
    }
    # list the translators into the output language
    my $Translatedto_printed=0;
    my $proofread="";
    $fd_out->print("\n");
    for $tmp (@articletransinfo){
        if ($tmp->{'to'} eq $lang && $tmp->{'from'} ne 'orig'){
            if ($tmp->{'from'} eq $lang){
                # "xx to xx" marks a proof reader; it is only shown after
                # a real translator has been printed
                next if ($Translatedto_printed==0);
                $proofread= " [".$intdat{$lang}{'proofread'}."]";
            }else{
                $proofread= "";
            }
            $fd_out->print("
".$intdat{$lang}{'TranslatedToThisLangBy'}.": \n") unless($Translatedto_printed);
            $Translatedto_printed=1; # there may be a 'en to en' for proof reading
            if ($tmp->{'linktype'} eq 'email'){
                $fd_out->print($tmp->{'name'} . "$proofread <".$tmp->{'link'}.">\n");
            }else{
                # link to the homepage stored in {'link'}
                $fd_out->print($tmp->{'name'} . "$proofread (<a href=\"".$tmp->{'link'}."\">homepage</a>)\n");
            }
            $fd_out->print(" \n");
        }
    }
    $fd_out->print("\n");
    if (@articleindex){
        $fd_out->print(" $intdat{$lang}{content}:\n\n");
    }else{
        print STDERR "Warning: could not generate an article index\n";
    }
    $fd_out->print("\n |
\n\n");
    $fd_out->print("
\n"); # needed due to a bug in netscape
    $fd_out->print("$articletitle
\n $articleimage");
    $fd_out->print("\n\n");
    $fd_out->print("$intdat{$lang}{abstract}:\n
\n");
    $tmp= join "", @articleabstract;
    $fd_out->print($tmp);
    if ($opt_o){
        # old black bar:
        $fd_out->print("
\n
\n");
    }else{
        # new blue bar:
        $fd_out->print("
");
    }
    $fd_out->print("\n");
    $fd_out->print("$articlebody\n");
    $fd_out->print("\n");
    # talkback section: not with -T and only for article numbers above 100
    $fd_out->print("
$intdat{$lang}{lftalkback}
$intdat{$lang}{talkbacktext}
\n") if (!$opt_T && $articlenumber > 100);
    $fd_out->print("
\n");
    $fd_out->print("
$intdat{$lang}{wwwresp}
© $articleauthorname, FDL LinuxFocus.org
");
    # -C suppresses the lfcomment text
    if ($opt_C){
        $fd_out->print(" | \n");
    }else{
        $fd_out->print("
$intdat{$lang}{lfcomment}
\n");
    }
    if (scalar(@articletransinfo)>0){ # set to 1 to show only a list if there is at least one translator
        $fd_out->print("\n\n");
        $fd_out->print("$intdat{$lang}{transinfo}:\n\n");
        for $tmp (@articletransinfo){
            if ($tmp->{'from'} eq 'orig'){
                # the original author: only the language of the original
                $fd_out->print(" ");
                $fd_out->print($tmp->{'to'}." --> -- : ");
                if ($tmp->{'linktype'} eq 'email'){
                    $fd_out->print($tmp->{'name'} . " <".$tmp->{'link'}."> | \n");
                }else{
                    $fd_out->print($tmp->{'name'} . " (<a href=\"".$tmp->{'link'}."\">homepage</a>)\n");
                }
                next;
            }
            $fd_out->print(" ");
            $fd_out->print($tmp->{'from'}." --> ".$tmp->{'to'}.": ");
            if ($tmp->{'linktype'} eq 'email'){
                $fd_out->print($tmp->{'name'} . " <".$tmp->{'link'}."> | \n");
            }else{
                $fd_out->print($tmp->{'name'} . " (<a href=\"".$tmp->{'link'}."\">homepage</a>)\n");
            }
        }
        $fd_out->print(" \n | \n");
    }else{
        $fd_out->print(" ");
        $fd_out->print("\n\n");
        $fd_out->print(" | \n");
    }
    $fd_out->print("
\n");
    $fd_out->print("$today, generated by lfparser version $ver
\n");
    $fd_out->print("\n");
    $fd_out->print("\n\n");
}
#-----
# handle the parsed text chunks.
#
# Walks over @parsedtypes/@parseddata (filled by parse()) with a state
# machine kept in the global $parsestate and fills the $article.../
# @article... globals that printlf_format() prints:
#   0  waiting for the level 1 heading (title)
#   1  waiting for the ArticleCategory heading     2  category text
#   3  waiting for the AuthorImage heading         4  the img tag
#   5  waiting for AuthorName (-> 6) or TranslationInfo (-> 7)
#   6  old format: anchor with the author          8  AboutTheAuthor heading
#   7  new format: TranslationInfo entries, ends at the AboutTheAuthor heading
#   9  about-the-author html, ends at Abstract
#   10 abstract html, ends at ArticleIllustration
#   11 the illustration img tag                    12 the article body
# Dies with an explanatory message when the meta format is violated and
# exits with status 1 when the article category is unknown. At the end the
# body is handed to parsebodyforindex().
#
# NOTE(review): several error messages below look as if example markup has
# been lost from them; they are reproduced exactly as found.
sub evalarticle{
    my $i=0; # index into @parseddata, kept in step with the loop over @parsedtypes
    my $type;
    my $transinfostate=0;
    my ($link,$linktype,$name,$transinfolang1,$transinfolang2);
    # states in which we ignore <p> and <br>
    my %ignorePandBR=(1=>1,2=>1,3=>1,4=>1,5=>1,6=>1,7=>1,8=>1,11=>1,12=>1);
    for $type (@parsedtypes){
        # remove empty text and &nbsp; which is inserted by WYSIWYG editors
        # (the pattern must name the entity; otherwise it deletes " ;")
        $parseddata[$i]=~ s/\&nbsp\;//g if ($type eq "Text");
        if ($type eq "Text" && $parseddata[$i]=~ /^[\r\n\t ]+$/){
            $i++; next;
        }
        if ($type eq "Text" && !$parseddata[$i]){
            $i++; next;
        }
        # dbg, debug:
        #print "-- $parsestate: $parseddata[$i] type: $type --\n";
        # start of article, search for heading:
        if ($parsestate==0 && $type=~/HeadingLevelTag/){
            if ($type eq "HeadingLevelTag1"){
                $articletitle=$parseddata[$i];
                $articletitle=~s/\s+/ /g;
                $parsestate++;
            }else{
                die "ERROR: The first heading must be the title of the article on level 1. Note: you may not have \"_LF_\" or nested tags in the title.\n";
            }
            $i++; next;
        }
        # ignoring of <p>, <br>, </p> in certain states:
        if ($ignorePandBR{$parsestate}){
            if ($type eq "StartTag" && $parseddata[$i] =~/^P$/i){ $i++; next;}
            if ($type eq "StartTag" && $parseddata[$i] =~/^br$/i){ $i++; next;}
            if ($type eq "EndTag" && $parseddata[$i] =~/^\/P$/i){ $i++; next;}
        }
        # start of article, search for ArticleCategory:
        if ($parsestate==1){
            if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/ArticleCategory/){
                $parsestate++;
            }else{
                die "ERROR: The second heading must be ArticleCategory on level 4\n";
            }
            $i++; next;
        }
        #--
        # looking for the category
        if ($parsestate==2){
            if ($type eq "Text"){
                $articlecategory=$parseddata[$i];
                $articlecategory=~s/\s+//g;
                $parsestate++;
            }else{
                die "ERROR: The heading ArticleCategory must be followed by a text plain string without tags\n";
            }
            $i++; next;
        }
        #--
        # looking for the image heading
        if ($parsestate==3){
            if ($type eq "HeadingLevelTag4"){
                $parsestate++;
            }else{
                die "ERROR: The 3-rd heading must be AuthorImage after ArticleCategory description\n";
            }
            $i++; next;
        }
        #--
        # looking for the image
        if ($parsestate==4){
            if ($type eq "StartTag" && $parseddata[$i]=~/img/i){
                $parsestate++;
                $articleauthorimg=$parseddata[$i];
            }else{
                die "ERROR: Image of author missing after AuthorImage heading\n";
            }
            $i++; next;
        }
        #--
        # looking for the AuthorName
        if ($parsestate==5){
            # the old format is AuthorName the new is TranslationInfo
            # and they are mutual exclusive
            if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/AuthorName/){
                $parsestate=6;
            }elsif ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/TranslationInfo/){
                $parsestate=7;
            }else{
                die "ERROR: AuthorName or TranslationInfo must be the heading after the Image\n";
            }
            $i++; next;
        }
        #--
        # looking for the name and e-mail or home-page
        if ($parsestate==6){
            if ($type eq "AnchorTag"){
                $articleauthor="<" . $parseddata[$i] . ">";
                if ($parseddata[$i]=~/\" *>(.+?)<\//){
                    $articleauthorname=$1;
                }else{
                    die "ERROR: in <$parseddata[$i]>, could not extract e-mail or home-page\n";
                }
                $parsestate=8;
            }else{
                die "ERROR: AuthorName must followed by an anchor tag\n";
            }
            $i++; next;
        }
        #--
        # looking for the name and e-mail or home-page
        # parse the TranslationInfo pre-tag:
        # $transinfostate: 0 = expect "original in LANG", 1 = expect the
        # author anchor, then even = expect "LANG to LANG" or the next
        # heading, odd = expect the translator anchor.
        if ($parsestate==7){
            if ($transinfostate == 0){
                if($type eq "Text" && $parseddata[$i]=~/original in +(\w+)/i){
                    $transinfostate++;
                    die "ERROR: in TranslationInfo language $1 not supported. Type lfparser -h to see the supported languages \n" unless($intdat{$1}{'chset'});
                    $transinfolang1='orig';
                    $transinfolang2=$1;
                    $i++; next;
                }else{
                    die "ERROR1: in $parseddata[$i]: TranslationInfo must be followed by pargraph that looks like: original in LANG Author Name
or original in LANG Author Name
\n";
                }
            }else{
                if($type eq "Text" && $parseddata[$i]=~/original in +(\w+)/i){
                    die "ERROR1a: there must be only one original author under TranslationInfo\n";
                }
            }
            if ($transinfostate == 1){
                # this is still the original author but this time the A HREF=...
                # the tag can look like this:
                # a href="mailto:katja@linuxfocus.org" gender="female"
                # a href="mailto:katja@linuxfocus.org" gender="mplural"
                # a href="mailto:katja@linuxfocus.org" gender="fplural"
                if ($type eq "AnchorTag"){
                    $parseddata[$i]=~s/[\n\r\t]/ /g;
                    if ($parseddata[$i]=~/gender/i){
                        if ($parseddata[$i]=~/female/){
                            $articleauthorgender="female";
                        }elsif($parseddata[$i]=~/fplural/){
                            $articleauthorgender="fplural";
                        }elsif($parseddata[$i]=~/mplural/){
                            $articleauthorgender="mplural";
                        }
                    }
                    # the gender attribute is ours, remove it from the tag
                    $parseddata[$i]=~s/gender *= *"?\w+"?//gi;
                    #$articleauthor="<" . $parseddata[$i] . ">";
                    $transinfostate++;
                    if ($parseddata[$i]=~/= *[\'\"]([^\"\']+)[\'\"] *>(.+?)<\//){
                        $articleauthorname=$2;
                    }else{
                        die "ERROR2: in <$parseddata[$i]>, can not extract name\n";
                    }
                    $link=$1; # could in this case as well be a homepage
                    $name=$2;
                    $name=~s/\s+/ /g;
                    $link=~s/\s+//g;
                    if ($link=~/(nospam|mailto):/i){
                        $linktype="email";
                        $link=~s/(nospam|mailto)://g;
                        $link=~s/\@/(at)/g; # could be several authors
                        $link=~s/\s//g;
                        $link=~s/,/, /g; # could be several authors
                        $articleauthor="$name
<$link>";
                    }else{
                        $linktype="homepage";
                        $articleauthor="$name (homepage)";
                    }
                    push(@articletransinfo,{'from',$transinfolang1,'to',$transinfolang2,'name',$name,'link',$link,'linktype',$linktype});
                    $i++; next;
                }else{
                    die "ERROR3: TranslationInfo must be followed by pargraph that looks like: original in LANG Author Name
\n";
                }
            }
            if (($transinfostate % 2) == 0){
                # this is the "lang to lang" or already the AboutTheAuthor
                if($type eq "Text" && $parseddata[$i]=~/(\w+) +to +(\w+)/i){
                    $transinfostate++;
                    die "ERROR4: in TranslationInfo language $1 not supported. Type lfparser -h to see the supported languages \n" unless($intdat{$1}{'chset'});
                    $transinfolang1=$1;
                    $transinfolang2=$2;
                    $i++; next;
                # looking for the AboutTheAuthor
                }elsif ($type eq "HeadingLevelTag4"){
                    # here we look also for the next heading:
                    if ($parseddata[$i]=~/AboutTheAuthor/){
                        $parsestate=9;
                        die "ERROR7: TranslationInfo not complete\n" unless(scalar(@articletransinfo) > 0);
                    }else{
                        die "ERROR8: The heading after TranslationInfo must be AboutTheAuthor and not \"$parseddata[$i]\"\n";
                    }
                    $i++; next;
                }else{
                    die "ERROR5: in $parseddata[$i]: TranslationInfo must have a pargraph that looks like: LANG1 to LANG2Translator Name
\nAdditional and other things are not allowed\n";
                }
            }
            if (($transinfostate % 2) == 1){
                if ($type eq "AnchorTag"){
                    $transinfostate++;
                    $parseddata[$i]=~s/[\r\n]/ /g;
                    $parseddata[$i]=~s/gender *= *"?\w+"?//gi;
                    if ($parseddata[$i]=~/= *[\'\"]([^\"\']+)[\'\"] *>(.+?)<\//){
                        $link=$1; # could in this case as well be a homepage
                        $name=$2;
                        $name=~s/\s+/ /g;
                        $link=~s/\s+//g;
                        if ($link=~/(nospam|mailto):/){
                            $linktype="email";
                            $link=~s/mailto://g;
                            $link=~s/nospam://g;
                            $link=~s/\@/(at)/g; # could be several authors
                            $link=~s/\s//g;
                            $link=~s/,/, /g; # could be several authors
                        }else{
                            $linktype="homepage";
                        }
                        push(@articletransinfo,{'from',$transinfolang1,'to',$transinfolang2,'name',$name,'link',$link,'linktype',$linktype});
                    }else{
                        die "ERROR2: TranslationInfo ($parseddata[$i]): could not get name\n";
                    }
                    $i++; next;
                }else{
                    die "ERROR6: TranslationInfo must have a pargraph that looks like: LANG1 to LANG2Translator Name
\n";
                }
            }
            $i++; next;
        }
        #--
        # looking for the AboutTheAuthor when there is no TranslationInfo
        if ($parsestate==8){
            if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/AboutTheAuthor/){
                $parsestate++;
            }else{
                die "ERROR: The heading after AuthorName must be AboutTheAuthor and not \"$parseddata[$i]\"\n";
            }
            $i++; next;
        }
        #--
        # reading about the author (html text without heading)
        if ($parsestate==9){
            if ($type=~/HeadingLe/){
                if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/Abstract/){
                    $parsestate++;
                }else{
                    die "ERROR: The heading after the \"about the author\" paragraph must be the Abstract and not \"$parseddata[$i]\"\n";
                }
            }else{
                # reading any html: tags get their angle brackets back
                if ($type=~/Tag/){
                    push(@articleaboutauthor,"<" . $parseddata[$i] . ">");
                }elsif ($type eq "Text"){
                    push(@articleaboutauthor,$parseddata[$i]);
                }else{
                    die "Programm error, unknown type $type in about author\n";
                }
            }
            $i++; next;
        }
        #--
        # reading the abstract (html text without heading)
        if ($parsestate==10){
            if ($type=~/HeadingLe/){
                if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/ArticleIllustration/){
                    $parsestate++;
                }else{
                    die "ERROR: The heading after the abstract paragraph must be ArticleIllustration but I found $type:\"$parseddata[$i]\"\n";
                }
            }else{
                # reading any html: tags get their angle brackets back
                if ($type=~/Tag/){
                    push(@articleabstract,"<" . $parseddata[$i] . ">");
                }elsif ($type eq "Text"){
                    push(@articleabstract,$parseddata[$i]);
                }else{
                    die "Programm error, unknown type $type in abstract\n";
                }
            }
            $i++; next;
        }
        #--
        # looking for the article illustration
        if ($parsestate==11){
            if ($type eq "StartTag" && $parseddata[$i]=~/img/i){
                $parsestate++;
                $articleimage="<" . $parseddata[$i] . ">";
            }else{
                die "ERROR: Image of article missing after ArticleIllustration heading\n";
            }
            $i++; next;
        }
        #--
        # looking for the ArticleBody is already checked in the parser:
        if ($parsestate==12){
            if ($type eq "Body"){
                $articlebody=$parseddata[$i];
                # Remove the &nbsp; entity from the body as well. A pattern
                # of one plain blank would delete every space in the article.
                # NOTE(review): markup around the entity may have been lost
                # from this pattern -- confirm against the original source.
                $articlebody=~s|&nbsp;||g;
            }else{
                die "Program error: state 12 but tag-type $type instead of ArticleBody\n";
            }
            $i++; next;
        }
        #--
        $i++;
    }
    die "ERROR: invalid article meta-format, debug state $parsestate. Either you do not have a  at the beginning or there is still a bug in lfparser.\n" unless ($parsestate == 12);
    parsebodyforindex(\$articlebody);
    unless ($validcat{$articlecategory}){
        print STDERR "ERROR invalid article category $articlecategory\n";
        print STDERR "valid categories are:\n";
        foreach (keys %validcat){
            print STDERR " - \"$_\"\n";
        }
        exit 1;
    }
}
#-----
# Generate an index for the article body.
# Walks over the HTML text referenced by the argument, stores the text of
# every <h2>/<h3> heading in the global @articleindex (nested tags are
# removed from the entries at the end) and rebuilds the global $articlebody
# from the pieces it walked over.
# parsebodyforindex takes a ref to a text string as argument.
sub parsebodyforindex($){
my $text = shift;
my @body;
my $h;
# incremented once per heading found;
# NOTE(review): not read anywhere in the visible code
my $i=0;
while (1) {
# First we try to pull off any plain text (anything before a "<" char)
if ($$text =~ /\G([^<]+)/gcs) {
push(@body,$1);
# closing html and body tags are dropped from the rebuilt body
} elsif ($$text =~ /\G<\/HTML>/igcs) {
next;
} elsif ($$text =~ /\G<\/body>/igcs) {
next;
# level 3 heading (without attributes): remember its text for the index.
# NOTE(review): the strings pushed around the heading text look like the
# remains of markup -- confirm against the original source.
} elsif ($$text =~ /\G<[hH]3>(.+?)<\/[hH]3>/gcs) {
$h=$1;
push(@body," \n".$h ."
\n");
push(@articleindex,$h);
$i++;
# level 2 heading: handled exactly like level 3
} elsif ($$text =~ /\G<[hH]2>(.+?)<\/[hH]2>/gcs) {
$h=$1;
push(@body," \n".$h ."
\n");
push(@articleindex,$h);
$i++;
# any other tag is copied unchanged
} elsif ($$text =~ m|\G(<[^>]*>)|gcs) {
push(@body,$1);
} else {
# the string is exhausted, or there's no > in it.
last;
}
}
# index entries are plain text: strip tags nested inside the headings
foreach $h (@articleindex){
$h=~s/<.+?>//g;
}
$articlebody=join "",@body;
}
#-----
# Tokenize the html text and append the result to @parseddata (contents)
# and @parsedtypes (one type name per content entry).
# parse takes a ref to a text string as argument.
#
# Types produced: Text, Markup (<!...>), EndTag, AnchorTag (a complete
# <a ...>text</a>), HeadingLevelTagN (a complete <hN>text</hN>), StartTag
# and Body. Comments are skipped. Tokenizing stops at the level 4 heading
# containing "ArticleBody": everything after it becomes one Body entry.
sub parse($){
    my $text = shift;
    my ($kind,$chunk);
    TOKEN: while (1) {
        if ($$text =~ /\G([^<]+)/gcs) {
            # plain text: everything up to the next "<"
            ($kind,$chunk) = ('Text',$1);
        } elsif ($$text =~ /\G<(!--.*?--)>/gcs) {
            # comments in front of the article body are ignored
            next TOKEN;
        } elsif ($$text =~ /\G<(!.*?)>/gcs) {
            ($kind,$chunk) = ('Markup',$1);
        } elsif ($$text =~ m|\G<(/[a-zA-Z][^<]*?)>|gcs) {
            ($kind,$chunk) = ('EndTag',$1);
        } elsif ($$text =~ /\G<([aA] [^>]+>([^<]+)<\/[aA])>/gcs) {
            # an anchor together with its text and closing tag
            ($kind,$chunk) = ("AnchorTag",$1);
        } elsif ($$text =~ /\G<[hH](\d)>([^<]+)<\/[hH]\d>/gcs) {
            # a heading together with its text
            my $level = $1;
            $chunk = $2;
            $kind = "HeadingLevelTag$level";
            if ($level eq "4" && index($chunk,"ArticleBody") > -1){
                # the rest of the text, from the end of this heading on,
                # is the article body
                push(@parseddata,substr($$text,pos($$text)));
                push(@parsedtypes,"Body");
                last TOKEN;
            }
        } elsif ($$text =~ /\G<(.+?)>/gcs) {
            # anything else between "<" and the next ">" is a start tag
            ($kind,$chunk) = ('StartTag',$1);
        } else {
            # the string is exhausted, or there's no > in it.
            last TOKEN;
        }
        #print "dbg $chunk type: $kind\n";
        push(@parseddata,$chunk);
        push(@parsedtypes,$kind);
    }
}
#--------------
# Replace 8-bit ISO-8859-1 (latin1) characters in a text by the named HTML
# entity, e.g. the byte 0xFC (u-umlaut) becomes "&uuml;".
#
# Argument: reference to the text; the text is modified in place.
# The return value is not meaningful.
#
# Each substitution has to produce the entity: replacing a character with
# itself leaves the text unchanged. The characters are written as \xNN
# escapes so that the table does not depend on the encoding this source
# file is stored in. Only the characters listed here are converted, all
# other characters are left alone. The text is expected to be latin1;
# multi-byte encodings are not handled.
sub htmlumlaute($){
    my $txt_ptr=shift;
    my %entity_of=(
        "\xA1"=>'iexcl', "\xBF"=>'iquest',
        "\xC0"=>'Agrave',"\xC1"=>'Aacute',"\xC2"=>'Acirc', "\xC3"=>'Atilde',
        "\xC4"=>'Auml',  "\xC5"=>'Aring', "\xC7"=>'Ccedil',
        "\xC8"=>'Egrave',"\xC9"=>'Eacute',"\xCA"=>'Ecirc', "\xCB"=>'Euml',
        "\xCC"=>'Igrave',"\xCD"=>'Iacute',"\xCE"=>'Icirc', "\xCF"=>'Iuml',
        "\xD1"=>'Ntilde',
        "\xD2"=>'Ograve',"\xD3"=>'Oacute',"\xD4"=>'Ocirc', "\xD5"=>'Otilde',
        "\xD6"=>'Ouml',  "\xD8"=>'Oslash',
        "\xD9"=>'Ugrave',"\xDA"=>'Uacute',"\xDB"=>'Ucirc', "\xDC"=>'Uuml',
        "\xDD"=>'Yacute',"\xDF"=>'szlig',
        "\xE0"=>'agrave',"\xE1"=>'aacute',"\xE2"=>'acirc', "\xE3"=>'atilde',
        "\xE4"=>'auml',  "\xE5"=>'aring', "\xE6"=>'aelig', "\xE7"=>'ccedil',
        "\xE8"=>'egrave',"\xE9"=>'eacute',"\xEA"=>'ecirc', "\xEB"=>'euml',
        "\xEC"=>'igrave',"\xED"=>'iacute',"\xEE"=>'icirc',
        "\xF1"=>'ntilde',
        "\xF2"=>'ograve',"\xF3"=>'oacute',"\xF4"=>'ocirc', "\xF6"=>'ouml',
        "\xF9"=>'ugrave',"\xFA"=>'uacute',"\xFB"=>'ucirc', "\xFC"=>'uuml',
    );
    # one pass over all 8-bit characters; unknown ones are put back as is
    $$txt_ptr=~s/([\xA1-\xFF])/exists $entity_of{$1} ? "&$entity_of{$1};" : $1/ge;
}
#--------------
# Return the current local date as a string in yyyy-mm-dd format.
sub today(){
    # localtime counts years from 1900 and months from 0
    my ($mday,$mon,$year)=(localtime)[3,4,5];
    return sprintf("%04d-%02d-%02d",$year + 1900,$mon + 1,$mday);
}
#-----
#
# Print the usage text (including the version number $ver) to stdout and
# terminate the program.
sub help(){
    print <<"END_OF_HELP";
lfparser -- parse a LinuxFocus article in HTML meta syntax and
generate a final LinuxFocus article. The HTML meta syntax is described
in http://www.linuxfocus.org/~guido/dev/lfparser.html
It is a special HTML format that can easily be edited and converted to
the released article format. It gives LinuxFocus the flexibilty to change
the layout without editing all articles.
USAGE: lfparser [-hCktoTv][-l ar|cn|de|en|es|fr|gb|il|jp|ko|nl|pt|pl|ru|it|tr] articleX.meta.shtml > articleX.shtml
or
USAGE: lfparser [-hCktoTv][-l ar|cn|de|en|es|fr|gb|il|jp|ko|nl|pt|pl|ru|it|tr] num
OPTIONS: -h this help
-C do not generate a link to lfcomment
-l select a language for the output [config file: lang=xx]
-k list all valid categories, and H4 headings and exit
-o use old style header [config file: style=1]
-T do not include talkback
-t test mode. This inserts a  into the
article to include the images and other stuff from
../../common/ without the need to have them locally available.
This option must not be used for the final article.
-v print version and exit.
If you do not specify a filename as argument but just a number
then lfparser will seatch for a file called article.meta.shtml
in the current directory and write to article.shtml
This is a shortcut to save some typing.
EXAMPLE: French:
lfparser -l fr article111.meta.shtml > article111.shtml
or as shortcut:
lfparser -l fr 111
Arabic:
lfparser -l ar articleX.meta.shtml > articleX.shtml
You can have an optional ~/.lfparsercfg file with the following
syntax:
# comment
lang = de # make German the default language
style = 2 # new style, 1 would be old style
#
This will then set the configuration options described under
OPTIONS and you can run lfparser without specifying any options:
lfparser articleX.meta.shtml > articleX.shtml
This is lfparser version: $ver
END_OF_HELP
    exit;
}
__END__