#!/usr/bin/perl -w
# vim: set sw=4 ts=4 si et nowrap:
# Copyright: GPL
# Author: Guido Socher, guido@bearix.oche.de
#
use strict;
use vars qw($opt_o $opt_T $opt_C $opt_t $opt_k $opt_l $opt_h $opt_v);
use Getopt::Std;
use IO::Handle;
require 5.004;
# prototypes:
# Forward declarations of the subs that carry a prototype, so that the
# prototype is already known at the places further down where they are called.
sub main::help();                    # takes no arguments
sub main::today();                   # takes no arguments
sub main::check_for_lfparsercfg($);  # one scalar; called with a reference to %config
sub main::htmlumlaute($);            # one scalar; called with a reference to $text
sub main::parse($);                  # one scalar; called with a reference to $text
sub main::printlf_format();          # takes no arguments
#
# Default article language. You may change this line if you want another
# default; it is overridden by ~/.lfparsercfg and by the -l option.
my $lang = 'en';
#
# Receives the key=value pairs found in ~/.lfparsercfg:
my %config;
#
# Version of this program (printed by -v and written into the output):
my $ver = '2.28';
#
# The valid article categories (listed by the -k option):
my %validcat = map { $_ => 1 } qw(
    Forum Applications Hardware Webdesign
    SystemAdministration SoftwareDevelopment Graphics
    Community UNIXBasics KernelCorner Interviews
    Games
);
# For these article languages the talkback pages are requested in
# English instead of the article language:
my %talkbacklang = map { $_ => 'en' } qw(ar de tr it il nl ru ko pl);
# Language actually used in the talkback links (set by printlf_format):
my $talkbacklang;
# CGI programs the generated pages link to:
my $lfcomment  = 'http://cgi.linuxfocus.org/cgi-bin/lfcomment';
my $lftalkback = 'http://cgi.linuxfocus.org/cgi-bin/lftalkback';
#
# Note: the following table is completed automatically. Only 'chset' is mandatory;
#       if a key does not exist for a language, its value is taken from English (en).
my %intdat=(
 'ar'=>{'chset'=>"windows-1256",'doct'=>'AR','abstract'=>'äÈÐÉ ãÎÊÕÑÉ','content'=>'ÇáÝåÑÓ','wwwresp'=>'ÇáÕÝÍÇÊ ÈÑÚÇíÉ ØÇÞã áíäßÓ ÝæßõÓ','aboutauthor'=>'äÈÐÉ Úä ÇáßÇÊÈ','auth'=>'ÇáãÄáÝ','transinfo'=>'ãÚáæãÇÊ Úä ÇáÊÑÌãÉ','home'=>'ÇáÃæáì','map'=>'ÇáÎÇÑØÉ','index'=>'ÝåÑÓ','search'=>'ÈÍË','news'=>'ÃÎÈÇÑ','archives'=>'ãÍÝæÙÇÊ','links'=>'ÑæÇÈØ','aboutus'=>'Úä ÇáãÌáÉ','topmap'=>'Topbar-ar.gif','botmap'=>'Bottombar-ar.gif','alttop'=>'[Top bar]','altbot'=>'[Bottom bar]','lfcomment'=>'ÇÖÛØ åäÇ ááÊäÈíå Úä ÎØÃ Ãæ áÇÑÓÇá ãáÇÍÙÇÊß Åáì áíäßÓ ÝæßõÓ', 'lftalkback'=>'ÊÚÞíÈß Úáì åÐÇ ÇáãÞÇá','talkbacktext'=>'áßá ãÞÇá ÕÝÍÉ ÎÇÕÉ ÈÇáÊÚÞíÈÇÊ¡ ÃÑÓá ÊÚÞíÈÇ Ãæ ÇØáÚ Úáì ÊÚÞíÈÇÊ ÇáÂÎÑíä. ','goto_talkback'=>'ÕÝÍÉ ÇáÊÚÞíÈÇÊ','TranslatedToThisLangBy'=>'ÊÑÌãå Åáì ÇáÚÑÈíÉ'},
 'de'=>{'chset'=>"iso-8859-1",'doct'=>'DE','abstract'=>'Zusammenfassung','content'=>'Inhalt','wwwresp'=>'Der LinuxFocus Redaktion schreiben','aboutauthorfemale'=>'&Uuml;ber die Autorin','aboutauthormplural'=>'&Uuml;ber die Autoren','aboutauthor'=>'&Uuml;ber den Autor','auth'=>'von','home'=>'Home','map'=>'Plan','index'=>'Index','search'=>'Suchen','news'=>'Nachrichten','archives'=>'Archiv','links'=>'Links','aboutus'=>'&Uuml;ber uns','transinfo'=>'Autoren und &Uuml;bersetzer','lftalkback'=>'Talkback f&uuml;r diesen Artikel','talkbacktext'=>'Jeder Artikel hat seine eigene Seite f&uuml;r Kommentare und R&uuml;ckmeldungen. Auf dieser Seite kann jeder eigene Kommentare abgeben und die Kommentare anderer Leser sehen:','goto_talkback'=>'Talkback Seite','topmap'=>'Topbar-de.gif','botmap'=>'Bottombar-de.gif','lfcomment'=>'Einen Fehler melden oder einen Kommentar an LinuxFocus schicken','TranslatedToThisLangBy'=>'&Uuml;bersetzt ins Deutsche von'},
 'en'=>{'chset'=>"iso-8859-1",'doct'=>'EN','abstract'=>'Abstract','content'=>'Content','wwwresp'=>'Webpages maintained by the LinuxFocus Editor team','aboutauthorfemale'=>'About the author','aboutauthormplural'=>'About the authors','aboutauthorfplural'=>'About the authors','aboutauthor'=>'About the author','auth'=>'by','home'=>'Home','map'=>'Map','index'=>'Index','search'=>'Search','news'=>'News','archives'=>'Archives','links'=>'Links','aboutus'=>'About LF','transinfo'=>'Translation information','topmap'=>'Topbar-en.gif','botmap'=>'Bottombar-en.gif','alttop'=>'[Top bar]','altbot'=>'[Bottom bar]','lfcomment'=>'Click here to report a fault or send a comment to LinuxFocus', 'lftalkback'=>'Talkback form for this article','talkbacktext'=>'Every article has its own talkback page. On this page you can submit a comment or look at comments from other readers:','goto_talkback'=>'talkback page','TranslatedToThisLangBy'=>'Translated to English by','proofread'=>'proof read'},
 'it'=>{'chset'=>"iso-8859-1",'doct'=>'IT','abstract'=>'Premessa','content'=>'Contenuto','auth'=>' ','aboutauthor'=>'L\'autore','home'=>'Home','map'=>'Mappa','index'=>'Indice','search'=>'Cerca','news'=>'News','archives'=>'Archivo','links'=>'Link','aboutus'=>'Cose LF','lftalkback'=>'Discussioni su quest\'articolo','talkbacktext'=>'ogni articolo possiede una sua pagina di discussione, da questa pagina puoi inviare un commento o leggere quelli degli altri lettori:','goto_talkback'=>'pagina di discussione','TranslatedToThisLangBy'=>'Tradotto in Italiano da'},
 'es'=>{'chset'=>"iso-8859-1",'doct'=>'ES','abstract'=>'Resumen','content'=>'Contenidos','wwwresp'=>'Contactar con el equipo de LinuFocus','aboutauthor'=>'Sobre el autor','auth'=>'por','transinfo'=>'Informaci&oacute;n sobre la traducci&oacute;n','home'=>'Hogar','map'=>'Mapa','index'=>'Indice','search'=>'Busqueda','news'=>'Noticias','archives'=>'Arca','links'=>'Enlaces','aboutus'=>'Sobre LF','topmap'=>'Topbar-es.gif','botmap'=>'Bottombar-es.gif','lfcomment'=>'Pinchar aqu&iacute; para informar de alg&uacute;n problema o enviar comentarios a LinuxFocus','lftalkback'=>'Formulario de "talkback" para este art&iacute;culo','talkbacktext'=>'Cada art&iacute;culo tiene su propia p&aacute;gina de "talkback". A trav&eacute;s de esa p&aacute;gina puedes enviar un comentario o consultar los comentarios de otros lectores','goto_talkback'=>'Ir a la p&aacute;gina de "talkback"','TranslatedToThisLangBy'=>'Taducido al espa&ntilde;ol por'},
 'fr'=>{'chset'=>"iso-8859-1",'doct'=>'FR','abstract'=>'R&eacute;sum&eacute;','content'=>'Sommaire','wwwresp'=>'Site Web maintenu par l&acute;&eacute;quipe d&acute;&eacute;dition LinuxFocus','aboutauthor'=>'L&acute;auteur','auth'=>'par','home'=>'Sommaire','map'=>'Carte','index'=>'Index','search'=>'Recherche','news'=>'Nouvelles','archives'=>'Archives','links'=>'Liens','aboutus'=>'A propos','topmap'=>'Topbar-fr.gif','botmap'=>'Bottombar-fr.gif','alttop'=>'[Barre Superieure]','altbot'=>'[Barre Inferieure]','TranslatedToThisLangBy'=>'Traduit en Français par'},
 'nl'=>{'chset'=>"iso-8859-1",'doct'=>'NL','abstract'=>'Kort','content'=>'Inhoud','wwwresp'=>'Site onderhouden door het LinuxFocus editors team','aboutauthor'=>'Over de auteur','auth'=>'door', 'transinfo'=>'Vertaling info','home'=>'Home','map'=>'Map','index'=>'Index','search'=>'Zoek','news'=>'Nieuws','archives'=>'Archieven','links'=>'Links','aboutus'=>'Over LF','topmap'=>'Topbar-nl.gif','botmap'=>'Bottombar-nl.gif','alttop'=>'[Hoofd-balk]','altbot'=>'[Voet-balk]','lfcomment'=>'Klik hier om een fout te melden of commentaar te geven', 'lftalkback'=>'Talkback voor dit artikel','talkbacktext'=>'Elk artikel heeft zijn eigen talkback pagina. Daar kan je commentaar geven of commentaar van anderen lezen:','goto_talkback'=>'talkback pagina','TranslatedToThisLangBy'=>'Vertaald naar het Nederlands door'},
 'gb'=>{'chset'=>"gb2312",'doct'=>'ZH', 'abstract'=>'ÕªÒª', 'content'=>'Ä¿Â¼', 'wwwresp'=>'Ö÷Ò³ÓÉLinuxFocus±à¼­×éÎ¬»¤', 'aboutauthor'=>'¹ØÓÚ×÷Õß', 'auth'=>'by', 'home'=>'Ê×Ò³', 'map'=>'Õ¾µãµØÍ¼', 'index'=>'Ë÷Òý', 'search'=>'ËÑË÷', 'news'=>'ÐÂÎÅ', 'archives'=>'¹ýÍùÆÚ¿¯', 'links'=>'Á´½Ó', 'aboutus'=>'¹ØÓÚLF', 'transinfo'=>'·­ÒëÐÅÏ¢', 'lfcomment'=>'µã»÷ÕâÀïÏòLinuxFocus±¨¸æ´íÎó»òÌá³öÒâ¼û', 'lftalkback'=>'¶ÔÕâÆªÎÄÕÂ·¢±íÆÀÂÛ', 'talkbacktext'=>'Ã¿ÆªÎÄÕÂ¶¼ÓÐ¸÷×ÔµÄ·´À¡Ò³Ãæ¡£ÔÚÕâ¸öÒ³ÃæÀï£¬Äú¿ÉÒÔÌá½»ÆÀÂÛ£¬Ò²¿ÉÒÔ²é¿´ÆäËû¶ÁÕßµÄÆÀÂÛ£º', 'goto_talkback'=>'·´À¡Ò³Ãæ'}, 
 'jp'=>{'chset'=>"ISO-2022-JP",'TranslatedToThisLangBy'=>'Translated to Japanese by'},
 'ko'=>{'chset'=>"EUC-KR",'doct'=>'KO','abstract'=>'¿ä¾à','content'=>'Â÷·Ê','wwwresp'=>'º» À¥»çÀÌÆ®´Â ¸®´ª½ºÆ÷Ä¿½º ÆíÁýÆÀ¿¡¼­ °ü¸®ÇÕ´Ï´Ù','aboutauthor'=>'±Û¾´ÀÌ ¼Ò°³','auth'=>'Á¤º¸','home'=>'ÃÊ±âÈ­¸é','map'=>'Áöµµ','index'=>'Â÷·Ê','search'=>'Ã£¾Æº¸±â','news'=>'»õ¼Ò½Ä','archives'=>'Áö³­±â»ç ¸ðÀ½','links'=>'ÃßÃµ»çÀÌÆ®','aboutus'=>'LF¿¡ ´ëÇÏ¿©','topmap'=>'Topbar-kr.gif','botmap'=>'Bottombar-kr.gif','alttop'=>'»óÀ§¸Þ´º','altbot'=>'ÇÏÀ§¸Þ´º','lfcomment'=>'¿ÀÀÚ¸¦ Ã£À¸¼Ì°Å³ª ÀÇ°ßÀÌ ÀÖÀ¸½Ã¸é LinuxFocus·Î ¾Ë·ÁÁÖ¼¼¿ä','lftalkback'=>'ÀÌ ±â»ç¿¡ ´ëÇÑ ÀÇ°ßÀÌ ÀÖ½À´Ï´Ù','talkbacktext'=>'°¢ ±â»ç´Â ÇÇµå¹é ÆäÀÌÁö°¡ µé¾î ÀÖ½À´Ï´Ù. ÇÇµå¹éÀ» ÅëÇÏ¿© ¿©·¯ºÐÀº ÀúÀÚ¿¡°Ô ÀÇ°ßÀ» º¸³»°Å³ª ´Ù¸¥ µ¶ÀÚÀÇ ÀÇ°ßÀ» º¸½Ç ¼ö ÀÖ½À´Ï´Ù.:','goto_talkback'=>'ÇÇµå¹é ÆäÀÌÁö','TranslatedToThisLangBy'=>'Translated to Korean by'},
 'ru'=>{'chset'=>"koi8-r",'doct'=>'RU','abstract'=>'òÅÚÀÍÅ','content'=>'óÏÄÅÒÖÁÎÉÅ','aboutauthor'=>'ïÂ Á×ÔÏÒÅ','auth'=>'Á×ÔÏÒ','home'=>'äÏÍÏÊ','map'=>'ëÁÒÔÁ','index'=>'éÎÄÅËÓ','search'=>'ðÏÉÓË','news'=>'îÏ×ÏÓÔÉ','archives'=>'áÒÈÉ×Ù','links'=>'óÓÙÌËÉ', 'aboutus'=>'ðÒÏ LF','TranslatedToThisLangBy'=>'ðÅÒÅ×ÏÄ ÎÁ òÕÓÓËÉÊ'},
 'tr'=>{'chset'=>"iso-8859-9",'doct'=>'TR','abstract'=>'Özet','content'=>'Ýçerik','wwwresp'=>'Görselyöre sayfalarýnýn bakýmý, LinuxFocus Editörleri tarafýndan yapýlmaktadýr','auth'=>'tarafýndan','home'=>'Ev','map'=>'Eriþimdüzeni','index'=>'Ýçindekiler','search'=>'Arama','news'=>'Duyumlar','archives'=>'Belgelikler','links'=>'Baðlantýlar', 'topmap'=>'Topbar-tr.gif','botmap'=>'Bottombar-tr.gif','aboutus'=>'LF Nedir','aboutauthor'=>'Yazar hakkýnda','transinfo'=>'Çeviri bilgisi','lftalkback'=>'Bu yazý için görüþ bildiriminde bulunabilirsiniz','talkbacktext'=>'Her yazý kendi görüþ bildirim sayfasýna sahiptir. Bu sayfaya yorumlarýnýzý yazabilir ve diðer okuyucularýn yorumlarýna bakabilirsiniz.','lfcomment'=>'Burayý klikleyerek hatalarý rapor edebilir ya da yorumlarýnýzý LinuxFocus\'a gönderebilirsiniz','TranslatedToThisLangBy'=>'Türkçe\'ye çeviri'},
 'cn'=>{'chset'=>"Big-5",'TranslatedToThisLangBy'=>'Translated to Chinese by'},
 'pt'=>{'chset'=>"iso-8859-1",'doct'=>'pt_BR', 'abstract'=>'Abstrato', 'content'=>'Conte&uacute;do', 'wwwresp'=>'P&aacute;ginas Web mantidas pelo time de Editores LinuxFocus', 'aboutauthor'=>'Sobre o autor', 'auth'=>'por', 'home'=>'In&iacute;cio', 'map'=>'Mapa', 'index'=>'&Iacute;ndice', 'search'=>'Procura', 'news'=>'Novidades', 'archives'=>'Arquivos', 'links'=>'Links', 'aboutus'=>'Sobre LF', 'transinfo'=>'Informa&ccedil;&atilde;o sobre tradu&ccedil;&atilde;o', 'lfcomment'=>'Clique aqui para reportar uma falha ou para enviar um coment&aacute;rio para LinuxFocus', 'lftalkback'=>'Forma de respostas para este artigo', 'talkbacktext'=>'Every article has its own talkback page. On this page you can submit a comment or look at comments from other readers:', 'talkbacktext'=>'Todo artigo tem sua pr&oacute;pria p&aacute;gina de respostas. Nesta p&aacute;gina voc&ecirc; pode enviar um coment&aacute;rio ou ver os coment&aacute;rios de outros leitores:', 'goto_talkback'=>'p&aacute;gina de respostas','TranslatedToThisLangBy'=>'Traduzido para Portugu&ecirc;s por'},
 'il'=>{'chset'=>"iso-8859-8",'ignore_chset'=>"windows-1255",'doct'=>'IL', 'abstract'=>'úîöéú', 'aboutauthor'=> 'òì äîçáø', 'auth'=> 'òì-éãé', 'home'=> 'ãó áéú', 'map'=> 'ìåç', 'index'=> 'àéðã÷ñ', 'search'=> 'çéôåù', 'news'=> 'îä çãù', 'archives'=> 'àøëéåï', 'links'=> '÷éùåøéí', 'aboutus'=> 'LF òì', 'content'=>'úåëï','transinfo'=>'äñáø òì äúøâåí', 'lfcomment'=>'ìçõ ëàï áëãé ìãååç òì áòéä áàúø', 'lftalkback'=>'îòøëú úâåáåú ìîàîø äð"ì', 'talkbacktext'=>'ìëì îàîø éù îòøëú úâåáåú îùìå. áãó æä úåëì ìäåñéó äòøä àå ôùåè ìäñúëì áäòøåú ùì ÷åøàéí àçøéí', 'goto_talkback'=>'ìîòøëú úâåáåú', 'TranslatedToThisLangBy'=>'úåøâí ìòáøéú ò"é', 'proofread'=>'÷øéàú äâää'},
 'pl'=>{'chset'=>"iso-8859-1",'doct'=>'PL', 'abstract'=>'abstrakcyjne', 'content'=>'Zawartosc', 'wwwresp'=>'Strona prowadzona przez LinuxFocus Editor ', 'aboutauthor'=>'o autorze', 'auth'=>'przez', 'home'=>'Home', 'map'=>'Mapa', 'index'=>'Index', 'search'=>'Szukaj', 'news'=>'Nowosci', 'archives'=>'Archiwy', 'links'=>'Linki', 'aboutus'=>'O nas', 'transinfo'=>'informacje tlumaczeniowe', 'lfcomment'=>'kliknij tutaj zeby wyslac komentarz albo poinformowac nas o blendze', 'lftalkback'=>'Diskusja do tego artykolu', 'talkbacktext'=>'Komentarze do dyskusji:', 'goto_talkback'=>'Strona dyskusyjna'},

 );
#
# languages whose articles get the "convert to palm" links:
my %palm = map { $_ => 1 } qw(en de fr nl pt es it tr pl);
# languages for which html Umlaute are enforced (latin1):
my %islatin = map { $_ => 1 } qw(en de pt fr nl es it pl);
#
# global data:
my $today;
# state of the article evaluation and the parsed chunks (type and data
# are kept in two parallel arrays):
my $parsestate = 0;
my (@parsedtypes, @parseddata);
#
# the individual pieces of the article:
my ($articlename, $articlenumber, $articlecategory, $articletitle);
my ($articleauthorimg, $articleauthor);
my $articleauthorgender = "";
my $articleauthorname;
my (@articletransinfo, @articleaboutauthor, @articleabstract, @articleindex);
my ($articleimage, $articlebody);
# output handle, input file name and helpers of the input reading loop:
my ($fd_out, $infile);
my ($tmpline, $linelen, $prestate);
#
#
# the input file is collected in this one long variable:
my $text;
#
# Evaluate the command line switches; an unknown switch is fatal.
unless (getopts("voTCkl:ht")){
    die "ERROR: No such option. -h for help.\n";
}
if ($opt_h){
    help();
}
if ($opt_v){
    print "lfparser version $ver\n";
    exit 0;
}
$today=today();
check_for_lfparsercfg(\%config);
#
# -k: list the valid categories (four per line) and the key word
# headings, then stop.
if ($opt_k){
    my @categories=sort keys %validcat;
    print "Valid categories are:\n";
    for my $idx (0 .. $#categories){
        print " $categories[$idx],";
        print "\n" if (($idx + 1) % 4 ==0);
    }
    print "\nValid keyword headings are:
<H4>ArticleCategory:</H4>
<H4>AuthorImage:</H4>
<H4>TranslationInfo:</H4> or <H4>AuthorName:</H4>
<H4>AboutTheAuthor:</H4>
<H4>Abstract:</H4>
<H4>ArticleIllustration:</H4>
<H4>ArticleBody:</H4>\n";

    exit(0);
}
# The language from the config file replaces the built-in default ...
if ($config{'lang'}){
    if ($intdat{$config{'lang'}}{'chset'}){
        $lang=$config{'lang'};
    }else{
        die "ERROR: invalid language in configfile ~/.lfparsercfg\n";
    }
}
# ... and the -l option replaces both. A language counts as known
# when it has a character set in %intdat.
if ($opt_l){
    if ($intdat{$opt_l}{'chset'}){
        $lang=$opt_l;
    }else{
        die "ERROR: invalid language specifier\n";
    }
}
# style=1 in the config file has the same effect as the -o switch:
$opt_o=1 if ($config{'style'} && $config{'style'} eq "1");
# every key which the selected language does not define (or defines
# with a false value) gets the english text:
for my $key (keys %{$intdat{'en'}}){
    next if ($intdat{$lang}{$key});
    $intdat{$lang}{$key} = $intdat{'en'}{$key};
}
#
# Decide where to read from and where to write to.
#  - If the argument is a plain number NUM (and no file of that name
#    exists), articleNUM.meta.shtml is read and articleNUM.shtml is written.
#  - Otherwise the argument is the input file and the article goes to stdout.
# Afterwards $articlename holds the base name of the input file without the
# "meta." part and $articlenumber the first number found in it (0 if none).
help() unless ($ARGV[0]);
$infile=$ARGV[0];
$fd_out=IO::Handle->new();
if (! -f "$infile" && $infile=~/^(\d+)$/){
    # only a number given. The file name is articleNUM.meta.shtml
    # (the capture is copied at once, $1 does not survive the next match)
    my $num=$1;
    my $metafile="article$num.meta.shtml";
    my $outfile="article$num.shtml";
    die "ERROR: no such file $metafile\n" unless(-f $metafile);
    open(OUTFD,">$outfile")||die "ERROR: can not write $outfile: $!\n";
    $infile=$metafile;
    $fd_out->fdopen(fileno(OUTFD),"w")||die "ERROR: can not attach the output handle to $outfile: $!\n";
    print STDERR "Language: $lang, Reading $infile .... writing $outfile ...\n";
}else{
    $fd_out->fdopen(fileno(STDOUT),"w")||die "ERROR: can not write to stdout\n";
}
$articlename=$infile;
# basename first, so that a "meta." in a directory name is never touched:
$articlename=~s=^.*/==;
$articlename=~s/meta\.//;
if ($articlename=~/(\d+)/){
    $articlenumber=$1;
}else{
    $articlenumber=0;
}
# Read the input file line by line into $text. While reading:
#  - check that the key word headings on level h4 come in the required order,
#  - expand _LF_ and remove trailing white space,
#  - warn about lines inside <pre> which are too long for printing.
# The file is opened with an explicit read mode: the name comes from the
# command line and must never be taken as an output redirection (">file")
# or as a command to run ("cmd |").
open (FF,"< $infile")||die "ERROR: can not read file $infile: $!\n";
$text="";
# here we check that all the 7 key word headings on level h4 are available:
my $headcheck=0;
# position at which each heading is expected (AuthorName and
# TranslationInfo share position 3):
my %valhead=('ArticleCategory'=>1,'AuthorImage'=>2,'AuthorName'=>3,
    'TranslationInfo'=>3,'AboutTheAuthor'=>4,'Abstract'=>5,
    'ArticleIllustration'=>6,'ArticleBody'=>7);
# heading name to report when the heading at a position is not the expected one:
my %missingheading=(1=>'ArticleCategory',2=>'AuthorImage',3=>'TranslationInfo',
    4=>'AboutTheAuthor',5=>'Abstract',6=>'ArticleIllustration',7=>'ArticleBody');
my $ArticleBody=0; # becomes 1 when the 7th heading has been seen
my $l=0; # current line number
while(<FF>){
    $l++;
    chomp;
    # only the first 7 h4 headings are checked, later ones belong to the body
    if ($headcheck < 7 && /<h4>\s*(\w+)/i){
        $headcheck++;
        $ArticleBody=1 if ($headcheck==7);
        if ($valhead{$1}){
            # it's a valid heading
            unless($valhead{$1}==$headcheck){
                die "ERROR: before line $l, I was expecting key word heading $missingheading{$headcheck}, but I found already $1\n";
            }
        }else{
            die "ERROR: line $l, key word heading not valid. The only valid
headings are:
<H4>ArticleCategory:</H4>
<H4>AuthorImage:</H4>
<H4>TranslationInfo:</H4> or <H4>AuthorName:</H4>
<H4>AboutTheAuthor:</H4>
<H4>Abstract:</H4>
<H4>ArticleIllustration:</H4>
<H4>ArticleBody:</H4>
They must come in this order and with the exact spelling as above.
One of the headings is missing or has wrong spelling.\n";
        }
    }
    s/_LF_/Linux<font color=\"#FF0000\">Focus<\/font>/g;
    s/\s+$//g; # kill trailing space
    #
    # track whether we are inside <pre> ... </pre>; a line holding both
    # tags leaves the state switched off
    if (/<pre>/i){
        $prestate=1;
    }
    if (/<\/pre>/i){
        $prestate=0;
    }
    if ($prestate){
        # check line length
        $tmpline=$_;
        # count things like &amp; &uuml; as one character only:
        $tmpline=~s/&\w+;/x/g;
        $linelen=length($tmpline) - 81;
        # up to 82 should be ok:
        if ($linelen > 1){
            print STDERR "$infile:${l}: Warning line inside <pre> too long. This causes problems when printing the article. Try to make this line $linelen characters shorter.\n";
        }
    }
    $text.="$_\n"; # write in one long variable
}
close FF;
unless ($ArticleBody){
    die "ERROR: key word heading <H4>ArticleBody:</H4> not found\n";
}
# The text is complete: enforce html Umlaute for the latin1 languages,
# split the text into chunks, evaluate the chunks and print the article.
htmlumlaute(\$text) if ($islatin{$lang});
parse(\$text);
evalarticle();
# harden the life of spammers: write "@" as a numeric entity
$articleauthor=~s{\@}{&#64;}g;
printlf_format();

#-----
# Read ~/.lfparsercfg and store its settings in the hash the caller passes in.
#
# Argument: a reference to the hash which receives the settings.
# Returns:  1 if there is no readable config file (the hash is not touched),
#           0 after the file has been read.
# Dies if the file looks readable but can not be opened.
#
# File format: one key=value pair per line. Everything after a "#" is a
# comment and all white space is removed before the line is evaluated.
sub check_for_lfparsercfg($){
    my $cfghashref=shift;
    # home directory of the effective user; getpwuid returns an empty list
    # for a uid without passwd entry, in that case fall back to $HOME
    my $home=(getpwuid($>))[7];
    $home=$ENV{'HOME'} unless (defined($home) && length($home));
    return 1 unless (defined($home) && length($home));
    my $cfgfile="$home/.lfparsercfg";
    return 1 unless( -r $cfgfile);
    # keep the file handle and $_ private to this sub
    local *CFG;
    local $_;
    open(CFG,"< $cfgfile")||die "ERROR: can not read $cfgfile: $!\n";
    while(<CFG>){
        next if (/^\s*#/);
        s/#.*//;
        s/\s+//g;
        if (/(\w+)=(\S+)/){
            $cfghashref->{$1}=$2;
        }
    }
    close CFG;
    return 0;
}
#-----
# Take the global data and print an article in LF format
#
# Takes no arguments and writes the complete html page to $fd_out.
# It reads the article data collected in the global variables
# ($articletitle, $articlecategory, $articlenumber, $articlename,
# $articleauthor, $articleauthorname, $articleauthorimg, $articleimage,
# $articlebody, @articleaboutauthor, @articleabstract, @articleindex,
# @articletransinfo), the language texts in $intdat{$lang} and the
# switches $opt_t, $opt_o, $opt_T and $opt_C. It sets $talkbacklang.
sub printlf_format(){
    my $tmp;
    my $i=0; # running number of the index entries
    my $base="";
    # with -t a BASE element pointing to the English articles is added to the head
    if ($opt_t){
        $base="<BASE href=\"http://www.linuxfocus.org/English/articles/\">";
    }
    $fd_out->print("<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//$intdat{$lang}{doct}\">\n");
    # arabic pages are marked as right-to-left
    if ($lang eq "ar"){
        $fd_out->print("<HTML DIR=\"rtl\" lang=\"AR\">\n");
    }else{
        $fd_out->print("<HTML>\n");
    }
# head with meta data and style sheet, start of the body and the
# =LF= comment block
$fd_out->print("<HEAD>
 <META http-equiv=\"Content-Type\" content=\"text/html; charset=$intdat{$lang}{chset}\">
 <META NAME=\"GENERATOR\" CONTENT=\"lfparser_$ver\">
 <META NAME=\"LFCATEGORY\" CONTENT=\"$articlecategory\">
 <TITLE>lf$articlenumber, ${articlecategory}: $articletitle</TITLE>
<style type=\"text/css\">
<!--
 td.top {font-family: Arial,Geneva,Verdana,Helvetica,sans-serif; font-size:12 }
 pre { font-family:monospace,Courier }
 p.cl { color:#EE9500 }
 a.nodec { text-decoration:none }
 p.trans { font-size:8pt; text-align:right }
 p.clbox { width:50%; alignment:center; background-color:#FFD700; 
           border-style:none; border-width:medium; border-color:#FFD700; 
           padding:0.5cm;  text-align:center }
 p.code { width:80%; alignment:center; background-color:#aedbe8; 
          border-style:none; border-width:medium; border-color:#aedbe8; 
          padding:0.1cm;  text-align:left }
 p.foot { background-color:#AAAAAA; color:#FFFFFF; border-style:none; 
          border-width:medium; border-color:#AAAAAA; padding:0.5cm ; 
          margin-top:0.1cm; margin-right:1cm; margin-left:1cm; 
          text-align:center }
 .mark  { background-color:#e6e6ff }
-->
</style>
 $base
</HEAD>
<BODY bgcolor=\"#ffffff\" text=\"#000000\">
 <!-- this is generated html code. NEVER use this file for your
 translation work. Instead get the file with the same article number
 and .meta.shtml in its name. Translate this meta file and then
 use lfparser program to generate the final article -->
 <!-- lfparser can be obtained from http://www.linuxfocus.org/~guido/dev/lfparser.html -->

<!-- this is used by a number of tools:
 =LF=AUTHOR: $articleauthorname
 =LF=CAT___: $articlecategory
 =LF=TITLE_: $articletitle
 =LF=NUMBER: $articlenumber
 =LF=ANAME_: $articlename
 -->

<!-- 2pdaIgnoreStart -->
");

# navigation: -o (or style=1 in the config file) selects the old bar made
# of two image maps, otherwise the bar is built from tables
if ($opt_o){
$fd_out->print("
<!-- start navegation bar, old style -->
<MAP name=\"top\">
  <AREA shape=\"rect\" coords=\"367,9,418,30\" href=\"../\">
  <AREA shape=\"rect\" coords=\"423,9,457,30\" href=\"../map.html\">
  <AREA shape=\"rect\" coords=\"463,9,508,30\" href=\"../indice.html\">
  <AREA shape=\"rect\" coords=\"514,9,558,30\" href=\"../Search/\">
</MAP>
<MAP name=\"bottom\">
  <AREA shape=\"rect\" coords=\"78,0,163,15\"  href=\"../News/\">
  <AREA shape=\"rect\" coords=\"189,0,284,15\" href=\"../Archives/\">
  <AREA shape=\"rect\" coords=\"319,0,395,15\" href=\"../Links/\">
  <AREA shape=\"rect\" coords=\"436,0,523,15\" href=\"../aboutus.html\">
</MAP>
<!-- IMAGE HEADER -->
<CENTER>
  <IMG src=\"../../common/images/$intdat{$lang}{topmap}\" width=\"600\" height=\"40\" border=\"0\" alt=\"$intdat{$lang}{alttop}\" ismap usemap=\"#top\" ><BR>
  <IMG src=\"../../common/images/$intdat{$lang}{botmap}\" width=\"600\" height=\"21\" border=\"0\" alt=\"$intdat{$lang}{altbot}\" ismap usemap=\"#bottom\">
</CENTER>
<!-- stop navegation bar, old style -->
");
}else{
$fd_out->print("
<!-- start navegation bar -->
 <!-- top navegation bar -->
 <TABLE summary=\"topbar_1\" cellspacing=\"0\" cellpadding=\"0\" border=\"0\" align=\"center\" width=\"90%\">
   <TR bgcolor=\"#2e2292\">
     <TD class=\"top\"><TABLE summary=\"topbar_1_logo\" cellspacing=\"0\" cellpadding=\"0\" border=\"0\" width=
       \"100%\">
         <TR><TD width=\"319\"><IMG src=\"../../common/images/logolftop_319x45.gif\"
           alt=\"[LinuxFocus-icon]\" width=\"319\" height=\"45\" align=\"left\" 
           border=\"0\"></TD>

           <TD class=\"top\">
             <TABLE summary=\"topbar_1_links\" width=\"100%\">
               <TR align=\"right\">
                 <TD class=\"top\"><A class=\"nodec\" href=\"../\"><FONT color=
                 \"#DDDDDD\" size=\"2\">$intdat{$lang}{home}</FONT></A> &nbsp;|&nbsp; <A class=
                 \"nodec\" href=\"../map.html\"><FONT color=
                 \"#DDDDDD\" size=\"2\">$intdat{$lang}{map}</FONT></A> &nbsp;|&nbsp; <A class=
                 \"nodec\" href=\"../indice.html\"><FONT color=
                 \"#DDDDDD\" size=\"2\">$intdat{$lang}{index}</FONT></A> &nbsp;|&nbsp; <A class=
                 \"nodec\" href=\"../Search/\"><FONT color=
                 \"#DDDDDD\" size=\"2\">$intdat{$lang}{search}</FONT></A> </TD>
               </TR>

               <TR align=\"right\">
                 <TD class=\"top\">
                   <HR width=\"100%\" noshade size=\"1\">
                 </TD>
               </TR>
             </TABLE>
           </TD>
         </TR>
       </TABLE>
     </TD>
   </TR>
 </TABLE>
 <!-- end top navegation bar -->
 <!-- blue bar -->
 <TABLE summary=\"topbar_2\" cellspacing=\"0\" cellpadding=\"0\" border=\"0\" align=\"center\"
 width=\"90%\">
   <TR bgcolor=\"#00ffff\">
     <TD><IMG src=\"../../common/images/transpix.gif\" width=\"1\" height=
     \"2\" alt=\"\"></TD>
   </TR>
 </TABLE>
 <!-- end blue bar -->
 <!-- bottom navegation bar -->
 <TABLE summary=\"topbar_3\" cellspacing=\"0\" cellpadding=\"0\" border=\"0\" align=\"center\"
 width=\"94%\">
   <TR bgcolor=\"#000000\">
     <TD>
       <TABLE summary=\"topbar_3_links\" cellspacing=\"0\" cellpadding=\"1\" border=\"0\" width=
       \"100%\">
         <TR align=\"center\">
           <TD WIDTH=\"20%\"><A class=\"nodec\" href=\"../News/\"><FONT color=
           \"#FFFFFF\">$intdat{$lang}{news}</FONT></A> </TD>
           <TD WIDTH=\"5%\"><FONT color=\"#FFFFFF\">|</FONT> </TD>
           <TD WIDTH=\"20%\"><A class=\"nodec\" href=\"../Archives/\"><FONT color=
           \"#FFFFFF\">$intdat{$lang}{archives}</FONT></A> </TD>
           <TD WIDTH=\"5%\"><FONT color=\"#FFFFFF\">|</FONT> </TD>
           <TD WIDTH=\"20%\"><A class=\"nodec\" href=\"../Links/\"><FONT color=
           \"#FFFFFF\">$intdat{$lang}{links}</FONT></A> </TD>
           <TD WIDTH=\"5%\"><FONT color=\"#FFFFFF\">|</FONT> </TD>
           <TD WIDTH=\"20%\"><A class=\"nodec\" href=\"../aboutus.html\"><FONT color=
           \"#FFFFFF\">$intdat{$lang}{aboutus}</FONT></A> </TD>
         </TR>
       </TABLE>
     </TD>
   </TR>
 </TABLE>
 <!-- end bottom navegation bar -->
<!-- stop navegation bar -->
");
}

# server side include and the start of the table with the author information
$fd_out->print("
<!-- SSI_INFO -->

<!--#include virtual=\"../../dynahead.shtml\" -->

<!-- SSI_INFO STOP -->
<!-- 2pdaIgnoreStop -->

<!-- SHORT BIO ABOUT THE AUTHOR -->
<TABLE ALIGN=LEFT BORDER=0  WIDTH=\"190\" >
<TR>
<TD>
");

# the languages listed in %palm get the links to the palm converters
if ($palm{$lang}){
    $fd_out->print("
<!-- 2pdaIgnoreStart -->
<!-- PALM DOC -->
<TABLE BORDER=0 hspace=4 vspace=4> <TR> <TD>
<font size=1> <img src=\"../../common/images/2doc.gif\" width=34 align=left border=0 height=22 alt=\"convert to palm\"><a href=\"http://cgi.linuxfocus.org/cgi-bin/2ztxt\">Convert to GutenPalm</a><br>or <a href=\"http://cgi.linuxfocus.org/cgi-bin/2pda\">to PalmDoc</a></font>
</TD> </TR> </TABLE>
<!-- END PALM DOC -->
<!-- 2pdaIgnoreStop -->
<br>");
}

# image of the author (the stored tag content is wrapped in <...>) and the author line
$fd_out->print("
<$articleauthorimg>
<BR>$intdat{$lang}{auth}  $articleauthor
<BR><BR>\n");
# "about the author"; the key of the heading text is 'aboutauthor' with
# $articleauthorgender appended
if (@articleaboutauthor){
    $fd_out->print("<I>".$intdat{$lang}{'aboutauthor'.$articleauthorgender}.":</I><BR>\n");
    $fd_out->print(join "", @articleaboutauthor);
    $fd_out->print("\n");
}
# some times we want it in english:
if ($talkbacklang{$lang}){
    $talkbacklang=$talkbacklang{$lang};
}else{
    $talkbacklang=$lang;
}
# list the people who translated the article to $lang. The heading is
# printed only once, before the first name.
my $Translatedto_printed=0;
my $proofread="";
$fd_out->print("<!-- TRANSLATED TO $lang -->\n");
for $tmp (@articletransinfo){
    if ($tmp->{'to'} eq $lang && $tmp->{'from'} ne 'orig'){
        if ($tmp->{'from'} eq $lang){
            # same language on both sides: this is a proof reader, who is
            # only shown after a translator has been printed
            next if ($Translatedto_printed==0);
            $proofread= " [".$intdat{$lang}{'proofread'}."]";
        }else{
            $proofread= "";
        }
        $fd_out->print("<BR><BR><I>".$intdat{$lang}{'TranslatedToThisLangBy'}.":</I><BR>\n") unless($Translatedto_printed);
        $Translatedto_printed=1; # there may be a 'en to en' for proof reading
        # the link is either a mail address or a home page
        if ($tmp->{'linktype'} eq 'email'){
            $fd_out->print($tmp->{'name'} . "$proofread <small>&lt;".$tmp->{'link'}."&gt;</small>\n");
        }else{
            $fd_out->print($tmp->{'name'} . "$proofread (<a href=\"".$tmp->{'link'}."\"><font size=\"1\">homepage</font></a>)\n");
        }
        $fd_out->print("<br>\n");
    }
}
$fd_out->print("<!-- TRANSLATED TO STOP -->\n");
# table of contents: one link per entry of @articleindex, the anchors are
# numbered with $i
if (@articleindex){
    $fd_out->print("<BR><i>$intdat{$lang}{content}</i>:\n<UL>\n");
    for $tmp (@articleindex){
        $fd_out->print("  <LI><A HREF=\"#${articlenumber}lfindex$i\">$tmp</A></LI>\n");
        $i++;
    }
    # the talkback entry is left out with -T and for article numbers up to 100
    $fd_out->print("  <LI><A HREF=\"$lftalkback?anum=$articlenumber&amp;lang=$talkbacklang\">".$intdat{$lang}{'lftalkback'}."</A></LI>\n") if (!$opt_T && $articlenumber > 100);
    $fd_out->print("</UL>\n");
}else{
    print STDERR "Warning: could not generate an article index\n";
}
# end of the author table, then title, illustration and abstract
$fd_out->print("\n</TD></TR></TABLE>\n<!-- HEAD OF THE ARTICLE -->\n");
$fd_out->print("<br>&nbsp;\n"); # needed due to a bug in netscape
$fd_out->print("<H2>$articletitle</H2>\n $articleimage");
$fd_out->print("\n<!-- ABSTRACT OF THE ARTICLE -->\n");
$fd_out->print("<P><i>$intdat{$lang}{abstract}</i>:\n<P>\n");
$tmp= join "", @articleabstract;
$fd_out->print($tmp);
# divider between abstract and body, again depending on the style
if ($opt_o){
    # old black bar:
    $fd_out->print("</P>\n<HR size=\"2\" noshade align=\"right\"><BR>\n");
}else{
    # new blue bar:
    $fd_out->print("
<br><br><!-- HR divider --><table width=\"300\" align=\"center\" border=\"0\">
<tr>
  <td bgcolor=\"#8282e0\"><IMG src=\"../../common/images/transpix.gif\" width=\"1\" height=\"2\" alt=\"\"></td>
</tr>
</table>
");
}
$fd_out->print("<!-- BODY OF THE ARTICLE -->\n");
$fd_out->print("$articlebody\n");
$fd_out->print("<!-- 2pdaIgnoreStart -->\n");
# talkback box; the condition is the statement modifier at the end of the
# print (no -T and an article number above 100)
$fd_out->print("<A NAME=\"talkback\">&nbsp;</a>
<h2>$intdat{$lang}{lftalkback}</h2>
$intdat{$lang}{talkbacktext}
<center>
<table border=\"0\"  CELLSPACING=\"2\" CELLPADDING=\"1\">
 <tr BGCOLOR=\"#C2C2C2\"><td align=center>
  <table border=\"3\"  CELLSPACING=\"2\" CELLPADDING=\"1\">
   <tr BGCOLOR=\"#C2C2C2\"><td align=center>
    <A href=\"$lftalkback?anum=$articlenumber&amp;lang=$talkbacklang\"><b>&nbsp;$intdat{$lang}{goto_talkback}&nbsp;</b></a>
   </td></tr></table>
</td></tr></table>
</center>
\n") if (!$opt_T && $articlenumber > 100);
$fd_out->print("<HR size=\"2\" noshade>\n");
# foot of the article: left cell with the editor team link and the copyright
$fd_out->print("<!-- ARTICLE FOOT -->
<CENTER><TABLE WIDTH=\"98%\">
<TR><TD ALIGN=CENTER BGCOLOR=\"#9999AA\" WIDTH=\"50%\">
<A HREF=\"../../common/lfteam.html\">$intdat{$lang}{wwwresp}</A>
<BR><FONT COLOR=\"#FFFFFF\">&copy; $articleauthorname, <a href=\"../../common/copy.html\">FDL</a> <BR><a href=\"http://www.linuxfocus.org\">LinuxFocus.org</a></FONT>
");
# -C suppresses the link to the comment form
if ($opt_C){
    $fd_out->print("</TD>\n");
}else{
    $fd_out->print("<BR><a href=\"${lfcomment}?lang=${lang}&amp;article=$articlename\" target=\"_TOP\">$intdat{$lang}{lfcomment}</A><BR></TD>\n");
}

# right cell of the foot: the complete translation information
if (scalar(@articletransinfo)>0){ # set to 1 to show only a list if there is at least one translator
    $fd_out->print("<TD BGCOLOR=\"#9999AA\">\n<!-- TRANSLATION INFO -->\n");
    $fd_out->print("<font size=2>$intdat{$lang}{transinfo}:</font>\n<TABLE>\n");
    for $tmp (@articletransinfo){
        # entries with from 'orig' are printed as "LANG --> -- :"
        if ($tmp->{'from'} eq 'orig'){
            $fd_out->print("  <tr><td><font size=\"2\">");
            $fd_out->print($tmp->{'to'}." --&gt; -- : ");
            if ($tmp->{'linktype'} eq 'email'){
                $fd_out->print($tmp->{'name'} . " <small>&lt;".$tmp->{'link'}."&gt;</small></font></td></tr>\n");
            }else{
                $fd_out->print($tmp->{'name'} . " (<a href=\"".$tmp->{'link'}."\"><font size=\"1\">homepage</font></a>)</font></td></tr>\n");
            }
            next;
        }
        # all other entries are printed as "FROM --> TO:"
        $fd_out->print("  <tr><td><font size=\"2\">");
        $fd_out->print($tmp->{'from'}." --&gt; ".$tmp->{'to'}.": ");
        if ($tmp->{'linktype'} eq 'email'){
            $fd_out->print($tmp->{'name'} . " &lt;".$tmp->{'link'}."&gt;</font></td></tr>\n");
        }else{
            $fd_out->print($tmp->{'name'} . " (<a href=\"".$tmp->{'link'}."\"><font size=\"1\">homepage</font></a>)</font></td></tr>\n");
        }
    }
    $fd_out->print("</TABLE>\n</TD>\n");
}else{
    # no translation information available: print an empty cell
    $fd_out->print("<TD BGCOLOR=\"#9999AA\">&nbsp;");
    $fd_out->print("\n<!-- OLD FORMAT, NO TRANSLATION INFO -->\n");
    $fd_out->print("</TD>\n");
}
# close the foot table, add the generation stamp and finish the page
$fd_out->print("</TR></TABLE></CENTER>\n");
$fd_out->print("<p><font size=1>$today, generated by lfparser version $ver</font></p>\n");
$fd_out->print("<!-- 2pdaIgnoreStop -->\n");
$fd_out->print("</BODY>\n</HTML>\n");

}

#-----
# handle the parsed text chunks.
# evalarticle: check the chunk list produced by parse() against the
# LinuxFocus meta format and fill in the article variables.
#
# Takes no arguments. It walks @parsedtypes and @parseddata in parallel
# (index $i) and drives a state machine kept in $parsestate, which is
# declared outside this sub (presumably initialised to 0 -- confirm):
#    0  expect the H1 title                          -> $articletitle
#    1  expect the H4 heading "ArticleCategory"
#    2  expect the category as plain text            -> $articlecategory
#    3  expect an H4 heading (AuthorImage; only the level is checked)
#    4  expect an <img ...> start tag                -> $articleauthorimg
#    5  expect H4 "AuthorName" (-> 6) or "TranslationInfo" (-> 7)
#    6  old format: anchor with the author           -> $articleauthor,
#                                                       $articleauthorname
#    7  new format: TranslationInfo paragraphs       -> @articletransinfo
#       (sub-state in $transinfostate, leaves to 9)
#    8  expect H4 "AboutTheAuthor" (reached from 6 only)
#    9  collect html into @articleaboutauthor until H4 "Abstract"
#   10  collect html into @articleabstract until H4 "ArticleIllustration"
#   11  expect the illustration <img ...>            -> $articleimage
#   12  expect the "Body" chunk                      -> $articlebody
# Every deviation from this order ends the program with die.
# After the loop the body is handed to parsebodyforindex() and the
# category is checked against %validcat (exit 1 if it is not listed).
sub evalarticle{
    my $i=0;
    my $type;
    my $content; # not used anywhere in this sub
    my $transinfostate=0; # sub-state, only meaningful while $parsestate==7
    my ($link,$linktype,$name,$transinfolang1,$transinfolang2);
    # states in which we ignore <P> <BR> </P>
    # (all header states; 9 and 10 are missing on purpose of the code
    # below: there the html is collected verbatim)
    my %ignorePandBR=(1=>1,2=>1,3=>1,4=>1,5=>1,6=>1,7=>1,8=>1,11=>1,12=>1);
    for $type (@parsedtypes){
        # remove empty text and &nbsp; which is inserted by WYSIWYG editors
        $parseddata[$i]=~ s/\&nbsp\;//g if ($type eq "Text");
        # skip text chunks that consist of white space only
        if ($type eq "Text" && $parseddata[$i]=~ /^[\r\n\t ]+$/){
            $i++; next;
        }
        # skip text chunks that are empty after the clean-up above
        # NOTE(review): this truth test also skips a text chunk that is
        # exactly "0".
        if ($type eq "Text" && !$parseddata[$i]){
            $i++; next;
        }
        # dbg, debug:
        #print "-- $parsestate: $parseddata[$i] type: $type --\n";
        # start of article, search for heading:
        if ($parsestate==0 && $type=~/HeadingLevelTag/){
            if ($type eq "HeadingLevelTag1"){
                $articletitle=$parseddata[$i];
                # collapse line breaks and runs of blanks in the title
                $articletitle=~s/\s+/ /g;
                $parsestate++;
            }else{
                die "ERROR: The first heading must be the title of the article on level 1. Note: you may not have \"_LF_\" or nested tags in the title.\n";
            }
            $i++; next;
        }
        # ignoring of <P>, <BR>, </P> in certain states:
        if ($ignorePandBR{$parsestate}){
            if ($type eq "StartTag" && $parseddata[$i] =~/^P$/i){ $i++; next;}
            if ($type eq "StartTag" && $parseddata[$i] =~/^br$/i){ $i++; next;}
            if ($type eq "EndTag" && $parseddata[$i] =~/^\/P$/i){ $i++; next;}
        }
        # start of article, search for ArticleCategory:
        if ($parsestate==1){
            if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/ArticleCategory/){
                $parsestate++;
            }else{
                die "ERROR: The second heading must be ArticleCategory on level 4\n";
            }
            $i++; next;
        }
        #--
        # looking for the category
        if ($parsestate==2){
            if ($type eq "Text"){
                $articlecategory=$parseddata[$i];
                # all white space is removed so that the value can be used
                # as a key of %validcat at the end of this sub
                $articlecategory=~s/\s+//g;
                $parsestate++;
            }else{
                die "ERROR: The heading ArticleCategory must be followed by a text plain string without tags\n";
            }
            $i++; next;
        }
        #--
        # looking for the image heading
        # (any level 4 heading is accepted, the text is not compared)
        if ($parsestate==3){
            if ($type eq "HeadingLevelTag4"){
                $parsestate++;
            }else{
                die "ERROR: The 3-rd heading must be AuthorImage after ArticleCategory description\n";
            }
            $i++; next;
        }
        #--
        # looking for the image
        # (the tag is stored without the surrounding < >)
        if ($parsestate==4){
            if ($type eq "StartTag" && $parseddata[$i]=~/img/i){
                $parsestate++;
                $articleauthorimg=$parseddata[$i];
            }else{
                die "ERROR: Image of author missing after AuthorImage heading\n";
            }
            $i++; next;
        }
        #--
        # looking for the AuthorName
        if ($parsestate==5){
            # the old format is AuthorName the new is TranslationInfo
            # and they are mutually exclusive
            if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/AuthorName/){
                $parsestate=6;
            }elsif ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/TranslationInfo/){
                $parsestate=7;
            }else{
                die "ERROR: AuthorName or TranslationInfo must be the heading after the Image\n";
            }
            $i++; next;
        }
        #--
        # looking for the name and e-mail or home-page
        # (old format: the complete anchor is kept as $articleauthor and
        # the link text becomes $articleauthorname)
        if ($parsestate==6){
            if ($type eq "AnchorTag"){
                $articleauthor="<" . $parseddata[$i] . ">";
                if ($parseddata[$i]=~/\" *>(.+?)<\//){
                    $articleauthorname=$1;
                }else{
                    die "ERROR: in <$parseddata[$i]>, could not extract e-mail or home-page\n";
                }
                $parsestate=8;
            }else{
                die "ERROR: AuthorName must followed by an anchor tag\n";
            }
            $i++; next;
        }
        #--
        # looking for the name and e-mail or home-page
        # parse the TranslationInfo pre-tag:
        # $transinfostate: 0      expect the text "original in LANG"
        #                  1      expect the anchor of the original author
        #                  even   expect "LANG1 to LANG2" or the heading
        #                         AboutTheAuthor (which leaves to state 9)
        #                  odd>1  expect the anchor of a translator
        if ($parsestate==7){
            if ($transinfostate == 0){
                if($type eq "Text" && $parseddata[$i]=~/original in +(\w+)/i){
                    $transinfostate++;
                    die "ERROR: in TranslationInfo language $1 not supported. Type lfparser -h to see the supported languages \n" unless($intdat{$1}{'chset'});
                    # the entry of the original author is stored with
                    # from='orig' and to=<language of the original>
                    $transinfolang1='orig';
                    $transinfolang2=$1;
                    $i++; next;
                }else{
                    die "ERROR1: in $parseddata[$i]: TranslationInfo must be followed by pargraph that looks like: <p>original in LANG <a href=\"nospam:....\">Author Name</a></p> or <p>original in LANG <a href=\"mailto:...\">Author Name</a></p>\n";
                }
            }else{
                # a second "original in" line is an error
                if($type eq "Text" && $parseddata[$i]=~/original in +(\w+)/i){
                    die "ERROR1a: there must be only one original author under TranslationInfo\n";
                }
            }
            if ($transinfostate == 1){
                # this is still the original author but this time the A HREF=...
                # the tag can look like this:
                # a href="mailto:katja@linuxfocus.org" gender="female"
                # a href="mailto:katja@linuxfocus.org" gender="mplural"
                # a href="mailto:katja@linuxfocus.org" gender="fplural"
                if ($type eq "AnchorTag"){
                    $parseddata[$i]=~s/[\n\r\t]/ /g;
                    # $articleauthorgender is only assigned for the three
                    # values below, otherwise it is left untouched
                    if ($parseddata[$i]=~/gender/i){
                        if ($parseddata[$i]=~/female/){
                            $articleauthorgender="female";
                        }elsif($parseddata[$i]=~/fplural/){
                            $articleauthorgender="fplural";
                        }elsif($parseddata[$i]=~/mplural/){
                            $articleauthorgender="mplural";
                        }
                    }
                    # the gender attribute is not valid html, remove it
                    # before the link and the name are extracted
                    $parseddata[$i]=~s/gender *= *"?\w+"?//gi;
                    #$articleauthor="<" . $parseddata[$i] . ">";
                    $transinfostate++;
                    if ($parseddata[$i]=~/= *[\'\"]([^\"\']+)[\'\"] *>(.+?)<\//){
                        $articleauthorname=$2;
                    }else{
                        die "ERROR2: in <$parseddata[$i]>, can not extract name\n";
                    }
                    # $1 and $2 are still the captures of the match above,
                    # no other pattern has been applied in between
                    $link=$1; # could in this case as well be a homepage
                    $name=$2;
                    $name=~s/\s+/ /g;
                    $link=~s/\s+//g;
                    if ($link=~/(nospam|mailto):/i){
                        $linktype="email";
                        # strip the scheme and obfuscate the address
                        $link=~s/(nospam|mailto)://g;
                        $link=~s/\@/(at)/g; # could be several authors
                        $link=~s/\s//g;
                        $link=~s/,/, /g; # could be several authors
                        $articleauthor="$name <br> <small>&lt;$link&gt;</small>";
                    }else{
                        $linktype="homepage";
                        $articleauthor="$name <a href=\"$link\"><font size=\"1\">(homepage)</font></a>";
                    }
                    push(@articletransinfo,{'from',$transinfolang1,'to',$transinfolang2,'name',$name,'link',$link,'linktype',$linktype});
                    $i++; next;
                }else{
                    die "ERROR3: TranslationInfo must be followed by pargraph that looks like: <p>original in LANG <a href=\"nospam:email(at)domain.com\">Author Name</a></p>\n";
                }
            }
            if (($transinfostate % 2) == 0){
                # this is the "lang to lang" or already the AboutTheAuthor
                if($type eq "Text" && $parseddata[$i]=~/(\w+) +to +(\w+)/i){
                    $transinfostate++;
                    # only the source language ($1) is checked against %intdat
                    die "ERROR4: in TranslationInfo language $1 not supported. Type lfparser -h to see the supported languages \n" unless($intdat{$1}{'chset'});
                    $transinfolang1=$1;
                    $transinfolang2=$2;
                    $i++; next;
                # looking for the AboutTheAuthor
                }elsif ($type eq "HeadingLevelTag4"){
                    # here we look also for the next heading:
                    if ($parseddata[$i]=~/AboutTheAuthor/){
                        # state 8 is skipped, it belongs to the old format
                        $parsestate=9;
                        die "ERROR7: TranslationInfo not complete\n" unless(scalar(@articletransinfo) > 0);
                    }else{
                        die "ERROR8: The heading after TranslationInfo must be AboutTheAuthor and not \"$parseddata[$i]\"\n";
                    }
                    $i++; next;
                }else{
                    die "ERROR5: in $parseddata[$i]: TranslationInfo must have a pargraph that looks like: <p>LANG1 to LANG2<a href=\"nospam:your(at)email.domain\">Translator Name</a></p>\nAdditional &nbsp; and other things are not allowed\n";
                }
            }
            if (($transinfostate % 2) == 1){
                # anchor of a translator, same clean-up as for the
                # original author but nothing is written to $articleauthor
                if ($type eq "AnchorTag"){
                    $transinfostate++;
                    $parseddata[$i]=~s/[\r\n]/ /g;
                    $parseddata[$i]=~s/gender *= *"?\w+"?//gi;
                    if ($parseddata[$i]=~/= *[\'\"]([^\"\']+)[\'\"] *>(.+?)<\//){
                        $link=$1; # could in this case as well be a homepage
                        $name=$2;
                        $name=~s/\s+/ /g;
                        $link=~s/\s+//g;
                        if ($link=~/(nospam|mailto):/){
                            $linktype="email";
                            $link=~s/mailto://g;
                            $link=~s/nospam://g;
                            $link=~s/\@/(at)/g; # could be several authors
                            $link=~s/\s//g;
                            $link=~s/,/, /g; # could be several authors
                        }else{
                            $linktype="homepage";
                        }
                        push(@articletransinfo,{'from',$transinfolang1,'to',$transinfolang2,'name',$name,'link',$link,'linktype',$linktype});
                    }else{
                        die "ERROR2:  TranslationInfo ($parseddata[$i]): could not get name\n"; 
                    }
                    $i++; next;
                }else{
                    die "ERROR6: TranslationInfo must have a pargraph that looks like: <p>LANG1 to LANG2<a href=\"nospam:email(at)somewhere.com\">Translator Name</a></p>\n";
                }
            }
            # NOTE(review): looks unreachable, every branch above ends in
            # next or die -- confirm before relying on it
            $i++; next;
        }
        #--
        # looking for the AboutTheAuthor when there is no TranslationInfo
        if ($parsestate==8){
            if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/AboutTheAuthor/){
                $parsestate++;
            }else{
                die "ERROR: The heading after AuthorName must be AboutTheAuthor and not \"$parseddata[$i]\"\n";
            }
            $i++; next;
        }
        #--
        # reading about the author (html text without heading)
        # every chunk is collected until the heading Abstract shows up
        if ($parsestate==9){
            if ($type=~/HeadingLe/){
                if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/Abstract/){
                    $parsestate++;
                }else{
                    die "ERROR: The heading after the \"about the author\" paragraph must be the Abstract and not \"$parseddata[$i]\"\n";
                }
            }else{
                # reading any html:
                # tags were stored without < > by parse(), put them back
                if ($type=~/Tag/){
                    push(@articleaboutauthor,"<" . $parseddata[$i] . ">");
                }elsif ($type eq "Text"){
                    push(@articleaboutauthor,$parseddata[$i]);
                }else{
                    die "Programm error, unknown type $type in about author\n";
                }
            }
            $i++; next;
        }
        #--
        # reading the abstract (html text without heading)
        # every chunk is collected until the heading ArticleIllustration
        if ($parsestate==10){
            if ($type=~/HeadingLe/){
                if ($type eq "HeadingLevelTag4" && $parseddata[$i]=~/ArticleIllustration/){
                    $parsestate++;
                }else{
                    die "ERROR: The heading after the abstract paragraph must be ArticleIllustration but I found $type:\"$parseddata[$i]\"\n";
                }
            }else{
                # reading any html:
                if ($type=~/Tag/){
                    push(@articleabstract,"<" . $parseddata[$i] . ">");
                }elsif ($type eq "Text"){
                    push(@articleabstract,$parseddata[$i]);
                }else{
                    die "Programm error, unknown type $type in abstract\n";
                }
            }
            $i++; next;
        }
        #--
        # looking for the article illustration
        # (stored with the surrounding < >, unlike $articleauthorimg)
        if ($parsestate==11){
            if ($type eq "StartTag" && $parseddata[$i]=~/img/i){
                $parsestate++;
                $articleimage="<" . $parseddata[$i] . ">";
            }else{
                die "ERROR: Image of article missing after ArticleIllustration heading\n";
            }
            $i++; next;
        }
        #--
        # looking for the ArticleBody is already checked in the parser:
        if ($parsestate==12){
            if ($type eq "Body"){
                $articlebody=$parseddata[$i];
                # remove index anchors of the form written by
                # parsebodyforindex(), they are generated again below
                $articlebody=~s|<A NAME="${articlenumber}lfindex\d+">&nbsp;</A>||g;
            }else{
                die "Program error: state 12 but tag-type $type instead of ArticleBody\n";
            }
            $i++; next;
        }
        #--
        # only reached in state 0 for chunks that are not a heading:
        # everything in front of the title is ignored
        $i++;
    }
    # the state stays at 12 once the body has been seen, any other value
    # means that the meta header was incomplete
    die "ERROR: invalid article meta-format, debug state $parsestate. Either you do not have a <H1> at the beginning or there is still a bug in lfparser.\n" unless ($parsestate == 12);
    # add the index anchors and fill @articleindex
    parsebodyforindex(\$articlebody);
    # the category check is done last, the message goes to STDERR
    unless ($validcat{$articlecategory}){
        print STDERR "ERROR invalid article category $articlecategory\n";
        print STDERR "valid categories are:\n";
        foreach (keys %validcat){
            print STDERR " - \"$_\"\n";
        }
        exit 1;
    }
}
#-----
# generate an index for the file.
# parse the html file body and store the H2 H3 text strings in @articleindex
# parsebodyforindex takes a ref to a text string as argument.
sub parsebodyforindex($){
    # Build the index of the article and put an anchor in front of every
    # H2 and H3 heading of the body.
    #
    # Argument: a reference to the html text of the article body.
    # Results:  - the heading texts (html tags removed) are appended to
    #             @articleindex
    #           - $articlebody is replaced by the body in which every
    #             <h2>/<h3> heading is preceded by
    #             <A NAME="<articlenumber>lfindexN">&nbsp;</A>, N counting
    #             from 0 in document order
    #           - </body> and </HTML> are dropped from the body
    # Only headings without attributes (<h2>, <H3>, ...) are recognised,
    # everything else is copied unchanged.
    my $text = shift;
    my @body;
    my $h;
    my $i=0;
    while (1) {
        # First we try to pull off any plain text (anything before a "<" char)
        if ($$text =~ /\G([^<]+)/gcs) {
            push(@body,$1);
        } elsif ($$text =~ /\G<\/HTML>/igcs) {
            next;
        } elsif ($$text =~ /\G<\/body>/igcs) {
            next;
        } elsif ($$text =~ /\G<[hH]([23])>(.+?)<\/[hH]\1>/gcs) {
            # \1 makes sure that the closing tag has the same level as
            # the opening one; the heading is written back in upper case
            my $level=$1;
            $h=$2;
            push(@body,"<A NAME=\"${articlenumber}lfindex$i\">&nbsp;</A>\n<H$level>".$h ."</H$level>\n");
            push(@articleindex,$h);
            $i++;
        } elsif ($$text =~ m|\G(<[^>]*>)|gcs) {
            push(@body,$1);
        } else {
            # the string is exhausted, or there's no > in it.
            # In the second case the rest (a "<" that is never closed and
            # the text behind it) must not be lost: /c has left pos() at
            # the first character that could not be parsed, so copy
            # everything from there.
            my $rest=substr($$text,pos($$text)||0);
            push(@body,$rest) if (length($rest));
            last;
        }
    }
    # the index shows plain text only
    foreach $h (@articleindex){
        $h=~s/<.+?>//g;
    }
    $articlebody=join "",@body;
}
#-----
# parse the html file and store the result in @parseddata, @parsedtypes.
# parse takes a ref to a text string as argument.
sub parse($){
    # Split the meta html into chunks. For every chunk the content is
    # appended to @parseddata and its kind to @parsedtypes (same index).
    #
    # Argument: a reference to the complete text of the file.
    # Kinds:    Text              plain text up to the next "<"
    #           Markup            <!...> declarations (content without < >)
    #           EndTag            </x ...>            (content without < >)
    #           AnchorTag         <a ...>text</a> as one chunk
    #           HeadingLevelTagN  <hN>text</hN>, content is the text only
    #           Body              everything behind <h4>ArticleBody</h4>
    #           StartTag          any other tag       (content without < >)
    # Comments <!-- ... --> are thrown away. Scanning stops after the
    # Body chunk, at the end of the text, or at a "<" without a ">".
    my $src = shift;
    my ($kind,$chunk);
    CHUNK: while (1) {
        if ($$src =~ /\G([^<]+)/gcs) {
            # plain text, anything in front of a "<"
            ($kind,$chunk) = ('Text',$1);
        } elsif ($$src =~ /\G<(!--.*?--)>/gcs) {
            # comments in front of the article body are not recorded
            next CHUNK;
        } elsif ($$src =~ /\G<(!.*?)>/gcs) {
            ($kind,$chunk) = ('Markup',$1);
        } elsif ($$src =~ m|\G<(/[a-zA-Z][^<]*?)>|gcs) {
            # an end tag
            ($kind,$chunk) = ('EndTag',$1);
        } elsif ($$src =~ /\G<([aA] [^>]+>([^<]+)<\/[aA])>/gcs) {
            # a complete <a ..> ..</a> element
            ($kind,$chunk) = ("AnchorTag",$1);
        } elsif ($$src =~ /\G<[hH](\d)>([^<]+)<\/[hH]\d>/gcs) {
            # a heading that contains text only
            my $level = $1;
            $chunk = $2;
            if ($level eq "4" && index($chunk,"ArticleBody")> -1){
                # the rest of the text (pos() is now just behind the
                # heading) is the article body and is not split further
                push(@parseddata,substr($$src,pos($$src)));
                push(@parsedtypes,"Body");
                last CHUNK;
            }
            $kind = "HeadingLevelTag$level";
        } elsif ($$src =~ /\G<(.+?)>/gcs) {
            # whatever is left is a start tag; the first char is known
            # to be "<", the match makes sure there is a ">" as well
            ($kind,$chunk) = ('StartTag',$1);
        } else {
            # the string is exhausted, or there's no > in it.
            last CHUNK;
        }
        #print "dbg $chunk type: $kind\n";
        push(@parseddata,$chunk);
        push(@parsedtypes,$kind);
    }
}
#--------------
sub htmlumlaute($){
    # Replace accented characters by their html entities.
    #
    # Argument: a reference to the text, which is changed in place.
    # Only the characters listed in the table are touched, everything
    # else (ASCII, existing entities, other 8 bit characters) is left
    # as it is. The replacements are applied in table order.
    # The value returned is the result of the last substitution (the one
    # for the last table entry).
    my $txt_ptr=shift;
    my @entity_table=(
        ['¡','&iexcl;'],  ['¿','&iquest;'],
        ['À','&Agrave;'], ['Á','&Aacute;'], ['Â','&Acirc;'],
        ['Ã','&Atilde;'], ['Ä','&Auml;'],   ['Å','&Aring;'],
        ['Ç','&Ccedil;'],
        ['È','&Egrave;'], ['É','&Eacute;'], ['Ê','&Ecirc;'], ['Ë','&Euml;'],
        ['Ì','&Igrave;'], ['Í','&Iacute;'], ['Î','&Icirc;'], ['Ï','&Iuml;'],
        ['Ñ','&Ntilde;'],
        ['Ò','&Ograve;'], ['Ó','&Oacute;'], ['Ô','&Ocirc;'],
        ['Õ','&Otilde;'], ['Ö','&Ouml;'],   ['Ø','&Oslash;'],
        ['Ù','&Ugrave;'], ['Ú','&Uacute;'], ['Û','&Ucirc;'], ['Ü','&Uuml;'],
        ['Ý','&Yacute;'],
        ['ß','&szlig;'],
        ['à','&agrave;'], ['á','&aacute;'], ['â','&acirc;'],
        ['ã','&atilde;'], ['ä','&auml;'],   ['å','&aring;'],
        ['æ','&aelig;'],
        ['ç','&ccedil;'],
        ['è','&egrave;'], ['é','&eacute;'], ['ê','&ecirc;'], ['ë','&euml;'],
        ['ì','&igrave;'], ['í','&iacute;'], ['î','&icirc;'],
        ['ñ','&ntilde;'],
        ['ò','&ograve;'], ['ó','&oacute;'], ['ô','&ocirc;'],
        ['ö','&ouml;'],
        ['ù','&ugrave;'], ['ú','&uacute;'], ['û','&ucirc;'], ['ü','&uuml;'],
    );
    my $last_result;
    foreach my $pair (@entity_table){
        my ($raw,$entity)=@$pair;
        # \Q..\E: the table entry is always taken literally
        $last_result=($$txt_ptr=~s/\Q$raw\E/$entity/g);
    }
    $last_result;
}
#--------------
sub today(){
    # Return the current local date as a string in yyyy-mm-dd format.
    # localtime delivers the year as offset from 1900 and the month
    # counted from 0, both are corrected here.
    my ($mday,$mon,$year) = (localtime)[3,4,5];
    return sprintf("%04d-%02d-%02d",$year + 1900,$mon + 1,$mday);
}
#-----
#
sub help(){
    # Print the usage text, followed by the version number ($ver), to
    # STDOUT and terminate the program with exit status 0.
    # This sub does not return.
    print <<"END_OF_HELP";
lfparser -- parse a LinuxFocus article in HTML meta syntax and
generate a final LinuxFocus article. The HTML meta syntax is described
in http://www.linuxfocus.org/~guido/dev/lfparser.html
It is a special HTML format that can easily be edited and converted to
the released article format. It gives LinuxFocus the flexibilty to change
the layout without editing all articles.

USAGE: lfparser [-hCktoTv][-l ar|cn|de|en|es|fr|gb|il|jp|ko|nl|pt|pl|ru|it|tr] articleX.meta.shtml > articleX.shtml 
or
USAGE: lfparser [-hCktoTv][-l ar|cn|de|en|es|fr|gb|il|jp|ko|nl|pt|pl|ru|it|tr] num

OPTIONS: -h this help
         -C do not generate a link to lfcomment 
         -l select a language for the output [config file: lang=xx]
         -k list all valid categories, and H4 headings and exit
         -o use old style header [config file: style=1]
         -T do not include talkback
         -t test mode. This inserts a <BASE href=..> into the
            article to include the images and other stuff from 
            ../../common/ without the need to have them locally available.
            This option must not be used for the final article.
         -v print version and exit.

If you do not specify a filename as argument but just a number
then lfparser will seatch for a file called article<num>.meta.shtml
in the current directory and write to article<num>.shtml
This is a shortcut to save some typing.

EXAMPLE: French: 
         lfparser -l fr article111.meta.shtml > article111.shtml
         or as shortcut:
         lfparser -l fr 111

         Arabic: 
         lfparser -l ar articleX.meta.shtml > articleX.shtml

You can have an optional ~/.lfparsercfg file with the following
syntax:
# comment
lang = de # make German the default language
style = 2 # new style, 1 would be old style
#

This will then set the configuration options described under
OPTIONS and you can run lfparser without specifying any options:
lfparser articleX.meta.shtml > articleX.shtml
This is lfparser version: $ver
END_OF_HELP

    exit 0;
}
__END__