# -*- encoding: utf-8 -*-

use strict;
use utf8;

use constant DEBUG => 0;
use constant DEBUGN => 0;

# Command-line arguments: FILECODE (the input file to annotate) and REPCODE
# (the default code prefix inserted into generated URLs).
my $file = $ARGV[0];
my $rep = $ARGV[1];

# Load the code-name table into %codes.
# Every line holding at least four ':' separators contributes one entry: the
# field between the last two colons is the key and the field after the last
# colon is the value.  Both are stripped of leading AND trailing whitespace
# (this also removes a stray "\r" from CRLF files).
# NOTE(review): the file is read as raw bytes while this source is `use utf8`;
# accented keys may therefore not compare equal to the accented literals used
# for lookups -- confirm the file's encoding and add an I/O layer if needed.
my $file_codes = 'noms-des-codes.txt';
open my $codes_fh, "<", $file_codes
    or die "Erreur E/S ($file_codes) : $!\n";
my %codes;
while (defined (my $line = <$codes_fh>)) {
    # fill the %codes hash
    if ($line =~ m/^.*\:.*\:.*\:(.*)\:(.*)$/) {
        my ($n, $m) = ($1, $2);
        s/^\s+|\s+$//g for ($n, $m);
        # An empty name would match every title in the generic lookup.
        $codes{$n} = $m if $n ne '';
    }
}
close($codes_fh);

# Default code prefix for generated URLs; used whenever no code name is
# recognised near a reference (also read by brut2url).
my $CODE = $rep;

# Tokens of the line currently being processed.
my @t;

# State of the "L./R./D./A." reference group being collected:
#   $articles  label => URL for every reference of the group
#              (read by get_full_url),
#   @en_cours  labels of the references that make up the group,
#   $nbr_grp   counter bumped for each reference seen; used to estimate how
#              far back the group starts,
#   $flag_grp  set once the last reference of the group has been reached.
my $articles = {};
my @en_cours;
my $flag_grp = 0;
my $nbr_grp = 0;

# A three-argument open on an undefined name succeeds (anonymous temporary
# file), so a missing argument has to be rejected explicitly.
die "Usage : $0 FILECODE REPCODE\n" unless defined $file;

# The input is opened first so that a bad path does not leave a truncated
# sortie.xml behind.
# NOTE(review): lines are read and written as raw bytes while the accented
# patterns below are character strings (`use utf8`) -- confirm the input
# encoding.
open my $in, "<", $file or die "Erreur E/S : $!\n";
open my $out, ">", "sortie.xml" or die "Erreur E/S : $!\n";

while (defined (my $line = <$in>)) {
    # Only lines mentioning "article" outside of the structural XML tags are
    # rewritten; every line, rewritten or not, is copied to the output.
    if ($line =~ m/article/i
        and $line !~ m/<niveau\d/
        and $line !~ m/<article/
        and $line !~ m/<\/article/
        and $line !~ m/<\/Articles/
        and $line !~ m/<Articles/) {
        print "============================= ARTICLE =====\n"
            if (DEBUG or DEBUGN);

        # ---- Pass 1: "article N" / "article N-M" references ----
        #if ($line =~ m/(([Aa])rticle(s?)\s?(\d+(-\d+)?))\s(?!X)/) {
        ### NB: where does the "\s(?!X)" come from?
        if ($line =~ m/(([Aa])rticle(s?)\s?(\d+(-\d+)?))/) {
            # The code found for one reference is kept for the following
            # references of the same line until another one is found.
            my $cod = '';
            my @done = ();
            @t = split /\s+|\(|\)|\[|\]|\*/, $line;
            foreach my $m (0..$#t) {
                next if $t[$m] !~ m/\d+(-\d+)?/;
                # The number must directly follow an "article" word; the
                # first token has no predecessor ($t[-1] is the LAST token).
                next if $m == 0 or $t[$m - 1] !~ m/rticle/;
                # Each distinct number is linked once; the check is redone
                # for every token so one duplicate does not hide the rest.
                next if grep { $t[$m] eq $_ } @done;

                print $line if DEBUGN;
                print "-------------\n" if DEBUGN;
                print "Faits: @done\n" if DEBUGN;

                # Look at the words around the reference (5 before, 7 after)
                # for a code name or for a word that rules the link out.
                # The window is clamped at 0: negative indices would wrap
                # around to the end of the line.
                my $not_to_do = '';
                my $first = $m - 5;
                $first = 0 if $first < 0;
                foreach my $i ($first..$m + 7) {
                    $not_to_do .= $t[$i].' ';
                    if ($t[$i] =~ m/code/) {
                        # The ten words after "code" form the title that
                        # recup_code matches against the known code names.
                        my $titre = '';
                        foreach my $j ($i+1..$i+10) {
                            $titre .= $t[$j].' ';
                        }
                        $cod = recup_code($titre, $i, \@t);
                        last;
                    }
                }

                # References to laws, decrees, ... are not articles of a code.
                next if $not_to_do =~ m/de la loi/
                     or $not_to_do =~ m/décret/i
                     or $not_to_do =~ m/ordonnance/i
                     or $not_to_do =~ m/réglement/i
                     or $not_to_do =~ m/circulaire/i;

                if (not defined $cod or $cod eq '') {
                    $cod = $CODE;
                }
                push @done, $t[$m];
                my ($nom, $num) = ($t[$m - 1], $t[$m]);
                my $full_url = '<a href="' . brut2url_v2($num, $cod)
                    . '">' . $nom . ' ' . $num . '</a>';
                print "$nom $num \t $full_url\n" if DEBUGN;

                # Trick to avoid linking twice: if the first occurrence is
                # already followed by a closing "</a>", leave the line alone.
                # Tokens are quoted so that they are matched literally.
                my ($after) = $line =~ m/\Q$nom $num\E(.{5})/;
                if (not defined $after or $after !~ m/\/a>/) {
                    $line =~ s/\Q$nom $num\E/$full_url/g;
                    print "----------\n" if DEBUGN;
                    print $line if DEBUGN;
                }
            }
        }

        # ---- Pass 2: "L. 123-4" style references (L., R., D., A.) ----
        if ($line =~ m/[LRDA]\.\s\d+/) {
            my $e = 0;
            print "=================== LRDA =====\n" if DEBUG;
            print $line if DEBUG;
            print "-------------\n" if DEBUG;
            @t = split /\s+|\(|\)|\[|\]|\*/, $line;
            foreach my $m (0..$#t) {
                if ($t[$m] =~ m/[LRDA]\./) {
                    $nbr_grp += 1;
                    my $courant = brut2text($t[$m], $t[$m + 1]);
                    # Provisional URL built with the default code.
                    $articles->{$courant} = brut2url($t[$m], $t[$m + 1]);

                    # A further reference 2, 3 or 4 tokens ahead belongs to
                    # the same group; otherwise this one closes the group.
                    my $suivant;
                    foreach my $d (2, 3, 4) {
                        if ($t[$m + $d] =~ m/[LRDA]\./) {
                            $suivant = $m + $d;
                            last;
                        }
                    }
                    $en_cours[$e] = $courant;
                    if (defined $suivant) {
                        $en_cours[$e+1] =
                            brut2text($t[$suivant], $t[$suivant + 1]);
                        $e ++;
                        $nbr_grp += 1;
                    } else {
                        $flag_grp = 1;
                    }
                }
                if ($nbr_grp != 0 and $flag_grp == 1) {
                    # Search for a code name from a few words before the
                    # estimated start of the group up to 5 words after its
                    # end.  Clamped at 0 to avoid negative-index wrap-around.
                    my $s = $m - 2 * $nbr_grp - 5;
                    $s = 0 if $s < 0;
                    foreach my $i ($s..$m + 5) {
                        next if $t[$i] !~ m/code/;
                        my $titre = '';
                        foreach my $j ($i+1..$i+10) {
                            $titre .= $t[$j].' ';
                        }
                        my $cod = recup_code($titre, $i, \@t);
                        # Each reference of the group gets ITS OWN URL; the
                        # label "X. N" is split back into letter and number.
                        foreach my $label (@en_cours) {
                            my ($lettre, $numero) = split / /, $label, 2;
                            $articles->{$label} =
                                brut2url($lettre, $numero, $cod);
                        }
                        last;
                    }
                    to_string($articles);
                    print "-------------\n" if DEBUG;
                    foreach my $x (keys(%{$articles})) {
                        my $url = get_full_url($x);
                        print $x, "\n" if DEBUG;
                        # Label matched literally (its "." is not a wildcard).
                        $line =~ s/\Q$x\E/$url/;
                    }
                    print "En-cours : ", join(' ', @en_cours), "\n" if DEBUG;
                    print "-------------\n" if DEBUG;
                    print $line, "\n" if DEBUG;
                    # Start a new group.
                    $flag_grp = 0;
                    $nbr_grp = 0;
                    $e = 0;
                    $articles = {};
                    @en_cours = ();
                }
            }
        }
    }
    print {$out} $line;
}


close($in);
# Buffered write errors only surface when the handle is closed.
close($out) or die "Erreur E/S : $!\n";

# Build the HTML anchor for an article label.
# The target is looked up in the file-level $articles hash (label => URL);
# the label itself is used as the link text.
sub get_full_url {
    my ($label) = @_;
    my $href = $articles->{$label};
    return join('', '<a href="', $href, '">', $label, '</a>');
}

# Debug helper: print a label => URL hash as "{k -> v ; k -> v}".
# Takes the hash reference as its only argument and prints nothing unless
# DEBUG is enabled.  Declared without a prototype: the former empty "()"
# prototype contradicted the one-argument call made by the script.
sub to_string {
    my $articles = shift;
    return unless DEBUG;
    my @str = map { $_.' -> '.$articles->{$_} } keys(%{$articles});
    print '{'.join(' ; ', @str)."}\n";
}

# Build the display label "LETTER NUMBER" of a reference from two raw tokens.
#   $L  letter token (e.g. "L."): brackets, parentheses and '*' are removed,
#       the dot is kept;
#   $N  number token: dots, commas, brackets, parentheses and '*' are removed.
# Returns both cleaned tokens joined by a single space.
sub brut2text {
    my ($L, $N) = @_;
    $N =~ s/[.*()\[\],]//g;
    $L =~ s/[()*\[\]]//g;
    return join(' ', $L, $N);
}

# Build the URL of a "LETTER NUMBER" reference.
#   $L, $N  raw letter and number tokens; punctuation, quotes, brackets and
#           parentheses are removed from both, the letter is lower-cased;
#   $C      optional code prefix; when undefined or empty the file-level
#           default $CODE is used instead.
# Returns "/" . prefix . "article-" . letter . number.
sub brut2url {
    my ($L, $N, $C) = @_;
    my $portal = '/';

    s/[.,*"')(\]\[]//g for ($L, $N);

    my $has_code = (defined $C and $C ne '');
    print "Code : $C\n" if $has_code and DEBUG;

    my $prefix = $has_code ? $C : $CODE;
    return $portal.$prefix.'article-'.lc($L).$N;
}

# Build the URL of a plain "article NUMBER" reference.
#   $N  raw number token; punctuation, quotes, brackets and parentheses are
#       removed;
#   $C  code prefix.
# Returns "/" . $C . "article-" . number, or the empty string when $C is
# undefined or empty.  The empty result used to be the implicit value of the
# failed condition; it is now returned explicitly.
sub brut2url_v2 {
    my ($N, $C) = @_;
    my $portal = '/';

    $N =~ s/[.,*"')(\]\[]//g;

    # Without a code there is nothing to link to.
    return '' unless defined $C and $C ne '';

    print "Code : $C\n" if DEBUGN;
    return $portal.$C.'article-'.$N;
}

# Find the URL prefix of the code named after a "code" token.
#   $title  the words following "code", joined by spaces;
#   $h      index of the "code" token in the token list;
#   $ligne  reference to the token list of the line.
# Code names sharing a first word (travail, justice, pensions, ...) are told
# apart by dedicated tests on the first three words after "code"; any other
# title is compared, case-insensitively, with every name of the file-level
# %codes table in alphabetical order and the first name found in the title
# wins.  Returns the value stored in %codes for the chosen name; '' when
# nothing matches, undef when the chosen name is absent from the table.
sub recup_code {
    my ($title, $h, $ligne) = @_;
    # The three words following "code" (read in place, no array copy).
    my ($w1, $w2, $w3) = ($ligne->[$h+1], $ligne->[$h+2], $ligne->[$h+3]);
    my $cod = '';

    if ($w2 =~ m/travail/) {
        if ($title =~ m/Mayotte/) {
            $cod = $codes{'Mayotte'};
        } elsif ($title =~ m/maritime/) {
            $cod = $codes{'travail maritime'};
        } else {
            $cod = $codes{'travail'};
        }
    } elsif ($w3 =~ m/justice/) {
        if ($title =~ m/administrative/) {
            $cod = $codes{'justice administrative'};
        } elsif ($title =~ m/militaire/) {
            $cod = $codes{'justice militaire'};
        }
    } elsif ($w2 =~ m/pensions/) {
        if ($title =~ m/civiles et militaires/) {
            $cod = $codes{'pensions civiles et militaires'};
        } elsif ($title =~ m/retraite des marins/) {
            $cod = $codes{'pensions de retraite des marins'};
        } elsif ($title =~ m/militaires d'invalidité/) {
            $cod = $codes{"pensions militaires d'invalidité"};
        }
    } elsif ($w1 =~ m/général/) {
        if ($w3 =~ m/impôts/) {
            if ($title =~ m/annexe 1/) {
                $cod = $codes{'impots annexe 1'};
            } elsif ($title =~ m/annexe 2/) {
                $cod = $codes{'impots annexe 2'};
            } elsif ($title =~ m/annexe 3/) {
                $cod = $codes{'impots annexe 3'};
            } elsif ($title =~ m/annexe 4/) {
                $cod = $codes{'impots annexe 4'};
            } elsif ($title =~ m/livre des procédures/) {
                $cod = $codes{'procédures fiscales'};
            } else {
                $cod = $codes{'impôts'};
            }
        } elsif ($title =~ m/la propriété/) {
            $cod = $codes{'général de la propriété'};
        } elsif ($title =~ m/des collectivités/) {
            $cod = $codes{'général des collectivités'};
        }
    } elsif ($w1 =~ m/forestier/) {
        if ($title =~ m/Mayotte/) {
            $cod = $codes{'forestier de Mayotte'};
        } else {
            $cod = $codes{'forestier'};
        }
    } elsif ($w2 =~ m/douanes/) {
        if ($title =~ m/Mayotte/) {
            $cod = $codes{'douanes de Mayotte'};
        } else {
            $cod = $codes{'douanes'};
        }
    } elsif ($w2 =~ m/procédure/) {
        if ($title =~ m/fiscales/) {
            $cod = $codes{'procédures fiscales'};
        } elsif ($title =~ m/civile/) {
            $cod = $codes{'procédure civile'};
        } elsif ($title =~ m/pénale/) {
            $cod = $codes{'procédure pénale'};
        }
    } elsif ($w2 =~ m/domaine/) {
        if ($title =~ m/public fluvial/) {
            $cod = $codes{'public fluvial'};
        } elsif ($title =~ m/collectivités publiques/) {
            $cod = $codes{'collectivités publiques'};
        } else {
            $cod = $codes{'domaine'};
        }
    } elsif ($w2 =~ m/communes/) {
        if ($title =~ m/Calédonie/) {
            $cod = $codes{'Nouvelle-Calédonie'};
        } else {
            $cod = $codes{'communes'};
        }
    } else {
        # Generic lookup.  Names come from a data file, so they are matched
        # literally (\Q...\E); an empty name would match every title and is
        # skipped.
        foreach my $g (sort keys(%codes)) {
            next if $g eq '';
            if ($title =~ /\Q$g\E/i) {
                $cod = $codes{$g};
                last;
            }
        }
    }
    return $cod;
}

__END__
