User:ArielGlenn/scripts/mgxlit.pl

I use this to transliterate from greek to the transliteration scheme on the About_Greek page. Sure hope I got all the rules right. It's ot meant to be good perl, just maintainable.


 * 1) !/usr/bin/perl

binmode(STDOUT, ":utf8"); binmode(STDIN, ":utf8");

use utf8;
 * 1) use encoding "utf8";


 * 1) transcription:


 * 1)  β γ γγ γχ  δ ζ θ  κ λ μ μπ   ν ντ ξ  π ρ σ τ φ χ  ψ
 * 2)  v g ng nch d z th k l m mb|b n nt ks p r s t f ch ps


 * 1)  αά  εέ  ηή  ιίϊΐ  οό  υύϋΰ  ωώ
 * 2)  aá  eé  ií  iíïí  oó  yýÿý  oó
 * 3)  aá  eé  ií  iíïí  oó  yýyý  oó


 * 1)  aá  eé  ií  iíií  oó  yýyý  oó


 * 1) αυ     αύ     ευ     εύ     ηυ     ηύ     ου   ού
 * af|av áf|áv  ef|ev  éf|év  if|iv  íf|ív  ou   oú


 * 1) αυ     αύ     ευ     εύ     ηυ     ηύ     ου   ού οϋ
 * 2) af|av  áf|áv  ef|ev  éf|év  if|iv  íf|ív  ou   oú oy


 * 1) rules:

sub get_next_cluster { my ($index,@letters) = @_;

if ($index == $#letters) { return(0); }	SWITCH: { # consonants ($letters[$index] =~ /β/) && do { return(1,$letters[$index]); };	   ($letters[$index] =~ /γ/) && do { if ($letters[$index+1] =~ /[κγξχ]/) { return(2,$letters[$index],$letters[$index+1]); }               else { return(1,$letters[$index]); }           };	    ($letters[$index] =~ /δ/) && do { return(1,$letters[$index]); };	   ($letters[$index] =~ /ζ/) && do { return(1,$letters[$index]); };	   ($letters[$index] =~ /θ/) && do { return(1,$letters[$index]); };	   ($letters[$index] =~ /λ/) && do { return(1,$letters[$index]); };	   ($letters[$index] =~ /μ/) && do { if ($letters[$index+1] =~ /π/) { return(2,$letters[$index],$letters[$index+1]); }               else { return(1,$letters[$index]); }           };	    ($letters[$index] =~ /ν/) && do { return(1,$letters[$index]); };	   ($letters[$index] =~ /ξ/) && do { return(1,$letters[$index]); };	   ($letters[$index] =~ /π/) && do { return(1,$letters[$index]); };	   ($letters[$index] =~ /ρ/) && do { return(1,$letters[$index]); };	   ($letters[$index] =~ /σ/) && do { return(1,$letters[$index]); };	   ($letters[$index] =~ /ς/) && do { return(1,$letters[$index]); };	   ($letters[$index] =~ /τ/) && do { return(1,$letters[$index]); };	   ($letters[$index] =~ /φ/) && do { return(1,$letters[$index]); };	   ($letters[$index] =~ /χ/) && do { return(1,$letters[$index]); };	   ($letters[$index] =~ /ψ/) && do { return(1,$letters[$index]); };         # vowels ($letters[$index] =~ /α/) && do { if ($letters[$index+1] =~ /[υύ]/) { return(2,$letters[$index],$letters[$index+1]); }               else { return(1,$letters[$index]); }           };	    ($letters[$index] =~ /ά/) && do { return(1,$letters[$index]); };	   ($letters[$index] =~ /ε/) && do { if ($letters[$index+1] =~ /[υύ]/) { return(2,$letters[$index],$letters[$index+1]); }               else { return(1,$letters[$index]); }           };	    ($letters[$index] =~ /έ/) && do { return(1,$letters[$index]); };	   ($letters[$index] =~ /η/) && do { if ($letters[$index+1] =~ /[υύ]/) { return(2,$letters[$index],$letters[$index+1]); }               else { return(1,$letters[$index]); }           };	    ($letters[$index] =~ /ή/) && do { return(1,$letters[$index]); };	   ($letters[$index] =~ /ι/) && do { return(1,$letters[$index]); };	   ($letters[$index] =~ /ί/) && do { return(1,$letters[$index]); };	   ($letters[$index] =~ /ϊ/) && do { return(1,$letters[$index]); };	   ($letters[$index] =~ /ΐ/) && do { return(1,$letters[$index]); };	   ($letters[$index] =~ /ο/) && do { if ($letters[$index+1] =~ /[υύϋ]/) { return(2,$letters[$index],$letters[$index+1]); }               else { return(1,$letters[$index]); }           };	    ($letters[$index] =~ /ό/) && do { return(1,$letters[$index]); };	   ($letters[$index] =~ /υ/) && do { return(1,$letters[$index]); };	   ($letters[$index] =~ /ύ/) && do { return(1,$letters[$index]); };	   ($letters[$index] =~ /ϋ/) && do { return(1,$letters[$index]); };	   ($letters[$index] =~ /ΰ/) && do { return(1,$letters[$index]); };	   ($letters[$index] =~ /ω/) && do { return(1,$letters[$index]); };	   ($letters[$index] =~ /ώ/) && do { return(1,$letters[$index]); };         # default do { return(1,$letters[$index]); };     } }

sub cluster_to_sym { my($count,@cluster) = @_; if ($count == 0) { return(""); }       if ($count == 1) { if ($cluster[0] =~ /β/) { return("v"); } if ($cluster[0] =~ /γ/) { return("g"); } if ($cluster[0] =~ /δ/) { return("d"); } if ($cluster[0] =~ /ζ/) { return("z"); } if ($cluster[0] =~ /θ/) { return("th"); } if ($cluster[0] =~ /κ/) { return("k"); } if ($cluster[0] =~ /λ/) { return("l"); } if ($cluster[0] =~ /μ/) { return("m"); } if ($cluster[0] =~ /ν/) { return("n"); } if ($cluster[0] =~ /ξ/) { return("x"); } if ($cluster[0] =~ /π/) { return("p"); } if ($cluster[0] =~ /ρ/) { return("r"); } if ($cluster[0] =~ /σ/) { return("s"); } if ($cluster[0] =~ /ς/) { return("s"); } if ($cluster[0] =~ /τ/) { return("t"); } if ($cluster[0] =~ /φ/) { return("f"); } if ($cluster[0] =~ /χ/) { return("ch"); } if ($cluster[0] =~ /ψ/) { return("ps"); } if ($cluster[0] =~ /α/) { return("a"); } if ($cluster[0] =~ /ά/) { return("á"); } if ($cluster[0] =~ /ε/) { return("e"); } if ($cluster[0] =~ /έ/) { return("é"); } if ($cluster[0] =~ /η/) { return("i"); } if ($cluster[0] =~ /ή/) { return("í"); } if ($cluster[0] =~ /ι/) { return("i"); } if ($cluster[0] =~ /ί/) { return("í"); } if ($cluster[0] =~ /ϊ/) { return("ï"); } if ($cluster[0] =~ /ΐ/) { return("í"); } if ($cluster[0] =~ /ο/) { return("o"); } if ($cluster[0] =~ /ό/) { return("ó"); } if ($cluster[0] =~ /υ/) { return("y"); } if ($cluster[0] =~ /ύ/) { return("ý"); } if ($cluster[0] =~ /ϋ/) { return("ÿ"); } if ($cluster[0] =~ /ΰ/) { return("ý"); } if ($cluster[0] =~ /ω/) { return("o"); } if ($cluster[0] =~ /ώ/) { return("ó"); } }  if ($count == 2) { if (($cluster[0] =~ /γ/) && ($cluster[1] =~ /γ/)) { return("ng"); } if (($cluster[0] =~ /γ/) && ($cluster[1] =~ /κ/)) { return("gk"); } if (($cluster[0] =~ /γ/) && ($cluster[1] =~ /ξ/)) { return("gx"); } if (($cluster[0] =~ /γ/) && ($cluster[1] =~ /χ/)) { return("nch"); } if (($cluster[0] =~ /μ/) && ($cluster[1] =~ /π/)) { return("b"); }
 * 1) 	if ($cluster[0] =~ /ΐ/) { return("ḯ"); }
 * 1) 	if ($cluster[0] =~ /ΰ/) { return("ý"); }
 * 2) this is a problem.  I really want something that doesn't exist
 * 3) in the unicode tables, i.e. y with accent and diaeresis.
 * 1) we change these later if needed.

if (($cluster[0] =~ /α/) && ($cluster[1] =~ /υ/)) { return("av"); } if (($cluster[0] =~ /α/) && ($cluster[1] =~ /ύ/)) { return("áv"); } if (($cluster[0] =~ /ε/) && ($cluster[1] =~ /υ/)) { return("ev"); } if (($cluster[0] =~ /ε/) && ($cluster[1] =~ /ύ/)) { return("év"); } if (($cluster[0] =~ /η/) && ($cluster[1] =~ /υ/)) { return("iv"); } if (($cluster[0] =~ /η/) && ($cluster[1] =~ /ύ/)) { return("ív"); } if (($cluster[0] =~ /ο/) && ($cluster[1] =~ /υ/)) { return("ou"); } if (($cluster[0] =~ /ο/) && ($cluster[1] =~ /ύ/)) { return("oú"); } if (($cluster[0] =~ /υ/) && ($cluster[1] =~ /ι/)) { return("yi"); } if (($cluster[0] =~ /υ/) && ($cluster[1] =~ /ί/)) { return("yí"); } }  return($cluster[0]); }
 * 1) 	if (($cluster[0] =~ /ο/) && ($cluster[1] =~ /ϋ/)) { return("oy"); }
 * 1)   return(" ");

sub modify_from_following { my($sym,$index,@letters) = @_;

if ($index > $#letters) { return($sym); }   ($peek_count,@peek_cluster) = get_next_cluster($index,@letters); my $sym_next = ""; if ($peek_count) { $sym_next = $peek_cluster[0]; }

if ($sym =~ /av/) { if (($sym_next =~ /^[θκξπστφχψ\W]/) || ($sym_next =~ /^$/)) { return("af"); } else { return($sym); } }

if ($sym =~ /áv/) { if (($sym_next =~ /^[θκξπστφχψ\W]/) || ($sym_next =~ /^$/)) { return("áf"); } else { return($sym); } }

if ($sym =~ /ev/) { if (($sym_next =~ /^[θκξπστφχψ\W]/) || ($sym_next =~ /^$/)) { return("ef"); } else { return($sym); } }

if ($sym =~ /év/) { if (($sym_next =~ /^[θκξπστφχψ\W]/) || ($sym_next =~ /^$/)) { return("éf"); } else { return($sym); } }

if ($sym =~ /iv/) { if (($sym_next =~ /^[θκξπστφχψ\W]/) || ($sym_next =~ /^$/)) { return("if"); } else { return($sym); } }

if ($sym =~ /ív/) { if (($sym_next =~ /^[θκξπστφχψ\W]/) || ($sym_next =~ /^$/)) { return("íf"); } else { return($sym); } }

return($sym); }

sub is_following { my ($index,@letters) = @_;

if ($index >= $#letters) { return(0); }   ($peek_count,@peek_cluster) = get_next_cluster($index,@letters); return($peek_count); }

sub b_to_mp { my($sym,$sym_preceding,$index,@letters) = @_;

if ($sym =~ /^$/) { return($sym); } if ($sym =~ /b/) { if ($sym_preceding =~ /^.+$/) { if (is_following($index,@letters)) { return("mp"); }	}   }    return($sym); }

while () { $line = lc($_); $newline = ""; @letters = split(//,$line); $precedes = ""; $index = 0; $sym_preceding = ""; $sym = ""; $output = ""; while (($count, @cluster) = get_next_cluster($index,@letters)) { if ($count == 0) { last; }     # convert to transliterated value, first approx.        $sym = cluster_to_sym($count,@cluster); # check for following letters $sym = modify_from_following($sym,$index+$count,@letters);

# special case: b or mp       $sym = b_to_mp($sym,$sym_preceding,$index,@letters);


 * 1)        for ($i=0; $i<$count; $i++ ) {
 * 2) 	   print "$cluster[$i]";
 * 3)        }
 * 4)        print "($sym)";
 * 5)        print " ";

$output .= $sym; $index += $count; $sym_preceding = $sym; $sym = ""; }   print "$output\n"; }

exit 0;