<?php
/****
  Open Bugs:
    frag2DAG fails to populate dag with v,e,V,etc.
    indexes by .NN vs by 0..nn-1 need a cross-referencing map.
      assigning a node into the node array is by number nn.  But then put .NN into a map back to the numerical index.
      $dag.nid[NN] = nn
      inside tierIntoDag, we put the boundary name NN into dag[V][dag[v]++], we should also put its number somewhere
      $dag["Vn"][$boundary] = $dag[v]++
  
  Fixed:
    x edge ugly

****/

/* *********
// tglib.php
   *********

// Let a TG just be instantiated in PHP strings each for a tier
// Each a sequence of fragments (verses, paragraphs, convenient sized units to look at more than one on an editor's screen.)
// Each fragment is an array of tier strings keyed by tier name, and
// for each tier pair a set of nodes (index pairs); each index picks a between-letter point, 0 to N, within its tier's N-letter string.

// Let "node" also mean "boundary".

// Let . in the primary sources be converted to \.  Then,
// Let .<LETTER># indicate as follows:
//          .: Boundary,
//   <LETTER>: anchoring tier where the Boundary is defined, required in both defining tier and referencing tiers.
//          #: the ordinal of this boundary in that tier,
//             # can be in 0..$, where $=N with N the highest boundary number in the named tier.

// It is natural for tiers to be built up by analysis of previously-analyzed or -written tiers,
// but it is acceptable for a .L# boundary to refer to any tiers also.
// Nodes are unambiguous because defined in only one tier, its source.  You could decide the source or reference tier for a node should be a different tier, but then you
// must re-number the affected nodes.

// ^: Let Prev ^Succ indicate deletion of the last letter of Prev followed by suffixation of Succ.

P = Pratyanjihnahrdayam = TG() =
 {
   N 20 verses in Nagari, nodes = spaces|punct
   R 20 verses transliterated roman, nodes = spaces|punct
   I 20 verses with sandhi unwound to show inflectional morphemes & boundaries delimited by -
   D 20 verses with derivational morphemes delimited by +
   C 20 verses with grammatical morpheme labels/codings
   M 20 verses with stem-wise translations
   P 20 verses with phrasal translations
   F 20 verses with full-verse translations
   X 20 verses with interpretive commentary
 }

Several dictionary resources:
   syntactical notes including notes regarding ellipsis and implied forms (copula deletion)
   grammatical morpheme names/codings, forms,
   content root morphemes with POS and L1 meaning
   content stem morphemes with POS and L1 meaning
   conjugational unit level translations into L1
   clausal translations into L1
   fragment translations into L1

Dependency checking, # of nodes should be ordered as 
  N = R < S < I < D = C > M > P > F = X

Data entry method is writing PHP strings, for example:

$fragN = 1;
R[1] = ".N0 chitih .N1 svatantraa .N2 viSvasiddhihetuh .N$";
I[1] = ".N0 chiti .I1 -h .N1 svatantra^ .I2 -aa .N2 viSva .I3 siddhi .I4 hetu- .I5 -h .N$";
D[1] = ".N0 chiti .I1 -h .N1 sva .D1 tan .D2 tra^ .I2 -aa .N2 viS .D3 va .D4 -asya^^^^ .I3 saadh^^^^ .D5 idh .D6 ta^ .D7 -i .I4 hetu- .I5 -h .I6 asti^^^^ .N$";
C[1] = ".N0 =      .I1 -h/n+-1 .N1 refl .D1 = .I2 -aa/F/1/sg .N2 = .D4 N/sg/gen(v6) .I3 = .D5 weak form of root .D6 past participle .D7 noun .I4 = .I5 noun/masc/sg/nom(v1) .I6 be-del .N$";
M[1] = ".N0 consciousness/masc/sg/nom(v1) .N1 self-will/fem/sg/nom(v1) .N2 all,world/N/sg/gen .I3 attainment,performance/masc/sg/nom(v1) .I4 cause/N/masc/sg/nom(v1) .I6 is .N$";
P[1] = ".N0 consciousness, freedom .N2 the performance of the universe .I4 is the cause .N$";
F[1] = ".N0 consciousness, which is freedom, is the cause of the performance of everything .N$";
X[1] = ".N0 The world is a performance, and consciousness in freedom is its cause .N$";

*/

// Tier letters accepted in boundary markers.  A non-empty $tL that is already
// defined when this code runs is kept; otherwise the default set is used.
if (empty($tL)) {
  $tL = "NRIDCMPFX";
}
// Earlier form of the pattern, kept for reference:
// $tierNodeRegex = "/\.([$tL]\d+)(?!\.[$tL]\d+)(.*)/";
//
// Group 1: the boundary label, i.e. one tier letter followed by digits and/or '$'.
// Group 2: the text following the boundary (leading whitespace skipped), running
//          until the next boundary marker or a line break.
$tierNodeRegex = '/\.([' . $tL . '][$\d]+)\s*((?:(?!\.[' . $tL . '][$\d]+).)*)/';

/**
 * Convert a Harvard-Kyoto encoded Sanskrit string towards Devanagari.
 *
 * NOTE: the mapping is only a placeholder at present: the single character
 * 'a' becomes 'A' and every other character is copied through unchanged.
 *
 * @param string $str Harvard-Kyoto text
 * @return string the converted text
 */
function HK2DN($str) {
  $converted = [];
  // Work character by character (multibyte aware) so a real mapping table can be slotted in later.
  foreach (mb_str_split($str) as $ch) {
    $converted[] = ($ch === 'a') ? "A" : $ch;
  }
  return implode("", $converted);
}

/**
 * Render one fragment as an HTML table with one row per tier.
 *
 * The fragment's boundaries are first turned into a graph (frag2DAG), ordered
 * topologically (topoSort) and given levels (levelize); each tier is then
 * rendered by tier2tr() against that levelled graph.
 *
 * @param array $frag  tier strings keyed by tier name
 * @param array $tiers tier names, in the order the rows should appear
 * @return string the <TABLE class="tg"> markup
 */
function frag2HtmlTable($frag, $tiers) {
  // DAG = { V, E, T, L }: vertexes, edges, topo sort array, max-level array
  $graph = frag2DAG($frag, $tiers);
  $graph = topoSort($graph);
  $graph = levelize($graph);

  // One <TR> per requested tier, in the caller's order.
  $rows = "";
  foreach ($tiers as $tierName) {
    $rows .= tier2tr($frag, $tierName, $graph);
  }

  return "<TABLE class=\"tg\">\n" . $rows . "</TABLE>\n";
}

/*
function tier2tr($fr, $tier, $i, $dag) {
  global $tierNodeRegex;
  
  $rs  = "<TR>";
  $colN = 0;
  preg_match_all($tierNodeRegex,
		 $fr[$tier][$i],
		 $matches,PREG_PATTERN_ORDER);
  $nMatches = count($matches[1]);

  // the first end of column is in the 2nd match.  So set up the first first:
  $prevboundaryl = $matches[1][0]; // boundary
  $prevcontent   = $matches[2][0]; // content
  $prevboundaryn = vertex_of($dag,$prevboundaryl);

  for ($i=1; $i<$nMatches; $i++) {
    $extry     = $matches[0][$i]; // extra
    $boundaryl = $matches[1][$i]; // boundary
    $content   = $matches[2][$i]; // content
    $preColN = $colN;
    $boundaryn = vertex_of($dag,$boundaryl);
    $postColN = $dag["L"][$boundaryn];
//  $rs .= "<TD COLSPAN=\"" . ($postColN - $preColN) . "\">col $preColN to $postColN from $prevboundaryl \"$prevcontent\" to $boundaryl </TD>\n";
    $rs .= "<TD align=center COLSPAN=\"" . ($postColN - $preColN) . "\">$prevcontent</TD>\n";
    $colN = $postColN;
    $prevcontent = $content;
    $prevboundaryl = $boundaryl;
    $prevboundaryn = $boundaryn;
  }
  $rs .= "</TR>\n";
  return $rs;
}
*/

/**
 * Render one tier of a fragment as an HTML table row.
 *
 * The tier string is split at its boundary markers with $tierNodeRegex.  The
 * text between two consecutive boundaries becomes one <TD> whose COLSPAN is
 * the distance between the current column and the level ($dag["L"]) of the
 * boundary that closes the cell.  Text after the tier's last boundary is not
 * rendered.
 *
 * A tier that is absent from the fragment, or that contains no boundary
 * markers, yields an empty row.  frag2DAG() likewise skips absent tiers, so
 * callers may pass a tier list that is wider than the fragment.
 *
 * NOTE(review): the first cell is measured from column 0, which presumably
 * relies on every tier opening at a level-0 boundary such as N0 -- confirm.
 * NOTE(review): cell text is emitted without HTML escaping; confirm whether
 * tier strings are allowed to carry markup.
 *
 * @param array  $frag tier strings keyed by tier name
 * @param string $tier name of the tier to render
 * @param array  $dag  graph with "V" (index => label) and "L" (index => level) populated
 * @return string the <TR>...</TR> markup, newline terminated
 */
function tier2tr($frag, $tier, $dag) {
  global $tierNodeRegex;

  // Absent tier: nothing to match, so avoid handing null to preg_match_all().
  if (!isset($frag[$tier])) {
    return "<TR></TR>\n";
  }

  $matches = [];
  preg_match_all($tierNodeRegex, $frag[$tier], $matches, PREG_PATTERN_ORDER);
  $labels   = $matches[1] ?? []; // boundary labels, in order of appearance
  $contents = $matches[2] ?? []; // text following each boundary
  $nMatches = count($labels);

  // No boundaries found: there is no first cell to set up.
  if ($nMatches === 0) {
    return "<TR></TR>\n";
  }

  $rs   = "<TR>";
  $colN = 0;

  // A cell is closed by the boundary that follows it, so the first cell's end
  // is in the 2nd match; carry the content of the previous match forward.
  $prevcontent = $contents[0];

  for ($j = 1; $j < $nMatches; $j++) {
    $boundaryn = vertex_of($dag, $labels[$j]);
    $postColN  = $dag["L"][$boundaryn]; // column at which this boundary sits
    $rs .= "<TD align=center COLSPAN=\"" . ($postColN - $colN) . "\">$prevcontent</TD>\n";
    $colN = $postColN;
    $prevcontent = $contents[$j];
  }

  $rs .= "</TR>\n";
  return $rs;
}

/**
 * Look up the index of a vertex by its label.
 *
 * @param array  $dag         graph whose "V" entry maps vertex index => label
 * @param string $vertexlabel label to search for (e.g. "N0")
 * @return int|string|null key of the first vertex whose label matches (loose ==), or null if none does
 */
function vertex_of($dag,$vertexlabel) {
  $index = null;
  foreach ($dag["V"] as $candidate => $label) {
    if ($label == $vertexlabel) {
      $index = $candidate;
      break; // first match wins
    }
  }
  return $index;
}

/**
 * Assign every vertex its level: the largest number of edges on any path
 * leading to it, as reached by relaxing edges in topological order.
 *
 * Reads $dag["v"], $dag["e"], $dag["E"] and $dag["T"]; writes $dag["L"].
 *
 * @param array $dag graph with "T" already filled in topological order
 * @return array the graph with "L" (vertex index => level) populated
 */
function levelize($dag) {
  $vertexCount = $dag["v"];
  $edgeCount   = $dag["e"];
  $levels      = array_fill(0, $vertexCount, 0); // every vertex starts at level 0

  for ($pos = 0; $pos < $vertexCount; $pos++) {      // walk vertexes in toposort order
    for ($k = 0; $k < $edgeCount; $k++) {            // scan the whole edge list
      $arc = $dag["E"][$k];
      if ($arc["from"] != $dag["T"][$pos]) {
        continue;                                    // edge does not leave this vertex
      }
      // One step beyond the source; keep it only if it raises the target's level.
      $candidate = $levels[$arc["from"]] + 1;
      if ($candidate > $levels[$arc["to"]]) {
        $levels[$arc["to"]] = $candidate;
      }
    }
  }

  $dag["L"] = $levels;
  return $dag;
}

/**
 * Fill $dag["T"] with the vertex indexes in topological order.
 *
 * $dag["T"][i] is the index of the vertex that comes i-th in the order; e.g.
 * if vertex 8 is first in the order then $dag["T"][0] == 8.  Slots are filled
 * from the end backwards by dfs().
 *
 * @param array $dag graph; modified in place (passed by reference) and also returned
 * @return array the same graph with "T" populated
 */
function topoSort(&$dag) {
  $total = $dag["v"];
  $seen  = array_fill(0, $total, false); // one flag per vertex; each vertex is visited once
  $dag["T"] = array_fill(0, $total, 0);

  $slot = $total - 1;                    // next free position in T, counting down from the end
  $node = 0;
  while ($node < $total) {
    if (!$seen[$node]) {
      // Depth-first walk from this vertex; returns the next free slot.
      $slot = dfs($slot, $node, $seen, $dag["T"], $dag);
    }
    $node++;
  }
  return $dag;
}

/**
 * Format a list for debug output: every element is preceded by one space.
 *
 * @param array $TS values to print (e.g. a topological-sort or level array)
 * @return string e.g. " 3 1 2"; the empty string for an empty list
 */
function TS2str($TS) {
  $parts = [];
  foreach ($TS as $item) {
    $parts[] = " " . $item;
  }
  return implode("", $parts);
}

/**
 * Depth-first step of the topological sort.
 *
 * Marks $nFrom visited, recurses into each not-yet-visited successor, and on
 * the way back up writes $nFrom into the last free slot of $TS.  A vertex is
 * therefore placed only after everything reachable from it has been placed
 * further towards the end of the array.
 *
 * @param int   $tInsPt  last free slot in $TS
 * @param int   $nFrom   index of the vertex to expand
 * @param array $visited per-vertex visited flags (updated in place)
 * @param array $TS      topological-sort array (updated in place)
 * @param array $dag     graph; "e" (edge count) and "E" (edge list) are read
 * @return int the next free slot in $TS, never lower than 0
 */
function dfs($tInsPt, $nFrom, &$visited, &$TS, $dag) {
  $visited[$nFrom] = true;

  $edgeTotal = $dag["e"];
  for ($i = 0; $i < $edgeTotal; $i++) {
    $arc = $dag["E"][$i];
    if ($arc["from"] != $nFrom) {
      continue; // not an edge out of this vertex
    }
    $successor = $arc["to"];
    if (!$visited[$successor]) {
      // Place the successor (and everything beyond it) before placing $nFrom.
      $tInsPt = dfs($tInsPt, $successor, $visited, $TS, $dag);
    }
  }

  // All successors are placed; this vertex takes the last slot still free.
  $TS[$tInsPt] = $nFrom;

  // Step back one slot, but never before the start of the array.
  $nextSlot = $tInsPt - 1;
  if ($nextSlot < 0) {
    $nextSlot = 0;
  }
  return $nextSlot;
}

/**
 * Build the boundary graph of a fragment.
 *
 * Starts from an empty graph and feeds every listed tier that the fragment
 * actually defines through tierIntoDag().  Tiers missing from the fragment
 * are skipped.
 *
 * @param array $frag  tier strings keyed by tier name
 * @param array $tiers tier names to include
 * @return array the graph: "v"/"e" counts, "V" vertexes, "E" edges, empty "T" and "L", plus "start"/"end" labels
 */
function frag2DAG($frag, $tiers) {
  $dag = [
    "v"     => 0,    // number of vertexes
    "e"     => 0,    // number of edges
    "V"     => [],   // vertex index => label; labels look like N0, N$, X3, ...
    "E"     => [],   // edges: pairs of indexes into V, from u to v
    "T"     => [],   // topological sort array: T[ord] = vertex index with that ordinal
    "L"     => [],   // max level for each vertex
    "start" => "N0",
    "end"   => "N$",
  ];

  foreach ($tiers as $tierlabel) {
    if (!isset($frag[$tierlabel])) {
      continue; // the fragment has no string for this tier
    }
    $dag = tierIntoDag($frag[$tierlabel], $dag);
  }

  return $dag;
}

/**
 * Merge the boundaries of one tier string into the graph.
 *
 * Every .<LETTER># boundary found in $ts becomes a vertex (re-using the
 * existing vertex when the label is already known), and each pair of
 * consecutive boundaries becomes an edge labelled with the text between them.
 * Redundant edges are fine: the graph is only used to compute a level/depth
 * for each vertex.
 *
 * @param string $ts  tier string containing boundary markers
 * @param array  $dag graph to extend ("v", "e", "V", "E" are read and updated)
 * @return array the extended graph
 */
function tierIntoDag($ts,$dag) {
  global $tierNodeRegex;

  $matches = [];
  preg_match_all($tierNodeRegex, $ts, $matches, PREG_PATTERN_ORDER);

  $prevboundaryn = -1;  // vertex index of the previous boundary in this tier
  $prevcontent   = null; // text that followed the previous boundary

  foreach ($matches[1] as $key => $boundaryl) {
    // Find the vertex already carrying this label, if there is one.
    $boundaryn = null;
    foreach ($dag["V"] as $Vn => $Vl) {
      if ($Vl == $boundaryl) {
        $boundaryn = $Vn;
        break;
      }
    }

    // Unknown label: append a new vertex under the next numerical index.
    if ($boundaryn === null) {
      $boundaryn = $dag["v"];
      $dag["V"][$boundaryn] = $boundaryl;
      $dag["v"]++;
    }

    // From the second boundary onwards, link the previous boundary to this one.
    if ($key > 0) {
      $dag["E"][$dag["e"]] = edge($prevboundaryn, $boundaryn, $prevcontent);
      $dag["e"]++;
    }

    $prevboundaryn = $boundaryn;
    $prevcontent   = $matches[2][$key];
  }

  return $dag;
}

/**
 * Build an edge record.
 *
 * @param int    $u index of the vertex the edge leaves
 * @param int    $v index of the vertex the edge enters
 * @param string $s edge label
 * @return array ["from" => $u, "to" => $v, "label" => $s]
 */
function edge(/* vertex # */$u, /* vertex # */$v, /* edge label */$s) {
  return ["from" => $u, "to" => $v, "label" => $s];
}

