include myLibs/myDebug.e
include myLibs/myConv.e
include myLibs/mySeq.e as seq

-- Kind codes exported for users of this scanner.
-- NOTE(review): none of these constants is referenced in the visible part of
-- this file; their meaning is presumed from their names - confirm with callers.
global constant
  NO_TAG      = 0,
  START_TAG   = 1,
  SINGLE_TAG  = 2,
  END_TAG     = 3,
  INVALID_TAG = 4,
  EOF_TAG     = 5,
  TEXT        = 6,
  COMMENT     = 7,
  CDATA       = 8

-- Table of named HTML character entities.
-- Each record is {entity name, numeric code, replacement text}:
--   [1] the entity exactly as it appears in the text, from '&' to ';'
--       (convertHtml() looks records up on this field)
--   [2] a numeric code for the character (not read by the code in this file)
--   [3] the text convertHtml() substitutes for the entity
-- Every name must end with ';' because convertHtml() always searches for the
-- full "&...;" span ("&yen" used to lack it and could never match).
-- NOTE(review): many replacement strings are empty or "?"; this looks like
-- non-ASCII characters lost in an encoding conversion - confirm against the
-- original file before relying on the output for those entities.
constant htmlNames = {
  {"&Aacute;", 193, ""},   {"&aacute;", 225, ""},   {"&Acirc;", 194, ""},
  {"&acirc;", 226, ""},    {"&acute;", 180, ""},    {"&AElig;", 198, ""},
  {"&aelig;", 230, ""},    {"&Agrave;", 192, ""},   {"&agrave;", 224, ""},
  {"&Alpha;", 913, "?"},    {"&alpha;", 945, "a"},    {"&amp;", 38, "&"},
  {"&Aring;", 197, ""},    {"&aring;", 229, ""},    {"&Atilde;", 195, ""},
  {"&atilde;", 227, ""},   {"&Auml;", 196, ""},     {"&auml;", 228, ""},
  {"&Beta;", 914, "?"},     {"&beta;", 946, ""},     {"&brkbar;", 166, ""},
  {"&brvbar;", 166, ""},   {"&bull;", 149, ""},     {"&Ccedil;", 199, ""},
  {"&ccedil;", 231, ""},   {"&cedil;", 184, ""},    {"&cent;", 162, ""},
  {"&Chi;", 935, "?"},      {"&chi;", 967, "?"},      {"&clubs;", 9827, "?"},
  {"&copy;", 169, ""},     {"&curren;", 164, ""},   {"&dagger;", 134, ""},
  {"&Dagger;", 135, ""},   {"&darr;", 8595, "?"},   {"&deg;", 176, ""},
  {"&Delta;", 916, "?"},    {"&delta;", 948, "d"},    {"&diams;", 9830, "?"},
  {"&die;", 168, ""},      {"&divide;", 247, ""},   {"&Dstrok;", 208, ""},
  {"&Eacute;", 201, ""},   {"&eacute;", 233, ""},   {"&Ecirc;", 202, ""},
  {"&ecirc;", 234, ""},    {"&Egrave;", 200, ""},   {"&egrave;", 232, ""},
  {"&Epsilon;", 917, "?"},  {"&epsilon;", 949, "e"},  {"&Eta;", 919, "?"},
  {"&eta;", 951, "?"},      {"&ETH;", 208, ""},      {"&eth;", 240, ""},
  {"&Euml;", 203, ""},     {"&euml;", 235, ""},     {"&euro;", 8364, ""},
  {"&frac12;", 189, ""},   {"&frac14;", 188, ""},   {"&frac34;", 190, ""},
  {"&frasl;", 8260, "/"},   {"&Gamma;", 915, "G"},    {"&gamma;", 947, "?"},
  {"&gt;", 62, ">"},        {"&harr;", 8596, "?"},   {"&hearts;", 9829, "?"},
  {"&hibar;", 175, ""},    {"&Iacute;", 205, ""},   {"&iacute;", 237, ""},
  {"&Icirc;", 206, ""},    {"&icirc;", 238, ""},    {"&iexcl;", 161, ""},
  {"&Igrave;", 204, ""},   {"&igrave;", 236, ""},   {"&Iota;", 921, "?"},
  {"&iota;", 953, "?"},     {"&iquest;", 191, ""},   {"&Iuml;", 207, ""},
  {"&iuml;", 239, ""},     {"&Kappa;", 922, "?"},    {"&kappa;", 954, "?"},
  {"&Lambda;", 923, "?"},   {"&lambda;", 955, "?"},   {"&laquo;", 171, ""},
  {"&larr;", 8592, "?"},   {"&loz;", 9674, "?"},     {"&lsaquo;", 8249, ""},
  {"&lt;", 60, "<"},        {"&macr;", 175, ""},     {"&mdash;", 151, "--"},
  {"&micro;", 181, ""},    {"&middot;", 183, ""},   {"&Mu;", 924, "?"},
  {"&mu;", 956, ""},       {"&nbsp;", 32, " "},      {"&ndash;", 150, "-"},
  {"&not;", 172, ""},      {"&Ntilde;", 209, ""},   {"&ntilde;", 241, ""},
  {"&Nu;", 925, "?"},       {"&nu;", 957, "?"},       {"&Oacute;", 211, ""},
  {"&oacute;", 243, ""},   {"&Ocirc;", 212, ""},    {"&ocirc;", 244, ""},
  {"&oelig;", 156, ""},    {"&Ograve;", 210, ""},   {"&ograve;", 242, ""},
  {"&Omega;", 937, "O"},    {"&omega;", 969, "?"},    {"&Omicron;", 927, "?"},
  {"&omicron;", 959, "?"},  {"&ordf;", 170, ""},     {"&ordm;", 186, ""},
  {"&Oslash;", 216, ""},   {"&oslash;", 248, ""},   {"&Otilde;", 213, ""},
  {"&otilde;", 245, ""},   {"&Ouml;", 214, ""},     {"&ouml;", 246, ""},
  {"&para;", 182, ""},     {"&Phi;", 934, "F"},      {"&phi;", 966, "f"},
  {"&Pi;", 928, "?"},       {"&pi;", 960, "p"},       {"&plusmn;", 177, ""},
  {"&pound;", 163, ""},    {"&Psi;", 936, "?"},      {"&psi;", 968, "?"},
  {"&quot;", 34, "\""},     {"&raquo;", 187, ""},    {"&rarr;", 8594, "?"},
  {"&reg;", 174, ""},      {"&Rho;", 929, "?"},      {"&rho;", 961, "?"},
  {"&rsaquo;", 8250, ""},  {"&sect;", 167, ""},     {"&shy;", 173, ""},
  {"&Sigma;", 931, "S"},    {"&sigma;", 963, "s"},    {"&sigmaf;", 962, "?"},
  {"&spades;", 9824, "?"}, {"&sup1;", 185, ""},     {"&sup2;", 178, ""},
  {"&sup3;", 179, ""},     {"&szlig;", 223, ""},    {"&Tau;", 932, "?"},
  {"&tau;", 964, "t"},      {"&Theta;", 920, "T"},    {"&theta;", 952, "?"},
  {"&THORN;", 222, ""},    {"&thorn;", 254, ""},    {"&times;", 215, ""},
  {"&Uacute;", 218, ""},   {"&uacute;", 250, ""},   {"&uarr;", 8593, "?"},
  {"&Ucirc;", 219, ""},    {"&ucirc;", 251, ""},    {"&Ugrave;", 217, ""},
  {"&ugrave;", 249, ""},   {"&uml;", 168, ""},      {"&Upsilon;", 933, "?"},
  {"&upsilon;", 965, "?"},  {"&Uuml;", 220, ""},     {"&uuml;", 252, ""},
  {"&Xi;", 926, "?"},       {"&xi;", 958, "?"},       {"&Yacute;", 221, ""},
  {"&yacute;", 253, ""},   {"&yen;", 165, ""},      {"&Yuml;", 159, ""},
  {"&yuml;", 255, ""},     {"&Zeta;", 918, "?"},     {"&zeta;", 950, "?"}
}

-- Scanner state shared by the routines of this file.

-- Stack of the currently open tag names; the most recent one is tags[1]
-- (see push() and pop()).
sequence tags
tags = {}

-- Text being scanned; filled in by the caller before scanTag() is used.
global sequence buffer
buffer = {}

-- Index in buffer of the last character consumed by the scanner
-- (scanTag() increments it before reading a character).
global integer pos
pos = 0

------------------------------------------------------------------------------

global function findRecord(sequence sht, integer fld, object seqVal)
-- Returns the index of the first record of sheet sht whose field number fld
-- equals seqVal, or 0 when no record matches.
--   sht    : sequence of records (each record is itself a sequence)
--   fld    : 1-based field number to compare
--   seqVal : value searched for (compared with equal())
-- Legacy quirk kept for existing callers: fld = 0 returns {} instead of 0.
-- Records that are atoms or have fewer than fld fields are skipped instead
-- of raising a subscript error; a negative fld therefore matches nothing.
  object rec

  if fld = 0 then return {} end if
  for i = 1 to length(sht) do
    rec = sht[i]
    if sequence(rec) then
      if fld >= 1 and fld <= length(rec) then
        if equal(rec[fld], seqVal) then
          return i
        end if
      end if
    end if
  end for
  return 0
end function

------------------------------------------------------------------------------

global function convertHtml(sequence buf)
-- Returns a copy of buf in which HTML entities are replaced:
--   "&#...;"  -> the single character whose code is val() of the text
--                between "&#" and ";"
--   "&name;"  -> the replacement text stored in htmlNames for that name;
--                unknown names are copied unchanged
-- A '&' that is not followed by ';' before the next '&' (or before the end
-- of the text) is copied as a literal character, so a stray ampersand no
-- longer swallows the entity that follows it ("AT&T &amp; co").
-- NOTE(review): val() comes from an included library; it is presumed to
-- convert a decimal string to a number - confirm (hexadecimal "&#x..;"
-- references depend on it).
  sequence s, res, htmlChar, rest
  integer st, en, p, nextAmp

  s = buf
  res = ""
  st = find('&', s)
  while st > 0 do
    rest = s[st+1..$]
    en = find(';', rest)        -- offset of the closing ';' after the '&'
    nextAmp = find('&', rest)   -- offset of the next '&', 0 if none
    if en > 0 and (nextAmp = 0 or nextAmp > en) then
      en += st                  -- make en an index into s
      htmlChar = s[st..en]
      if s[st+1] = '#' then
        -- numeric reference: keep the text before it, add one character
        res &= s[1..st-1] & {val(htmlChar[3..$-1])}
      else
        p = findRecord(htmlNames, 1, htmlChar)
        if p > 0 then
          res &= s[1..st-1] & htmlNames[p][3]
        else
          -- unknown entity: copy it through untouched
          res &= s[1..en]
        end if
      end if
      s = s[en+1..$]
    else
      -- literal '&': copy it and resume scanning just after it
      res &= s[1..st]
      s = s[st+1..$]
    end if
    st = find('&', s)
  end while
  res &= s
  return res
end function

------------------------------------------------------------------------------

procedure push(sequence s)
-- Puts s on top of the tag stack: it becomes tags[1].
  tags = {s} & tags
end procedure

------------------------------------------------------------------------------

function pop()
-- Removes the top entry (tags[1]) from the tag stack and returns it.
-- Returns {} and leaves the stack untouched when it is already empty.
  sequence top

  if length(tags) = 0 then
    return {}
  end if
  top = tags[1]
  tags = tags[2..$]   -- yields {} when only one entry was left
  return top
end function

------------------------------------------------------------------------------

function at(integer i)
-- Returns entry number i of the tag stack (1 is the top of the stack),
-- or {} when i is outside 1..length(tags).
-- Indexes below 1 used to raise a subscript error; they now return {} like
-- indexes above the stack size.
  if i < 1 or i > length(tags) then
    return {}
  end if
  return tags[i]
end function

------------------------------------------------------------------------------

global function tagPath()
-- Builds the path of the currently open tags, outermost first, each name
-- preceded by "/" (for instance "/html/body"). Returns an empty sequence
-- when no tag is open.
  sequence path
  integer depth

  path = ""
  depth = length(tags)    -- the outermost tag sits at the bottom of the stack
  while depth >= 1 do
    path = path & "/" & tags[depth]
    depth -= 1
  end while
  return path
end function

------------------------------------------------------------------------------

global procedure showBuffer(integer i)
-- Logs up to 60 characters of buffer starting at index i, below a two-line
-- column ruler, after logging the requested position.
-- The window is clamped to the buffer: a start index below 1 (pos is 0
-- before scanning begins) or beyond the end used to raise a slice error;
-- now the nearest valid window, possibly empty, is shown.
  integer lg, winStart, winEnd

  lg = length(buffer)
  logMsg(sprintf("pos=%d", i))
  logMsg("0        1         2         3         4         5         6")
  logMsg("123456789012345678901234567890123456789012345678901234567890")

  winStart = i
  if winStart < 1 then
    winStart = 1
  elsif winStart > lg+1 then
    winStart = lg+1       -- gives an empty slice
  end if
  winEnd = winStart+59
  if winEnd > lg then
    winEnd = lg
  end if
  logMsg(showPrintable(buffer[winStart..winEnd]))
end procedure

------------------------------------------------------------------------------

global function removeEol(sequence s)
-- Returns a copy of s in which every '\n' is replaced by a space.
-- The length of the text is unchanged.
  sequence flat

  flat = s
  for k = 1 to length(flat) do
    if flat[k] = '\n' then
      flat[k] = ' '
    end if
  end for
  return flat
end function

------------------------------------------------------------------------------

function getDelimitedText(sequence delimiter)
-- Scans buffer forward from the current pos for delimiter.
-- When found: returns the text from the starting pos up to and including
-- the delimiter, and moves pos to the character after the delimiter.
-- When not found: reports the problem through warnError(), moves pos to the
-- last character of buffer and returns the text from the starting pos to
-- the end of buffer (previously the function ended without returning a
-- value).
-- The search now includes the last possible start position, so a delimiter
-- ending exactly at the end of buffer is found.
-- NOTE(review): pos is expected to be >= 1 on entry, which holds for the
-- calls made by scanTag().
  integer l, st, en, lastStart

  l = length(delimiter)-1
  st = pos
  en = 0
  lastStart = length(buffer)-l   -- last index where delimiter can still fit
  while pos <= lastStart do
    if equal(buffer[pos..pos+l], delimiter) then
      en = pos + l
      exit
    end if
    pos += 1
  end while
  if en > 0 then
    pos = en+1
    return buffer[st..en]
  end if
  warnError(sprintf("End delimiter (%s) not found!", {delimiter}), 1)
  pos = length(buffer)
  return buffer[st..$]
end function

------------------------------------------------------------------------------

global function scanTag(integer f_out)
-- Scans buffer from the character after pos and returns the content found
-- up to the end tag that closes the innermost open tag (top of the tag
-- stack), or up to the end of buffer.
--   f_out : file number receiving one trace line per comment, XML zone,
--           text run and tag
-- The result is a sequence whose items are, in document order:
--   * a text run, trimmed and passed through convertHtml()
--   * {tag, params...} for a tag written "<tag ... />"
--   * {tag, params..., children} for a start tag, where children is the
--     result of the recursive scanTag() call for that element
-- Each param is the result of splitString(item, '=') on one space-separated
-- item of the tag; tag names are lower-cased.
-- Fixes compared with the previous version:
--   * text collected before a comment or "<?xml" zone is no longer lost
--   * the character following a zone is re-examined, so two zones in a row
--     and a zone ending at the very end of buffer are handled
--   * a tag without closing '>' is reported and stops the scan instead of
--     reusing a stale end index
-- NOTE(review): for "<tag ... />" the trace line lists s[2..$-1], i.e. it
-- omits the last space-separated item (presumably the empty item left by
-- the blank before "/>") while the returned params include every non-blank
-- item; kept as it was - confirm the intent.
  integer st, en, lg, selfClosing
  sequence section, text, zone, s, tag, params, element, converted, popped

  text = ""
  lg = length(buffer)
  section = {}
  while pos < lg do
    pos += 1

    if (pos < lg-3) and equal(buffer[pos..pos+3], "<!--") then
      -- comment zone: trace it, keep the pending text untouched
      zone = getDelimitedText("-->")
      puts(f_out, tagPath()&"\tCOMMENT: '"&zone&"'\n")
      pos -= 1    -- the next iteration examines the character after the zone

    elsif (pos < lg-4) and equal(buffer[pos..pos+4], "<?xml") then
      -- XML declaration / processing zone
      zone = getDelimitedText("?>")
      puts(f_out, tagPath()&"\tXML: '"&zone&"'\n")
      pos -= 1    -- the next iteration examines the character after the zone

    elsif buffer[pos] = '<' then
      -- a tag starts: first flush the text collected so far
      text = seq:trim(text)
      if length(text) then
        converted = convertHtml(text)
        puts(f_out, tagPath()&"\tTEXT: '"&converted&"'\n")
        section = append(section, converted)
        text = ""
      end if

      -- locate the closing '>' ; st..en is the tag content without < and >
      pos += 1
      st = pos
      en = 0
      while pos < lg do
        pos += 1
        if buffer[pos] = '>' then
          en = pos - 1
          exit
        end if
      end while
      if en = 0 then
        warnError(sprintf("Unterminated tag at position %d!\n", {st-1}), 1)
        exit
      end if

      if buffer[st] = '/' then
        -- end tag: it must close the tag on top of the stack
        tag = lower(buffer[st+1..en])
        if (length(tags)>0) and equal(at(1), tag) then
          popped = pop()
          puts(f_out, tagPath()&"/"&tag&"\n")
          exit
        else
          warnError(sprintf("Tag de fin (%s) diffrent du tag de dbut (%s)!\n", {tag, at(1)}), 1)
        end if
      else
        -- start tag, or tag closed in place with a trailing '/'
        selfClosing = (buffer[en] = '/')
        if selfClosing then
          s = splitString(buffer[st..en-1], ' ')
        else
          s = splitString(buffer[st..en], ' ')
        end if
        tag = lower(s[1])
        if length(s) > 1 then
          if selfClosing then
            params = s[2..$-1]
          else
            params = s[2..$]
          end if
          puts(f_out, tagPath()&"/"&tag&"\tPARAMS: "&tokenize(params,' ')&"\n")
        else
          puts(f_out, tagPath()&"/"&tag&"\n")
        end if
        element = {tag}
        for i = 2 to length(s) do
          if length(leftTrim(s[i])) then
            element = append(element, splitString(s[i],'='))
          end if
        end for
        if not selfClosing then
          -- descend: the recursive call returns at the matching end tag
          push(tag)
          element = append(element, scanTag(f_out))
        end if
        section = append(section, element)
      end if

    else
      -- ordinary character: collect it, dropping line feeds
      if buffer[pos] != '\n' then text &= buffer[pos] end if
    end if
  end while
  return section
end function

------------------------------------------------------------------------------

global function searchElement(sequence x, sequence variable, sequence elt)
-- Depth-first search for the first sub-sequence of x equal to elt.
--   x        : structure to search
--   variable : index path leading to x (pass {} for the top level)
--   elt      : sequence searched for
-- Returns the index path of the first match (variable followed by the
-- indexes walked), or {} when there is no match. A match on x itself
-- returns variable unchanged, so with variable = {} it cannot be told
-- apart from "not found". Atom elements are never compared.
  sequence hit

  if equal(x, elt) then
    return variable
  end if
  for k = 1 to length(x) do
    if sequence(x[k]) then
      hit = searchElement(x[k], variable & k, elt)
      if length(hit) then
        return hit
      end if
    end if
  end for
  return {}
end function

------------------------------------------------------------------------------

-- Index paths collected by searchAllElements(). The procedure only appends
-- to this list; reset it to {} before starting a new search.
global sequence searchResults
searchResults = {}

global procedure searchAllElements(sequence x, sequence variable, sequence elt)
-- Depth-first search for every sub-sequence of x equal to elt.
--   x        : structure to search
--   variable : index path leading to x (pass {} for the top level)
--   elt      : sequence searched for
-- The index path of each match is appended to searchResults. The inside of
-- a matching sub-sequence is not searched further; atom elements are never
-- compared.
  if equal(x, elt) then
    searchResults = append(searchResults, variable)
    return
  end if
  for k = 1 to length(x) do
    if sequence(x[k]) then
      searchAllElements(x[k], variable & k, elt)
    end if
  end for
end procedure

------------------------------------------------------------------------------

-- Local version of the fetch() function of OpenEuphoria 4.
-- Comment this function out when running under OpenEuphoria.
global function fetch(sequence structure, sequence path)
-- Returns the element of structure reached by following the indexes of
-- path one level at a time, e.g. fetch(s, {2,1}) is s[2][1].
-- path must hold at least one index.
  integer depth

  depth = 1
  while depth < length(path) do
    -- descend through every index except the last one
    structure = structure[path[depth]]
    depth += 1
  end while
  return structure[path[$]]
end function

------------------------------------------------------------------------------
