/*
    Copyright (C) 2003-2006 Teus Benschop.

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

*/


#include "libraries.h"
#include "utilities.h"
#include "constants.h"


// Storage for commandline arguments.
// check_in_stylesheet is switched on by the --markers-in-stylesheet option;
// styles_in_sheet then holds the markers listed in the argument after it.
bool check_in_stylesheet = false;
set<ustring> styles_in_sheet;


// Variables for our use.
// book, chapter and verse hold the current reference: start_element_handler
// overwrites them with the first attribute value of the matching element, and
// they are passed along with every message that is reported.
ustring book = "Unknown";
ustring chapter = "0";
ustring verse = "0";
// NOTE(review): "text" is not referenced by the code visible in this file;
// the "text" parameter of text_handler shadows it. Confirm before removing.
ustring text;


void check_on_endmarker (ustring& line, const ustring& marker, 
     const ustring& book, const ustring& chapter, const ustring& verse)
// Called for every marker that has to be closed by an endmarker.
// The endmarker is the marker preceded by a backslash and followed by an
// asterisk. Its first occurrence is cut out of "line". If "line" does not
// contain it, a message is reported for the given book, chapter and verse,
// and "line" is left untouched.
{
  const ustring closer = "\\" + marker + "*";
  const size_t where = line.find (closer);
  if (where == string::npos) {
    // Nothing closes this marker: tell the user.
    output_xml_message (book, chapter, verse, "Endmarker " + closer + " not found");
    return;
  }
  // The endmarker has been accounted for, so take it out of the line.
  line.erase (where, closer.length());
}


void deprecated_marker (const ustring& marker, const ustring& book, const ustring& chapter, const ustring& verse)
// Reports, for the given book, chapter and verse, that "marker" is deprecated.
{
  output_xml_message (book, chapter, verse, "Deprecated marker " + marker);
}


ustring usfm_extract_marker_with_forwardslash (ustring & line)
/*
Extracts the next marker that was written with a forward slash.

"line" is trimmed first. The marker starts at the first forward slash found
anywhere in the line and runs up to and including the first space or asterisk
after that slash, or to the end of the line if there is none. Everything up to
and including the extracted part is removed from "line".

Returns the marker without the slash, passed through trim(); an asterisk that
ended the marker is part of the extracted text. If the line has no forward 
slash, the result of trim() on an empty string is returned and "line" is only 
trimmed.
*/
{
  ustring candidate;
  line = trim (line);
  const size_t slash = line.find ("/");
  if (slash == string::npos)
    return trim (candidate);

  // Throw away whatever precedes the slash, so that the line starts with it.
  line.erase (0, slash);

  // The search starts at 1 to step over the slash itself.
  const size_t stop = line.find_first_of (" *", 1);
  if (stop == string::npos) {
    // No delimiter: the rest of the line is the marker.
    candidate = line;
    line.clear();
  } else {
    // Take the delimiter along, so that a closing asterisk stays visible.
    const size_t taken = stop + 1;
    candidate = line.substr (0, taken);
    line.erase (0, taken);
  }

  // The candidate starts with the slash at this point: drop it.
  candidate.erase (0, 1);
  return trim (candidate);
}


void start_element_handler (GMarkupParseContext *context,
                            const gchar         *element_name,
                            const gchar        **attribute_names,
                            const gchar        **attribute_values,
                            gpointer             user_data,
                            GError             **error)
// GMarkup callback for an opening tag.
// For a book, chapter or verse element the value of its first attribute is
// stored in the global "book", "chapter" or "verse". Other elements, and
// elements that carry no attribute at all, leave the globals as they are.
// The parameters context, attribute_names, user_data and error are not used.
{
  // The attribute arrays are NULL-terminated, so an element without
  // attributes has a NULL first entry. Assigning that to a string is
  // undefined behaviour, hence keep the previous reference instead.
  if (!attribute_values || !attribute_values[0])
    return;

  const string element = element_name;
  if (element == BOOK_TAG) {
    book = attribute_values[0];
  } 
  else if (element == CHAPTER_TAG) {
    chapter = attribute_values[0];
  } 
  else if (element == VERSE_TAG ) {
    verse = attribute_values[0];
  }
}


void end_element_handler (GMarkupParseContext *context,
                          const gchar         *element_name,
                          gpointer             user_data,
                          GError             **error)
// GMarkup callback for a closing tag.
// It tells book, chapter and verse elements apart, but none of the branches
// carries out a check: they are placeholders only.
{
  const string closed (element_name);

  if (closed == BOOK_TAG) {
    // End of a book.
    // A check on the number of chapters could go here; none is implemented.

  } else if (closed == CHAPTER_TAG) {
    // End of a chapter.
    // A check on the verses it contains could go here; none is implemented.

  } else if (closed == VERSE_TAG ) {
    // End of a verse: nothing to check.
  }
}


void text_handler (GMarkupParseContext *context,
                   const gchar         *text,
                   gsize                text_len,
                   gpointer             user_data,
                   GError             **error)
{
  // Extract the marker, and deal with it.
  ustring utext (text);
  utext = trim (utext);
  if (utext.empty())
    return;
  ustring line = utext;
  ustring marker = usfm_extract_marker_within_line (line);
  if (marker.empty()) {
    // No marker found.
    output_xml_message (book, chapter, verse, "Line without USFM");
  }
  while (!marker.empty()) {
    
    if (marker == "id") {
    // Is the line long enough?
    if (line.length() < 3) {
      ustring message;
      message = "ID line is too short: " + line;
      output_xml_message (book, chapter, verse, message);
    } else {
      ustring id = line.substr (0, 3);
      if (book == "Unknown")
      output_xml_message (book, chapter, verse, "Unknown book with id " + id);
      if (id != upperCase (id))
      output_xml_message (book, chapter, verse, "Non-uppercase id code");
    }
    } else if (marker == "c") {
    } else if (marker == "v") {
    } else if (marker == "add") {
      check_on_endmarker (line, marker, book, chapter, verse);
    } else if (marker == "bdit") {
      check_on_endmarker (line, marker, book, chapter, verse);
      deprecated_marker (marker, book, chapter, verse);    
    } else if (marker == "bd") {
      check_on_endmarker (line, marker, book, chapter, verse);
      deprecated_marker (marker, book, chapter, verse);    
    } else if (marker == "bk") {
      check_on_endmarker (line, marker, book, chapter, verse);
    } else if (marker == "b") {
    } else if (marker == "ca") {
    } else if (marker == "cd") {
    } else if (marker == "cls") {
    } else if (marker == "cl") {
    } else if (marker == "conc") {
    } else if (marker == "cov") {
    } else if (marker == "cp") {
    } else if (marker == "dc") {
      check_on_endmarker (line, marker, book, chapter, verse);
    } else if (marker == "d") {
    } else if (marker == "em") {
      check_on_endmarker (line, marker, book, chapter, verse);
      deprecated_marker (marker, book, chapter, verse);    
    } else if (marker == "fdc") {
      check_on_endmarker (line, marker, book, chapter, verse);
    } else if (marker == "fe") {
      check_on_endmarker (line, marker, book, chapter, verse);
    } else if (marker == "fig") {
      check_on_endmarker (line, marker, book, chapter, verse);
    } else if (marker == "fk") {
    } else if (marker == "fl") {
    } else if (marker == "fm") {
      check_on_endmarker (line, marker, book, chapter, verse);
    } else if (marker == "fp") {
    } else if (marker == "fqa") {
    } else if (marker == "fq") {
    } else if (marker == "fr") {
    } else if (marker == "ft") {
    } else if (marker == "fv") {
    } else if (marker == "f") {
      check_on_endmarker (line, marker, book, chapter, verse);
    } else if (marker == "glo") {
    } else if (marker == "h1") {
    } else if (marker == "h2") {
    } else if (marker == "h3") {
    } else if (marker == "h") {
    } else if (marker == "ib") {
    } else if (marker == "ide") {
    } else if (marker == "idx") {
    } else if (marker == "ie") {
    } else if (marker == "iex") {
    } else if (marker == "imi") {
    } else if (marker == "imq") {
    } else if (marker == "imt1") {
    } else if (marker == "imt2") {
    } else if (marker == "imt3") {
    } else if (marker == "imt4") {
    } else if (marker == "imte") {
    } else if (marker == "imt") {
    } else if (marker == "im") {
    } else if (marker == "intro") {
    } else if (marker == "io1") {
    } else if (marker == "io2") {
    } else if (marker == "io3") {
    } else if (marker == "io4") {
    } else if (marker == "ior") {
      check_on_endmarker (line, marker, book, chapter, verse);
    } else if (marker == "iot") {
    } else if (marker == "io") {
    } else if (marker == "ipi") {
    } else if (marker == "ipq") {
    } else if (marker == "ipr") {
    } else if (marker == "ip") {
    } else if (marker == "iq1") {
    } else if (marker == "iq2") {
    } else if (marker == "iq3") {
    } else if (marker == "iq") {
    } else if (marker == "is1") {
    } else if (marker == "is2") {
    } else if (marker == "is") {
    } else if (marker == "it") {
      check_on_endmarker (line, marker, book, chapter, verse);
      deprecated_marker (marker, book, chapter, verse);    
    } else if (marker == "k1") {
    } else if (marker == "k2") {
    } else if (marker == "k") {
      check_on_endmarker (line, marker, book, chapter, verse);
    } else if (marker == "li1") {
    } else if (marker == "li2") {
    } else if (marker == "li3") {
    } else if (marker == "li4") {
    } else if (marker == "lit") {
    } else if (marker == "li") {
    } else if (marker == "maps") {
    } else if (marker == "mi") {
    } else if (marker == "mr") {
    } else if (marker == "ms1") {
    } else if (marker == "ms2") {
    } else if (marker == "ms") {
    } else if (marker == "mt1") {
    } else if (marker == "mt2") {
    } else if (marker == "mt3") {
    } else if (marker == "mt4") {
    } else if (marker == "mte1") {
    } else if (marker == "mte2") {
    } else if (marker == "mte") {
    } else if (marker == "mt") {
    } else if (marker == "m") {
    } else if (marker == "nb") {
    } else if (marker == "nd") {
      check_on_endmarker (line, marker, book, chapter, verse);
    } else if (marker == "ndx") {
      check_on_endmarker (line, marker, book, chapter, verse);
    } else if (marker == "no") {
      check_on_endmarker (line, marker, book, chapter, verse);
      deprecated_marker (marker, book, chapter, verse);    
    } else if (marker == "ord") {
      check_on_endmarker (line, marker, book, chapter, verse);
    } else if (marker == "p1") {
    } else if (marker == "p2") {
    } else if (marker == "pb") {
    } else if (marker == "pc") {
    } else if (marker == "pi1") {
    } else if (marker == "pi2") {
    } else if (marker == "pi3") {
    } else if (marker == "pi") {
    } else if (marker == "pmc") {
    } else if (marker == "pmo") {
    } else if (marker == "pmr") {
    } else if (marker == "pm") {
    } else if (marker == "pn") {
      check_on_endmarker (line, marker, book, chapter, verse);
    } else if (marker == "pref") {
    } else if (marker == "pro") {
      check_on_endmarker (line, marker, book, chapter, verse);
    } else if (marker == "pubinfo") {
    } else if (marker == "pub") {
    } else if (marker == "p") {
    } else if (marker == "q1") {
    } else if (marker == "q2") {
    } else if (marker == "q3") {
    } else if (marker == "qac") {
      check_on_endmarker (line, marker, book, chapter, verse);
    } else if (marker == "qa") {
    } else if (marker == "qc") {
    } else if (marker == "qm1") {
    } else if (marker == "qm2") {
    } else if (marker == "qm3") {
    } else if (marker == "qm") {
    } else if (marker == "qr") {
    } else if (marker == "qs") {
    } else if (marker == "qt") {
      check_on_endmarker (line, marker, book, chapter, verse);
    } else if (marker == "q") {
    } else if (marker == "rem") {
    } else if (marker == "restore") {
    } else if (marker == "r") {
    } else if (marker == "s1") {
    } else if (marker == "s2") {
    } else if (marker == "s3") {
    } else if (marker == "s4") {
    } else if (marker == "sc") {
      check_on_endmarker (line, marker, book, chapter, verse);
      deprecated_marker (marker, book, chapter, verse);    
    } else if (marker == "sig") {
      check_on_endmarker (line, marker, book, chapter, verse);
    } else if (marker == "sls") {
      check_on_endmarker (line, marker, book, chapter, verse);
    } else if (marker == "spine") {
    } else if (marker == "sp") {
    } else if (marker == "sr") {
    } else if (marker == "s") {
    } else if (marker == "tc1") {
    } else if (marker == "tc2") {
    } else if (marker == "tc3") {
    } else if (marker == "tc4") {
    } else if (marker == "tcr1") {
    } else if (marker == "tcr2") {
    } else if (marker == "tcr3") {
    } else if (marker == "tcr4") {
    } else if (marker == "th1") {
    } else if (marker == "th2") {
    } else if (marker == "th3") {
    } else if (marker == "th4") {
    } else if (marker == "thr1") {
    } else if (marker == "thr2") {
    } else if (marker == "thr3") {
    } else if (marker == "thr4") {
    } else if (marker == "tl") {
      check_on_endmarker (line, marker, book, chapter, verse);
    } else if (marker == "toc1") {
    } else if (marker == "toc2") {
    } else if (marker == "toc") {
    } else if (marker == "tr") {
    } else if (marker == "va") {
      check_on_endmarker (line, marker, book, chapter, verse);
    } else if (marker == "vp") {
      check_on_endmarker (line, marker, book, chapter, verse);
    } else if (marker == "wg") {
      check_on_endmarker (line, marker, book, chapter, verse);
    } else if (marker == "wh") {
      check_on_endmarker (line, marker, book, chapter, verse);
    } else if (marker == "wj") {
      check_on_endmarker (line, marker, book, chapter, verse);
    } else if (marker == "w") {
      check_on_endmarker (line, marker, book, chapter, verse);
    } else if (marker == "xdc") {
      check_on_endmarker (line, marker, book, chapter, verse);
    } else if (marker == "xk") {
    } else if (marker == "xo") {
    } else if (marker == "xq") {
    } else if (marker == "xt") {
    } else if (marker == "x") {
      check_on_endmarker (line, marker, book, chapter, verse);
    } else if (marker.find ("*") != string::npos) {
      output_xml_message (book, chapter, verse, "Unmatched end marker " + marker);
    } else {
      output_xml_message (book, chapter, verse, "Unknown USFM " + marker);
    }

    // Optionally check whether this marker is in the stylesheet.
    if (check_in_stylesheet) {
      if (styles_in_sheet.find (marker) == styles_in_sheet.end()) {
        output_xml_message (book, chapter, verse, "Marker " + marker + " not in stylesheet");
      }
    }
    
    // Extract any next marker in this line.    
    marker = usfm_extract_marker_within_line (line);
  }
  
  // Deal with accidentally entered forward slashes instead of backslashes.
  line = utext;
  marker = usfm_extract_marker_with_forwardslash (line);
  while (!marker.empty()) {
    ustring originalmarker (marker);
    size_t pos = marker.find("*");
    if (pos != string::npos) {
    marker.erase (pos, 1);
    }
    if (
    (marker == "id")
    || (marker == "c")
    || (marker == "v")
    || (marker == "add")
    || (marker == "bdit")
    || (marker == "bd")
    || (marker == "bk")
    || (marker == "b")
    || (marker == "ca")
    || (marker == "cd")
    || (marker == "cls")
    || (marker == "cl")
    || (marker == "conc")
    || (marker == "cov")
    || (marker == "cp")
    || (marker == "dc")
    || (marker == "d")
    || (marker == "em")
    || (marker == "fdc")
    || (marker == "fe")
    || (marker == "fig")
    || (marker == "fk")
    || (marker == "fl")
    || (marker == "fm")
    || (marker == "fp")
    || (marker == "fqa")
    || (marker == "fq")
    || (marker == "fr")
    || (marker == "ft")
    || (marker == "fv")
    || (marker == "f")
    || (marker == "glo")
    || (marker == "h1")
    || (marker == "h2")
    || (marker == "h3")
    || (marker == "h")
    || (marker == "ib")
    || (marker == "ide")
    || (marker == "idx")
    || (marker == "ie")
    || (marker == "iex")
    || (marker == "imi")
    || (marker == "imq")
    || (marker == "imt1")
    || (marker == "imt2")
    || (marker == "imt3")
    || (marker == "imt4")
    || (marker == "imte")
    || (marker == "imt")
    || (marker == "im")
    || (marker == "intro")
    || (marker == "io1")
    || (marker == "io2")
    || (marker == "io3")
    || (marker == "io4")
    || (marker == "ior")
    || (marker == "iot")
    || (marker == "io")
    || (marker == "ipi")
    || (marker == "ipq")
    || (marker == "ipr")
    || (marker == "ip")
    || (marker == "iq1")
    || (marker == "iq2")
    || (marker == "iq3")
    || (marker == "iq")
    || (marker == "is1")
    || (marker == "is2")
    || (marker == "is")
    || (marker == "it")
    || (marker == "k1")
    || (marker == "k2")
    || (marker == "k")
    || (marker == "li1")
    || (marker == "li2")
    || (marker == "li3")
    || (marker == "li4")
    || (marker == "lit")
    || (marker == "li")
    || (marker == "maps")
    || (marker == "mi")
    || (marker == "mr")
    || (marker == "ms1")
    || (marker == "ms2")
    || (marker == "ms")
    || (marker == "mt1")
    || (marker == "mt2")
    || (marker == "mt3")
    || (marker == "mt4")
    || (marker == "mte1")
    || (marker == "mte2")
    || (marker == "mte")
    || (marker == "mt")
    || (marker == "m")
    || (marker == "nb")
    || (marker == "nd")
    || (marker == "ndx")
    || (marker == "no")
    || (marker == "ord")
    || (marker == "p1")
    || (marker == "p2")
    || (marker == "pb")
    || (marker == "pc")
    || (marker == "pi1")
    || (marker == "pi2")
    || (marker == "pi3")
    || (marker == "pi")
    || (marker == "pmc")
    || (marker == "pmo")
    || (marker == "pmr")
    || (marker == "pm")
    || (marker == "pn")
    || (marker == "pref")
    || (marker == "pro")
    || (marker == "pubinfo")
    || (marker == "pub")
    || (marker == "p")
    || (marker == "q1")
    || (marker == "q2")
    || (marker == "q3")
    || (marker == "qac")
    || (marker == "qa")
    || (marker == "qc")
    || (marker == "qm1")
    || (marker == "qm2")
    || (marker == "qm3")
    || (marker == "qm")
    || (marker == "qr")
    || (marker == "qs")
    || (marker == "qt")
    || (marker == "q")
    || (marker == "rem")
    || (marker == "restore")
    || (marker == "r")
    || (marker == "s1")
    || (marker == "s2")
    || (marker == "s3")
    || (marker == "s4")
    || (marker == "sc")
    || (marker == "sig")
    || (marker == "sls")
    || (marker == "spine")
    || (marker == "sp")
    || (marker == "sr")
    || (marker == "s")
    || (marker == "tc1")
    || (marker == "tc2")
    || (marker == "tc3")
    || (marker == "tc4")
    || (marker == "tcr1")
    || (marker == "tcr2")
    || (marker == "tcr3")
    || (marker == "tcr4")
    || (marker == "th1")
    || (marker == "th2")
    || (marker == "th3")
    || (marker == "th4")
    || (marker == "thr1")
    || (marker == "thr2")
    || (marker == "thr3")
    || (marker == "thr4")
    || (marker == "tl")
    || (marker == "toc1")
    || (marker == "toc2")
    || (marker == "toc")
    || (marker == "tr")
    || (marker == "va")
    || (marker == "vp")
    || (marker == "wg")
    || (marker == "wh")
    || (marker == "wj")
    || (marker == "w")
    || (marker == "xdc")
    || (marker == "xk")
    || (marker == "xo")
    || (marker == "xq")
    || (marker == "xt")
    || (marker == "x")
    ) {
    output_xml_message (book, chapter, verse, "Normal slash for /" + originalmarker);
    }
    // Extract any next marker in this line.    
    marker = usfm_extract_marker_with_forwardslash (line);
  }
  
  // Check for widow backslashes.
  line = utext + " ";
  if ((line.find ("\\ ") != string::npos) || (line == "\\")) {
    output_xml_message (book, chapter, verse, "Widow backslash");
  }
  
}



void passthrough_handler    (GMarkupParseContext *context,
                             const gchar         *passthrough_text,
                             gsize                text_len,
                             gpointer             user_data,
                             GError             **error)
// Passthrough callback that main registers with the GMarkup parser.
// It is intentionally empty: nothing is done with the text and none of the
// parameters is used.
{
}


void error_handler          (GMarkupParseContext *context,
                             GError              *error,
                             gpointer             user_data)
// Error callback that main registers with the GMarkup parser.
// Writes the message of "error" to standard error, followed by a newline.
// The parameters context and user_data are not used.
{
  const gchar *description = error->message;
  cerr << description << endl;
}


int main (int argc, char *argv[])
{
  // Process command line arguments.
  for (int i = 1; i < argc; i++) {
    ustring argument;
    argument = argv[i];
    if (argument.length() > 2) {
      if (argument.substr (0, 2) == "--") {
        argument.erase (0, 2);
        if (argument == "markers-in-stylesheet") {
          check_in_stylesheet = true;
          i++;
          argument = argv[i];
          Parse parse (argument);
          for (unsigned int i2 = 0; i2 < parse.words.size(); i2++) {
            styles_in_sheet.insert (parse.words[i2]);
          }
          continue;
        }
        if (argument == "help") {
          cout << "sc-check-usfm reads checking units from stdin." << endl;
          cout << "It performs checks related to the USFM standard, that are not done elsewhere," << endl;
          cout << "due to sc-input-usfm that hides certain USFM specific information." << endl;
          cout << "--markers-in-stylesheet <space separated list of markers in stylesheet>" << endl;
          cout << "  When this argument is given, it also checks whether any marker is in the" << endl;
          cout << "  list of markers given, and gives a message if not." << endl;
          return 0;
        }
      }
    }
  }
  // Read data from stdin.
  GIOChannel* io;
  gchar* gtext;
  gsize length;
  io = g_io_channel_unix_new (0);
  g_io_channel_read_to_end (io, &gtext, &length, NULL);
  // Set up parser.
  GMarkupParseContext *context;
  GMarkupParser parser = {
    start_element_handler,
    end_element_handler,
    text_handler,
    passthrough_handler,
    error_handler
  };
  // Parse xml data.
  context = g_markup_parse_context_new (&parser, GMarkupParseFlags (0), NULL, NULL);
  g_markup_parse_context_parse (context, gtext, length, NULL);
  g_markup_parse_context_end_parse (context, NULL);
  // Free some resources.  
  g_markup_parse_context_free (context);
  g_free (gtext);
  g_io_channel_unref (io);
  // Ready.
  return 0;
}
