(*
 * Parse the shell line.
 *    command ::= word+
 *            |   command > word
 *            |   command >& word
 *            |   command < word
 *    pipe ::= command
 *          |  command | command
 *          |  command |& command
 *
 * ----------------------------------------------------------------
 *
 * @begin[license]
 * Copyright (C) 2003 Jason Hickey, Caltech
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURLOCE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * Author: Jason Hickey
 * @email{jyh@cs.caltech.edu}
 * @end[license]
 *)

{
open Format

open Lm_debug
open Lm_symbol
open Lm_location

open Omake_ast
open Omake_env
open Omake_shell_parse

module Pos = MakePos (struct let name = "Omake_shell_lex" end)
open Pos

(*
 * File location.
 *)
let current_file  = ref (Lm_symbol.add "-")
let current_line  = ref 1
let current_schar = ref 0
let current_loc   = ref (Lm_location.bogus_loc "<stdin>")

(*
 * Set state.
 *)
let set_current_file name =
   current_file  := Lm_symbol.add name;
   current_line  := 1;
   current_schar := 0;
   current_loc   := Lm_location.bogus_loc name

let set_current_location loc =
   let file, sline, schar, _, _ = dest_loc loc in
      current_file := file;
      current_line := sline;
      current_schar := schar;
      current_loc := loc

let set_current_loc loc =
   current_loc := loc

let current_location () =
   !current_loc

(*
 * Advance a line.
 *)
let set_next_line lexbuf =
   incr current_line;
   current_schar := Lexing.lexeme_end lexbuf;
   let loc = create_loc !current_file !current_line 0 !current_line 0 in
      set_current_loc loc

(*
 * Get the location of the current lexeme.
 * We assume it is all on one line.
 *)
let lexeme_loc lexbuf =
   let line = !current_line in
   let schar = Lexing.lexeme_start lexbuf - !current_schar in
   let echar = Lexing.lexeme_end lexbuf - !current_schar in
   let file = !current_file in
   let loc = create_loc file line schar line echar in
      set_current_loc loc;
      loc

(*
 * Raise a syntax error exception.
 *)
let syntax_error s lexbuf =
   let loc = lexeme_loc lexbuf in
      raise (OmakeException (loc_exp_pos loc, SyntaxError s))

(*
 * Get the string in the lexbuf.
 *)
let lexeme_string lexbuf =
   let loc = lexeme_loc lexbuf in
   let s = Lexing.lexeme lexbuf in
      s, loc

(*
 * Strip the initial whitespace from the string.
 *)
let strip_white_prefix s =
   let len = String.length s in
   let rec strip i =
      if i = len then
         ""
      else
         match s.[i] with
            ' '
          | '\t'
          | '\012'
          | '\n'
          | '\r' ->
                strip (succ i)
          | _ ->
                String.sub s i (len - i)
   in
      strip 0
}

(*
 * White space.
 * Line is terminated by '\n' or eof,
 * but be nice to DOS.
 *)
let whitec          = [' ' '\t' '\012' '\n' '\r']
let white           = whitec +
let opt_white       = whitec *

(*
 * Definitions are a name followed by =
 *)
let name           = ['A'-'Z' 'a'-'z' '0'-'9' '-' '_']+
let def            = name '='

(*
 * Quotes.
 *
 * XXX: JYH: we used to allow the multi-quote form, like ""Hello "quoted" world"".
 * This was confusing, and it makes it really hard to specify empty strings,
 * and it didn't match up with Lm_string_util.quote_argv.
 *)
let squote         = ['\'']
let dquote         = ['"']
let quote          = squote | dquote

(*
 * Special characters.
 *)
let special_char   = ['<' '>' '|' '&' '(' ')' ';'] | "<<" | ">>" | "&&" | "||"

(*
 * Escape sequences.
 *)
let esc_char       = '\\' ['"' '\'' ' ' '\t' '\012']

(*
 * Glob characters.
 *)
let glob_char     = ['*' '?' '[' ']' '~' ]
let glob_esc      = '\\' glob_char

(*
 * Text is any sequence of non-white characters.
 *)
let other         = [^ '\\' '"' '\'' '<' '>' '|' '&' ';' '(' ')' ' ' '\t' '\012' '\n' '\r' 'A'-'Z' 'a'-'z' '0'-'9' '_' '-' '*' '?' '[' ']']+

(*
 * Main lexer.
 *)
rule lex_main = parse
   quote
   { let id, _ = lexeme_string lexbuf in
     let buf = Buffer.create 32 in
        lex_string buf id lexbuf
   }
 | white
   { let s, loc = lexeme_string lexbuf in
        TokWhite (s, loc)
   }
 | def
   { let s, loc = lexeme_string lexbuf in
        TokDefine (s, loc)
   }
 | opt_white special_char opt_white
   { let s, loc = lexeme_string lexbuf in
        match Lm_string_util.trim s with
           "<"  -> TokLessThan (s, loc)
         | ">"  -> TokGreaterThan (s, loc)
	 | ">>" -> TokGreaterGreaterThan (s, loc)
         | "&"  -> TokAmp (s, loc)
         | ";"  -> TokSemiColon (s, loc)
         | "&&" -> TokAnd (s, loc)
         | "|"  -> TokPipe (s, loc)
         | "||" -> TokOr (s, loc)
         | "("  -> TokLeftParen (s, loc)
         | ")"  -> TokRightParen (s, loc)
         | _    -> syntax_error ("illegal operator: " ^ s) lexbuf
   }
 | '\\'
   { let s, loc = lexeme_string lexbuf in
        TokString (s, loc)
   }
 | esc_char
   { let s, loc = lexeme_string lexbuf in
     let s = String.sub s 1 (String.length s - 1) in
        TokString (s, loc)
   }
 | glob_char
 | name
 | other
   { let s, loc = lexeme_string lexbuf in
        TokString (s, loc)
   }
 | _
   { let s, loc = lexeme_string lexbuf in
        syntax_error ("illegal character: " ^ String.escaped s) lexbuf
   }
 | eof
   { let loc = lexeme_loc lexbuf in
        TokEof loc
   }

(*
 * Read a string.
 *)
and lex_string buf term = parse
   quote
   { let s, loc = lexeme_string lexbuf in
        if s = term then
           TokData (Buffer.contents buf, loc)
        else
           begin
              Buffer.add_string buf s;
	      lex_string buf term lexbuf
	   end
   }
 | '\\'
   { let s, loc = lexeme_string lexbuf in
        Buffer.add_string buf s;
        lex_string buf term lexbuf
   }
 | glob_char
   { let s, loc = lexeme_string lexbuf in
        Buffer.add_char buf '\\';
        Buffer.add_string buf s;
        lex_string buf term lexbuf
   }
 | esc_char
   { let s, loc = lexeme_string lexbuf in
        Buffer.add_substring buf s 1 (String.length s - 1);
        lex_string buf term lexbuf
   }
 | glob_esc
 | white
 | name
 | white
 | special_char
 | other
   { let s, loc = lexeme_string lexbuf in
        Buffer.add_string buf s;
        lex_string buf term lexbuf
   }
 | _
   { let s, loc = lexeme_string lexbuf in
        syntax_error ("illegal character: " ^ String.escaped s) lexbuf
   }
 | eof
   { let loc = lexeme_loc lexbuf in
        TokEof loc
   }

{
let parse s =
   let s = Lm_string_util.trim s in
   let lexbuf = Lexing.from_string s in
      try Omake_shell_parse.prog lex_main lexbuf with
         Parsing.Parse_error ->
            syntax_error ("\"" ^ String.escaped s ^ "\"") lexbuf
}

(*!
 * @docoff
 *
 * -*-
 * Local Variables:
 * Caml-master: "compile"
 * End:
 * -*-
 *)
