-- Copyright 1999 Daniel Elphick and others
-- Licensed under Eiffel Forum Freeware License, version 1;
-- (see forum.txt)

%{
indexing

    description: "XML parsing class"
	author: "Daniel Elphick <dre00r@ecs.soton.ac.uk>"

class XML_PARSER
		-- Parser for a small XML subset.  The grammar rules below build a
		-- TAG_TREE; the tokens are produced by the hand-written scanner
		-- in `read_token' further down in this file.

inherit

		-- The inherited `make' is renamed to `make_skeleton' so that this
		-- class can declare its own `make' (which takes the input stream
		-- and calls `make_skeleton').  `report_error' is redefined below
		-- to add the scanner context to the message.
    YY_PARSER_SKELETON [ANY]
		rename
			make as make_skeleton
		redefine
			report_error
		end
		-- NOTE(review): presumably supplies the token codes (TEXTUAL,
		-- PI_START, ...) declared with %token below -- confirm.
	XML_TOKENS

creation

    make

%}

-- TEXTUAL carries a string: `read_token' returns it for tag names,
-- processing-instruction text and element content.
%token	<STRING>				TEXTUAL
-- WHITE_SPACE is declared but is neither used by a rule nor returned by
-- `read_token' in this file.
%token	<STRING>				WHITE_SPACE
-- PI_START is returned for "<?", PI_END for the '>' closing a
-- processing instruction (the '?' before it is stripped from the text).
%token 							PI_START
%token 							PI_END
-- END_TAG_START is returned for "</".
%token							END_TAG_START


-- Semantic value types of the nonterminals.
%type	<LINKED_LIST[ANY]>		tags
%type	<TAG_TREE>				tag
%type	<TAG_TREE>				xml_file
%type	<STRING>				start_tag
%type	<STRING>				end_tag
%type	<STRING>				pi_tag

%% -- Grammar rules and actions follow.

-- A document is one processing instruction followed by zero or more
-- tags.  The result is a node named "ROOT" whose first child is the
-- processing-instruction text, followed by the tags; it is also kept in
-- the `tree' attribute.
xml_file	: 	pi_tag tags
				{
					!!tree.make("ROOT")
					tree.add_child($1)
					tree.add_children($2)
					$$ := tree
				}
			;

-- An element is a start tag and an end tag enclosing either nested tags
-- or a single TEXTUAL.  The two tag names must be equal; otherwise the
-- mismatch is written to std.error and `raise_error' is called, leaving
-- the value of the rule unset.
tag			:	start_tag tags end_tag
				{
						-- Element containing nested elements (possibly none).
					if not $1.is_equal($3) then
						std.error.put_string("Start tag '")
						std.error.put_string($1)
						std.error.put_string("' does not match end tag '")
						std.error.put_string($3)
						std.error.put_string("'%N")
						raise_error
					else
						!!$$.make($1)
						$$.add_children($2)
					end
				}
			|	start_tag TEXTUAL end_tag
				{
						-- Element containing only text.
					if not $1.is_equal($3) then
						std.error.put_string("Start tag '")
						std.error.put_string($1)
						std.error.put_string("' does not match end tag '")
						std.error.put_string($3)
						std.error.put_string("'%N")
						raise_error
					else
						!!$$.make($1)
						$$.add_child($2)
					end
				}
			;

-- Possibly empty sequence of tags, collected left-recursively: the
-- empty alternative creates the list and every further tag is appended
-- to that same list.
tags		:	tags tag
				{
					$1.add_last($2)
					$$ := $1
				}
			|	
				{
					!!$$.make
				}
			;

-- "<name>": the value is the TEXTUAL between '<' and '>'.  The scanner
-- puts everything up to the '>' into that TEXTUAL.
start_tag	:	'<' TEXTUAL '>'
				{
					$$ := $2
				}
			;

-- "</name>": the value is the TEXTUAL between "</" and '>'.
end_tag		:	END_TAG_START TEXTUAL '>' 
				{
					$$ := $2
				}
			;

-- "<?text?>": the value is the text between "<?" and the closing "?>".
pi_tag		:	PI_START TEXTUAL PI_END 
				{
					$$ := $2
				}
			;
			
%%

feature

	tree: TAG_TREE
			-- Result of the parse: the "ROOT" node created by the
			-- `xml_file' rule.
	input: INPUT_STREAM
			-- Stream the scanner reads characters from (set by `make').
	line_number: INTEGER
			-- Line counter used in error reports; starts at 1 and is
			-- incremented for every '%N' the scanner reads.
	
	make(stream: INPUT_STREAM) is
			-- Create a parser reading its text from `stream', with the
			-- line counter at 1 and the scanner in the `initial' state.
		do
			line_number := 1
			input := stream
				-- Initialise the inherited parser skeleton (its `make' is
				-- renamed to `make_skeleton' in the inherit clause).
			make_skeleton
			state := initial
		end

	expand_last_value is
			-- Replace `last_value' by a rewritten copy of itself:
			-- newline, tab and percent characters are spelled out as the
			-- two-character sequences %N, %T and %%, and the entities
			-- "&gt;" and "&lt;" are decoded to '>' and '<'.  Any other
			-- '&' is copied unchanged.
		local
			pos: INTEGER
			raw: STRING
			c, decoded: CHARACTER
		do
			raw := last_value
			!!last_value.make(raw.count)
			from
				pos := 1
			until
				pos > raw.count
			loop
				c := raw @ pos
				inspect
					c
				when '%N' then
					last_value.append("%%N")
				when '%T' then
					last_value.append("%%T")
				when '%%' then
					last_value.append("%%%%")
				when '&' then
						-- Both recognised entities have the shape "&?t;",
						-- so check the room and the "t;" suffix first and
						-- then tell them apart by the second character.
					decoded := '&'
					if pos + 3 <= raw.count and then
					   (raw @ (pos + 2)) = 't' and then
					   (raw @ (pos + 3)) = ';' then
						inspect
							raw @ (pos + 1)
						when 'g' then
							decoded := '>'
							pos := pos + 3
						when 'l' then
							decoded := '<'
							pos := pos + 3
						else
								-- Not an entity handled here: keep the '&'.
						end
					end
					last_value.append_character(decoded)
				else
					last_value.append_character(c)
				end
				pos := pos + 1
			end
		end

	state: INTEGER
			-- Current scanner state: one of the constants `initial' ..
			-- `weird' below, or `yyEOF_token' once the end of input has
			-- been reached.
	pending_character: CHARACTER
			-- Character recorded when a token ends on a character that
			-- the next call to `read_token' must look at again.
	has_pending_character: BOOLEAN
			-- Must the next call to `read_token' reuse the previous
			-- character instead of reading a new one?
	last_character: CHARACTER
			-- Character currently being examined by the scanner.
	last_value: STRING
			-- Text of the token being built; reset at the start of every
			-- call to `read_token'.
	last_token: INTEGER
			-- Code of the last token scanned, or `error'.

	error: INTEGER is -1
			-- `last_token' value signalling a scanner error.
	yyEOF_token: INTEGER is 0
			-- `last_token' value for end of input (also stored in `state'
			-- at that point).
	initial: INTEGER is 1
			-- Skipping white space until the next '<'.
	expect: INTEGER is 2
			-- Just after '<': the next character decides between "<?",
			-- "</" and a start tag.
	expect_pi_end: INTEGER is 3
			-- After the text of a processing instruction: '>' expected.
	expect_start_end: INTEGER is 4
			-- After the name of a start tag: '>' expected.
	expect_end_end: INTEGER is 5
			-- After the name of an end tag: '>' expected.
	in_pi: INTEGER is 6
			-- Reading the text of a processing instruction.
	in_start: INTEGER is 7
			-- Reading the name of a start tag.
	in_end: INTEGER is 8
			-- Reading the name of an end tag.
	weird: INTEGER is 9
			-- After the '>' of a start tag: reading element content up
			-- to the next '<'.

	report_error(a_message: STRING) is
			-- Write `a_message' to standard error together with the
			-- scanner context: current line number, code of the last
			-- token, last character examined and scanner state.
			-- Each source line below produces one line of output.
		do
			std.error.put_string(a_message); std.error.put_string(" at line "); std.error.put_integer(line_number); std.error.put_character('%N')
			std.error.put_string("last_token = "); std.error.put_integer(last_token); std.error.put_character('%N')
			std.error.put_string("last_character = '"); std.error.put_character(last_character); std.error.put_string("'%N")
			std.error.put_string("state = "); std.error.put_integer(state); std.error.put_string("%N")
		end

	read_token is
			-- Scan the next token from `input' and leave its code in
			-- `last_token'; for TEXTUAL tokens the text is left in
			-- `last_value'.  The scanner is a hand-written state machine
			-- driven by `state'.  Lexical errors are written to std.error
			-- and signalled by setting `last_token' to `error'.
		do
			!!last_value.make(0)
			if has_pending_character then
					-- The previous call stopped on a character that still
					-- has to be examined: reuse it instead of reading.
				last_character := pending_character
				has_pending_character := False
			else
				advance_input
				if input.end_of_input then
					last_token := yyEOF_token
					state := yyEOF_token
				end
			end
			inspect
				state
			when initial then
					-- Between tags: only white space may precede the next '<'.
				last_token := 9999 -- not a token but > yyEOF_token
				from
				until
					last_character = '<' or else last_token <= yyEOF_token
				loop
					if not ("%T%N <").has(last_character) then
						std.error.put_string("Error in INITIAL state%N")
						last_token := error
					else
						advance_input
						if input.end_of_input then
							last_token := yyEOF_token
						end
					end
				end
				if last_token /= error and last_token /= yyEOF_token then
						-- A '<' was found: the token itself is decided by
						-- the character that follows it.
					state := expect
					read_token
				end
			when weird then
					-- Element content.  `last_token' stays 0 while only
					-- white space has been seen and becomes TEXTUAL at
					-- the first other character.
				from
					last_token := 0
				until
					last_character = '<' or else last_token = error
				loop
					if last_character = '>' then
						std.error.put_string("Error in WEIRD state%N")
						last_token := error
					elseif (" %T%N").has(last_character) then
						last_value.append_character(last_character)
						advance_input
						if input.end_of_input then
							std.error.put_string("Error in WEIRD state%N")
							last_token := error
						end
					else
						last_token := TEXTUAL
						from
						until
							last_character = '<' or else last_token = error
						loop
							if last_character = '>' then
								std.error.put_string("Error in WEIRD state%N")
								last_token := error
							else
								last_value.append_character(last_character)
								advance_input
								if input.end_of_input then
									std.error.put_string("Error in WEIRD state%N")
									last_token := error
								end
							end
						end
					end
				end
				state := expect
				if last_token = 0 then
						-- Only white space before the '<'.  Peek at the
						-- next character: before an end tag the white
						-- space is the element's text, otherwise it is
						-- dropped and the following tag is scanned.  The
						-- peeked character is pushed back, so it is not
						-- counted in `line_number' here.
					input.read_character
					if not input.end_of_input then
						if input.last_character = '/' then
							last_token := TEXTUAL
							expand_last_value
							input.unread_character
						else
							input.unread_character
							read_token
						end
					end
				elseif last_token = TEXTUAL then
					expand_last_value
				end
			when expect then
				inspect
					last_character
				when '?' then
					last_token := PI_START
					state := in_pi
				when '/' then
					last_token := END_TAG_START
					state := in_end
				else
						-- Start tag: the character just read is the first
						-- character of the name, so keep it for the next call.
					last_token := ('<').code
					state := in_start
					pending_character := last_character
					has_pending_character := True
				end
			when expect_pi_end then
				if last_character = '>' then
					last_token := PI_END
					state := initial
				else
					std.error.put_string("Error in EXPECT_PI_END state%N")
					last_token := error
				end
			when expect_start_end then
				if last_character = '>' then
					last_token := ('>').code
					state := weird
				else
					std.error.put_string("Error in EXPECT_START_END state%N")
					last_token := error
				end
			when expect_end_end then
				if last_character = '>' then
					last_token := ('>').code
					state := initial
				else
					std.error.put_string("Error in EXPECT_END_END state%N")
					last_token := error
				end
			when in_pi then
				read_tag_text
					-- The text must end with the '?' of the closing "?>".
					-- The count test protects `item' when the text is
					-- empty (input "<?>").
				if last_token /= error and then last_value.count > 0 and then last_value.item(last_value.count) = '?' then
					last_value.remove_last(1) -- strip off '?'
					pending_character := '>'
					has_pending_character := True
					last_token := TEXTUAL
					state := expect_pi_end
				else
					std.error.put_string("Error in IN_PI state%N")
					last_token := error
				end
			when in_start then
				read_tag_text
				if last_token /= error then
					pending_character := '>'
					has_pending_character := True
					last_token := TEXTUAL
					state := expect_start_end
				else
					std.error.put_string("Error in IN_START state%N")
				end
			when in_end then
				read_tag_text
				if last_token /= error then
					pending_character := '>'
					has_pending_character := True
					last_token := TEXTUAL
					state := expect_end_end
				else
					std.error.put_string("Error in IN_END state%N")
				end
			when 0 then
					-- End of input: `last_token' is already `yyEOF_token'.
			else
				print("How did we get here?%N")
				die_with_code(exit_failure_code)
			end
		end

feature {NONE} -- Scanner implementation

	advance_input is
			-- Read one character from `input'.  Unless the input is
			-- exhausted, store it in `last_character' and count it in
			-- `line_number' if it is a newline.  Callers test
			-- `input.end_of_input' afterwards.
		do
			input.read_character
			if not input.end_of_input then
				last_character := input.last_character
				if last_character = '%N' then
					line_number := line_number + 1
				end
			end
		end

	read_tag_text is
			-- Append characters to `last_value', starting with
			-- `last_character', up to but not including the next '>'.
			-- Set `last_token' to `error' (without printing anything) if
			-- a '<' or the end of input is met first.
		do
			from
			until
				last_character = '>' or else last_token = error
			loop
				if last_character = '<' then
					last_token := error
				else
					last_value.append_character(last_character)
					advance_input
					if input.end_of_input then
						last_token := error
					end
				end
			end
		end


	
	
end
