# The contents of this file are subject to the Mozilla Public
# License Version 1.1 (the "License"); you may not use this file
# except in compliance with the License. You may obtain a copy of
# the License at http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS
# IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
# implied. See the License for the specific language governing
# rights and limitations under the License.
#
# The Original Code is RDFGrabber version 1.0.
#
# The Initial Developer of the Original Code is European Environment
# Agency (EEA).  Portions created by EEA are
# Copyright (C) European Environment Agency.  All
# Rights Reserved.
#
# Contributor(s):
# Soren Roug, EEA
#
import operator, string, time
try:
    import urllib2 # Support for authenticated proxy
    ulib2 = 1
except:
    import urllib
    ulib2 = 0
import xmllib
import sys
from types import *
from objects import resource, literal
from const import *
from string import join, split

# Separator placed between the namespace part and the local name in the
# qualified tag and attribute names this module builds and splits.
ns_separator = " "

# Qualified names of the RDF attributes that the parser treats specially.
ABOUT_ATTRIBUTE = RDFNS + ns_separator + "about"
ID_ATTRIBUTE = RDFNS + ns_separator + "ID"
RESOURCE_ATTRIBUTE = RDFNS + ns_separator + "resource"
PARSETYPE_ATTRIBUTE = RDFNS + ns_separator + "parseType"

# Parser states: they decide whether the next unknown element is handled
# as a resource or as a property (see RDFParser.unknown_starttag).
EXPECT_RESOURCE = 0
EXPECT_PROPERTY = 1

class Context:
    """Snapshot of the parser's position inside the document.

    Stores the subject, the xml:lang value and the parser state that were
    current when the snapshot was taken, so they can be restored later.
    """

    def __init__(self,subject,lang,state):
        # Indentation uses spaces only: the former tab/space mix is rejected
        # by Python 3 and by "python -tt".
        self.subject = subject
        self.lang = lang
        self.state = state

class RDFParser(xmllib.XMLParser):
    """Parse an RDF file"""

    def __init__(self,adder,http_proxy=None):
        """Set up the parser state and the table of RDF element handlers.

        adder      -- callable that the parser invokes as
                      adder(subject, predicate, object) for each statement
        http_proxy -- optional proxy URL, registered for the 'http' scheme
        """
        self.adder = adder
        self._proxies = {}
        if http_proxy:
            self._proxies['http'] = http_proxy

        # Parsing state; parse_url() puts most of it back to these values.
        self.state = None
        self.encoding = 'UTF-8'
        self.context = []
        self.subject = None
        self.predicate = []
        self.object = None
        self.li_count = 0
        self.anon_count = 0
        self.__data = []
        self.lang = ''

        # Handlers for the elements of the RDF namespace itself.
        rdf_prefix = RDFNS + ns_separator
        container_handlers = (self.start_container, self.end_container)
        self.elements = {
            rdf_prefix + 'RDF': (self.start_rdf, self.end_rdf),
            rdf_prefix + 'Description':
                (self.start_description, self.end_description),
            rdf_prefix + 'li': (self.start_list, self.end_list),
        }
        for container in ('Seq', 'Bag', 'Alt'):
            self.elements[rdf_prefix + container] = container_handlers

        # The base class is initialised last, once the handler table exists.
        xmllib.XMLParser.__init__(self)

    def push(self):
        """Save the current subject, language and state on the context stack."""
        snapshot = Context(self.subject, self.lang, self.state)
        self.context.append(snapshot)
    
    def pop(self):
        """Discard the top context and restore the one underneath it.

        Subject, language and state are taken from the entry that is on top
        of the stack after the removal.
        """
        del self.context[-1]
        restored = self.context[-1]
        self.lang = restored.lang
        self.state = restored.state
        self.subject = restored.subject

    def dupcontext(self):
        """Push a second reference to the context currently on top."""
        current = self.context[-1]
        self.context.append(current)

    def generate_uri(self):
        """Return a new anonymous resource with the next "genid:" number."""
        serial = self.anon_count + 1
        self.anon_count = serial
        return resource("genid:" + str(serial), anonymous=1)

    def xmllang_attr(self,atts):
        """Move an xml:lang attribute, if present, from atts to self.lang."""
        if "xml:lang" not in atts:
            return
        self.lang = atts["xml:lang"]
        del atts["xml:lang"]

    def about_id_attr(self,atts):
        """Determine the subject from rdf:about / rdf:ID and push a context.

        rdf:about is preferred over rdf:ID; whichever attribute is used is
        removed from atts.  When neither is present an anonymous resource
        is generated.  A new context is pushed in every case.

        atts -- attribute dictionary of the element; modified in place
        """
        if ABOUT_ATTRIBUTE in atts:
            about = atts[ABOUT_ATTRIBUTE]
            del atts[ABOUT_ATTRIBUTE]
            # An empty value and a "#fragment" value both refer to the
            # document being parsed, so both are resolved against the base
            # URL.  (Indexing the first character of an empty value used to
            # raise IndexError.)
            if not about or about[0] == "#":
                about = self.baseurl + about
            self.subject = resource(about)
        elif ID_ATTRIBUTE in atts:
            self.subject = resource(self.baseurl + "#" + atts[ID_ATTRIBUTE])
            del atts[ID_ATTRIBUTE]
        else:
            self.subject = self.generate_uri()
        self.push()

    def handle_xml(self,encoding,standalone):
        """Remember the document encoding when one is given."""
        if not encoding:
            return
        self.encoding = encoding

    def unknown_starttag(self, tag, atts):
        """Dispatch a start tag that has no dedicated handler.

        A tag without a namespace part is qualified with the base URL,
        adding a '#' unless the base URL already ends in '/' or '#'.  The
        tag is then treated as a resource or as a property, depending on
        the parser state.
        """
        if " " not in tag:
            namespace = self.baseurl
            if namespace[-1] not in ('/', '#'):
                namespace = namespace + '#'
            tag = namespace + ns_separator + tag
        if self.state == EXPECT_RESOURCE:
            handler = self.start_resource
        else:
            handler = self.start_property
        handler(tag, atts)

    def unknown_endtag(self, tag):
        """Dispatch an end tag that has no dedicated handler.

        A tag without a namespace part is qualified with the base URL in
        the same way as unknown_starttag does it: a '#' is inserted unless
        the base URL already ends in '/' or '#'.  Without the '#' the end
        tag named a different URI than the start tag of the same element,
        and properties were emitted with the local name glued directly to
        the base URL.

        In state EXPECT_PROPERTY the end tag closes a resource, otherwise
        it closes a property.
        """
        if " " not in tag:
            namespace = self.baseurl
            if namespace[-1:] not in ('/', '#'):
                namespace = namespace + '#'
            tag = namespace + ns_separator + tag
        if self.state == EXPECT_PROPERTY:
            self.end_resource(tag)
        else:
            self.end_property(tag)

    def start_property(self, tag, atts):
        """Handle the start tag of a property element.

        tag  -- qualified tag name (namespace, separator, local name)
        atts -- attribute dictionary; recognised attributes are removed

        Three forms are distinguished:
        * rdf:parseType="Resource": the property value is a new anonymous
          resource, which becomes the subject of the nested properties;
        * any other rdf:parseType: the content is read as a literal;
        * rdf:resource: the value is the referenced resource, and remaining
          attributes are emitted as literal-valued statements about it.
        Otherwise the value is decided when the end tag is reached.
        """
        self.__data = []
        self.object = None
        self.state = EXPECT_RESOURCE
        self.xmllang_attr(atts)
        self.push()
        tag = "".join(tag.split(ns_separator))
        if PARSETYPE_ATTRIBUTE in atts:
            parse_type = atts[PARSETYPE_ATTRIBUTE]
            del atts[PARSETYPE_ATTRIBUTE]
            if parse_type == "Resource":
                newsub = self.generate_uri()
                # NOTE(review): the predicate is passed as a plain string
                # here, while end_property wraps it in resource() - confirm
                # that the adder accepts both forms.
                self.adder(self.subject,tag,newsub)
                # Replace the context pushed above by one whose subject is
                # the new resource and which expects properties.
                self.pop()
                self.subject = newsub
                self.state = EXPECT_PROPERTY
                self.push()
            else: # Only Literal is allowed
                self.setliteral()
        elif RESOURCE_ATTRIBUTE in atts:
            target = atts[RESOURCE_ATTRIBUTE]
            del atts[RESOURCE_ATTRIBUTE]
            # An empty value and a "#fragment" value both refer to the
            # document being parsed.  (Indexing the first character of an
            # empty value used to raise IndexError.)
            if not target or target[0] == "#":
                target = self.baseurl + target
            self.object = resource(target)
            for att in atts.keys():
                if att == ID_ATTRIBUTE: # Just in case both about and ID
                    continue
                new_att = resource("".join(att.split(ns_separator)))
                self.adder(self.object, new_att, literal(atts[att],self.lang))

    def end_property(self, tag):
        """Emit the statement for a property element that has just ended.

        When no object was set while the element was open, the collected
        character data becomes a literal object.  The context of the
        enclosing resource is restored before the statement is emitted.

        tag -- qualified tag name; with the separator removed it is the
               predicate
        """
        # Identity test: "== None" would go through whatever comparison the
        # object's class defines.
        if self.object is None:
            self.object = literal("".join(self.__data),self.lang)
        self.pop()
        predicate = resource("".join(tag.split(ns_separator)))
        self.adder(self.subject, predicate, self.object)
        self.object = None
    def start_resource(self, tag, atts):
        """Handle the start tag of a typed resource element.

        The subject is taken from rdf:about / rdf:ID (or generated), a TYPE
        statement naming the element is emitted, and every remaining
        attribute except rdf:ID becomes a literal-valued statement about
        the subject.
        """
        self.state = EXPECT_PROPERTY
        self.__data = []
        self.object = None
        self.anon_object = 0
        self.xmllang_attr(atts)
        self.about_id_attr(atts)

        node = self.context[-1].subject
        class_uri = "".join(tag.split(ns_separator))
        self.adder(node, TYPE, resource(class_uri))

        for att in atts.keys():
            if att != ID_ATTRIBUTE: # Just in case both about and ID
                new_att = resource("".join(att.split(ns_separator)))
                self.adder(node, new_att, literal(atts[att],self.lang))

    def end_resource(self,tag):
        """Make the finished resource the current object, restore the context."""
        finished = self.subject
        self.object = finished
        self.pop()

    def ignore_tag(self,atts=None):
        """Handler that deliberately does nothing."""

    def handle_data(self, text):
        """Add character data to the text collected so far."""
        collected = self.__data
        collected.append(text)

    def handle_cdata(self, text):
        """Add the content of a CDATA section to the text collected so far."""
        collected = self.__data
        collected.append(text)

    def handle_charref(self,ref):
        """Keep a character reference in the collected text, unexpanded."""
        verbatim = '&#' + ref + ';'
        self.handle_data(verbatim)

    def unknown_entityref(self,ref):
        """Keep an unknown entity reference in the collected text as written."""
        verbatim = '&' + ref + ';'
        self.handle_data(verbatim)

    def syntax_error(self,message):
        """Ignore the reported syntax error; the message is discarded."""

    def start_rdf(self, tag, atts):
        """Start of the rdf:RDF element: expect resources, with no subject yet."""
        self.subject = None
        self.state = EXPECT_RESOURCE
        self.xmllang_attr(atts)
        # Push last so that the saved context holds the values set above.
        self.push()

    def end_rdf(self,tag):
        """End of the rdf:RDF element; nothing is done here."""

    def start_container(self, tag, atts):
        """Start of an rdf:Seq, rdf:Bag or rdf:Alt element.

        Restarts the member counter and sets the subject from
        rdf:about / rdf:ID (or a generated id), pushing a new context.
        """
        self.li_count = 0
        self.xmllang_attr(atts)
        self.about_id_attr(atts)

    def end_container(self,tag):
        """Emit the TYPE statement of the container and restore the context."""
        container_type = resource("".join(tag.split(ns_separator)))
        self.adder(self.subject, TYPE, container_type)
        self.pop()

    def start_description(self, tag, atts):
        """
        Start of an rdf:Description element.

        rdf:Descriptions are like classes, but don't generate a type
        statement.  Attributes other than rdf:about and rdf:ID become
        literal-valued statements about the subject.
        """
        self.__data = []
        self.object = None
        self.state = EXPECT_PROPERTY
        self.xmllang_attr(atts)
        self.about_id_attr(atts)

        node = self.context[-1].subject
        reserved = (ABOUT_ATTRIBUTE, ID_ATTRIBUTE)
        for att in atts.keys():
            if att in reserved:
                continue
            new_att = resource("".join(att.split(ns_separator)))
            self.adder(node, new_att, literal(atts[att],self.lang))

    def end_description(self,tag):
        """Make the described resource the current object, restore the context."""
        described = self.subject
        self.object = described
        self.pop()

    def start_list(self, tag, atts):
        """Handle the start tag of an rdf:li element.

        The member counter is advanced and the matching rdf:_n predicate is
        put on the predicate stack for end_list.  With an rdf:resource
        attribute the member is the referenced resource, and any remaining
        attributes (except rdf:ID) are emitted as literal-valued statements
        about that resource.

        tag  -- qualified tag name (not used)
        atts -- attribute dictionary; rdf:resource is removed from it
        """
        self.__data = []
        self.object = None
        self.li_count = self.li_count + 1
        self.predicate.append(resource(RDFNS + '_' + str(self.li_count)))

        if RESOURCE_ATTRIBUTE in atts:
            target = atts[RESOURCE_ATTRIBUTE]
            del atts[RESOURCE_ATTRIBUTE]
            # An empty value and a "#fragment" value both refer to the
            # document being parsed.  (Indexing the first character of an
            # empty value used to raise IndexError.)
            if not target or target[0] == "#":
                target = self.baseurl + target
            self.object = resource(target)
            for att in atts.keys():
                if att == ID_ATTRIBUTE: # Just in case both about and ID
                    continue
                new_att = resource("".join(att.split(ns_separator)))
                # The extra attributes describe the referenced resource, as
                # in start_property; they used to be attached to the rdf:_n
                # predicate instead.
                self.adder(self.object, new_att, literal(atts[att],self.lang))

    def end_list(self, tag):
        """Emit the membership statement for an rdf:li element that has ended.

        When start_list set no object, the collected character data becomes
        a literal member.  The predicate is the rdf:_n resource that
        start_list put on the predicate stack.  Afterwards the container
        (the current subject) becomes the current object.
        """
        # Identity test: "== None" would go through whatever comparison the
        # object's class defines.
        if self.object is None:
            self.object = literal("".join(self.__data),self.lang)
        self.adder(self.subject, self.predicate.pop(), self.object)
        self.object = self.subject

    def handle_starttag(self, tag, method, atts):
        """Call the start handler with the tag name as well as the attributes."""
        method(tag,atts)

    def handle_endtag(self, tag, method):
        """Call the end handler with the tag name."""
        method(tag)

    def parse_url(self, url):
        """
        Grab the file from the webserver and feed it to the parser.

        url -- location of the RDF document; it also becomes the base URL
               against which "#fragment" references and un-namespaced tags
               are resolved

        The raw document is kept in self.rdfsource.  IOError is raised
        when the document cannot be opened; errors from the URL library
        are passed through.
        """
        # Drop whatever the underlying XML parser still holds from an
        # earlier document (unprocessed data, open tags), so that one
        # parser object can be used for several URLs.
        self.reset()
        self.encoding = 'UTF-8'
        self.state = None
        self.context = []
        self.subject = None
        self.predicate = []
        self.object = None
        self.li_count = 0
        self.__data = []
        self.lang = ''
        self.baseurl = url
        if ulib2 == 1:
            proxy_support = urllib2.ProxyHandler(self._proxies)
            opener = urllib2.build_opener(proxy_support,
                                          urllib2.HTTPHandler,
                                          urllib2.FileHandler)
            urllib2.install_opener(opener)
            f = urllib2.urlopen(url)
        else:
            try:
                u = urllib.URLopener(proxies=self._proxies)
            except IOError:
                raise IOError("Unsupported protocol")
            u.addheader("User-agent", "RDFGrabber (helpdesk@eionet.eu.int)")
            f = u.open(url)
        if not f:
            raise IOError("Failure in open %s" % url)
        # Close the response even when reading or parsing fails.
        try:
            self.rdfsource = f.read()
            self.feed(self.rdfsource)
        finally:
            f.close()
