#!/usr/bin/python
# Copyright 2007 Frederik Gladhorn
#/***************************************************************************
#* *
#* This program is free software; you can redistribute it and/or modify *
#* it under the terms of the GNU General Public License as published by *
#* the Free Software Foundation; either version 2 of the License, or *
#* (at your option) any later version. *
#* *
#***************************************************************************/
# Convert the word types of files that were created during the beta phase.
# Paste the output at the end of the kvtml file, before the final closing tag.
#import xml.dom.minidom
#xml - sax parsing
from xml.sax import make_parser
from xml.sax.handler import ContentHandler
#xml - sax - writing the meta files
from xml.sax.saxutils import XMLGenerator
from xml.sax import saxutils
#from textnormalize import text_normalize_filter
#directory stuff
from dircache import listdir
from os.path import isdir
#for the time - this still has to be improved or done in a different way - file time?
import time
#unicode
import codecs
# Shared UTF-8 output file; the SAX handler and processFile() write into it.
wordtypeFile = codecs.open("wordtypes_xml", mode="w", encoding="utf-8")
class KVTML_2_Handler(ContentHandler):
    """SAX handler that collects the word type of each translation.

    While parsing, the text of ``typename`` and ``subtypename`` elements is
    captured.  At the end of every ``translation`` element that has a
    non-empty type name, the ids and names are printed and the names are
    written to the module-level ``wordtypeFile``.
    """

    # Capture states: which element's character data is currently collected.
    CAPTURE_ENTRY = 1  # not referenced inside this class
    CAPTURE_WORDTYPE = 2
    CAPTURE_SUBTYPE = 3

    def __init__(self):
        # ContentHandler is an old-style class on Python 2, so call the base
        # initialiser explicitly instead of through super().
        ContentHandler.__init__(self)
        # Ids are only used for the progress output; start them empty so a
        # translation seen before any entry does not raise AttributeError.
        self.entry_id = ""
        self.trans_id = ""
        self.wordtype = ""
        self.subtype = ""
        self._state = None

    def startDocument(self):
        pass

    def startElement(self, name, attrs):
        """Remember entry/translation ids and start capturing type names."""
        if name == u"entry":
            self.entry_id = attrs.get("id", "")
        elif name == u"translation":
            self.trans_id = attrs.get("id", "")
        elif name == u"typename":
            # Start from scratch so the newest element wins, as before.
            self.wordtype = ""
            self._state = self.CAPTURE_WORDTYPE
        elif name == u"subtypename":
            self.subtype = ""
            self._state = self.CAPTURE_SUBTYPE

    def endElement(self, name):
        """Stop capturing at the end of a name; emit at the end of a translation."""
        if name in (u"typename", u"subtypename"):
            # Without this, an empty element would leave the capture state
            # armed and swallow the whitespace that follows it.
            self._state = None
            return
        if name != u"translation":
            return

        if self.wordtype:
            # Same text the original comma-separated print statement produced.
            print(" ".join((self.entry_id, " - ", self.trans_id, " ",
                            self.wordtype, " :: ", self.subtype)))
            # The output is meant to be pasted into an XML file, so the
            # captured text has to be escaped again.
            # NOTE(review): the empty literals below are kept exactly as
            # found and write nothing; presumably they once held markup --
            # confirm against the intended output format.
            wordtypeFile.write(u"")
            wordtypeFile.write(saxutils.escape(self.wordtype))
            wordtypeFile.write(u"")
            if self.subtype:
                wordtypeFile.write(u"")
                wordtypeFile.write(saxutils.escape(self.subtype))
                wordtypeFile.write(u"")
            wordtypeFile.write(u"")
            wordtypeFile.write(u"")
            wordtypeFile.write(u"")
            if self.subtype:
                wordtypeFile.write(u"\n")
            wordtypeFile.write(u"\n")

        # Always reset, so no name leaks into the next translation.
        self.wordtype = ""
        self.subtype = ""

    def characters(self, text):
        """Append character data to the name that is being captured.

        The parser may deliver the text of one element in several chunks,
        so the chunks are accumulated until the element ends.
        """
        if self._state == self.CAPTURE_WORDTYPE:
            self.wordtype += text
        elif self._state == self.CAPTURE_SUBTYPE:
            self.subtype += text

    def endDocument(self):
        pass
def processFile(voc_file):
    """Parse one kvtml file and append its word types to ``wordtypeFile``.

    voc_file -- path of the kvtml file to read.

    Errors raised while opening or parsing the file propagate to the caller;
    the input file is closed in either case.
    """
    handler = KVTML_2_Handler()
    saxparser = make_parser()
    saxparser.setContentHandler(handler)
    # Binary mode: the parser gets the raw bytes and applies the encoding
    # declared in the document itself.
    datasource = open(voc_file, "rb")
    try:
        wordtypeFile.write(u"\n")
        saxparser.parse(datasource)
        wordtypeFile.write(u"")
    finally:
        datasource.close()
    # Flush rather than close: the output file is shared by every processed
    # file, and closing it here made the second file fail with a write to a
    # closed file.
    wordtypeFile.flush()
def readFile(path):
    """Announce the file at ``path`` and pass it on to processFile()."""
    message = "Reading " + path
    print(message)
    processFile(path)
def readDirectory(path):
    """Walk ``path`` recursively and read every file whose name ends in "kvtml"."""
    for entry in listdir(path):
        full_path = path + "/" + entry
        if isdir(full_path):
            # Descend into sub-directories first, then move on.
            readDirectory(full_path)
            continue
        if entry.endswith("kvtml"):
            readFile(full_path)
def main():
    """Convert every kvtml file found below the current directory.

    The shared output file is closed once the traversal has finished, also
    when it is aborted by an error.
    """
    print("paste the output into the kvtml file")
    rootPath = "."
    try:
        readDirectory(rootPath)
    finally:
        # Closing an already closed file is a no-op, so this is safe even if
        # the file was closed earlier.
        wordtypeFile.close()


# Run the conversion.
main()