ACIL FM
Dark
Refresh
Current DIR:
/usr/share/doc/python3-libxml2
/
usr
share
doc
python3-libxml2
Upload
Zip Selected
Delete Selected
Pilih semua
Nama
Ukuran
Permission
Aksi
apibuild.py
79.77 MB
chmod
View
DL
Edit
Rename
Delete
index.py
32.13 MB
chmod
View
DL
Edit
Rename
Delete
libxml2class.txt
22.44 MB
chmod
View
DL
Edit
Rename
Delete
python.html
19.44 MB
chmod
View
DL
Edit
Rename
Delete
TODO
1.57 MB
chmod
View
DL
Edit
Rename
Delete
Edit file: /usr/share/doc/python3-libxml2/index.py
#!/usr/bin/python -u # # imports the API description and fills up a database with # name relevance to modules, functions or web pages # # Operation needed: # ================= # # install mysqld, the python wrappers for mysql and libxml2, start mysqld # Change the root passwd of mysql: # mysqladmin -u root password new_password # Create the new database xmlsoft # mysqladmin -p create xmlsoft # Create a database user 'veillard' and give him passord access # change veillard and abcde with the right user name and passwd # mysql -p # password: # mysql> GRANT ALL PRIVILEGES ON xmlsoft TO veillard@localhost # IDENTIFIED BY 'abcde' WITH GRANT OPTION; # # As the user check the access: # mysql -p xmlsoft # Enter password: # Welcome to the MySQL monitor.... # mysql> use xmlsoft # Database changed # mysql> quit # Bye # # Then run the script in the doc subdir, it will create the symbols and # word tables and populate them with information extracted from # the libxml2-api.xml API description, and make them accessible read-only # by nobody@loaclhost the user expected to be Apache's one # # On the Apache configuration, make sure you have php support enabled # import MySQLdb import libxml2 import sys import string import os # # We are not interested in parsing errors here # def callback(ctx, str): return libxml2.registerErrorHandler(callback, None) # # The dictionary of tables required and the SQL command needed # to create them # TABLES={ "symbols" : """CREATE TABLE symbols ( name varchar(255) BINARY NOT NULL, module varchar(255) BINARY NOT NULL, type varchar(25) NOT NULL, descr varchar(255), UNIQUE KEY name (name), KEY module (module))""", "words" : """CREATE TABLE words ( name varchar(50) BINARY NOT NULL, symbol varchar(255) BINARY NOT NULL, relevance int, KEY name (name), KEY symbol (symbol), UNIQUE KEY ID (name, symbol))""", "wordsHTML" : """CREATE TABLE wordsHTML ( name varchar(50) BINARY NOT NULL, resource varchar(255) BINARY NOT NULL, section varchar(255), id varchar(50), relevance int, KEY name (name), KEY resource (resource), UNIQUE KEY ref (name, resource))""", "wordsArchive" : """CREATE TABLE wordsArchive ( name varchar(50) BINARY NOT NULL, ID int(11) NOT NULL, relevance int, KEY name (name), UNIQUE KEY ref (name, ID))""", "pages" : """CREATE TABLE pages ( resource varchar(255) BINARY NOT NULL, title varchar(255) BINARY NOT NULL, UNIQUE KEY name (resource))""", "archives" : """CREATE TABLE archives ( ID int(11) NOT NULL auto_increment, resource varchar(255) BINARY NOT NULL, title varchar(255) BINARY NOT NULL, UNIQUE KEY id (ID,resource(255)), INDEX (ID), INDEX (resource))""", "Queries" : """CREATE TABLE Queries ( ID int(11) NOT NULL auto_increment, Value varchar(50) NOT NULL, Count int(11) NOT NULL, UNIQUE KEY id (ID,Value(35)), INDEX (ID))""", "AllQueries" : """CREATE TABLE AllQueries ( ID int(11) NOT NULL auto_increment, Value varchar(50) NOT NULL, Count int(11) NOT NULL, UNIQUE KEY id (ID,Value(35)), INDEX (ID))""", } # # The XML API description file to parse # API="libxml2-api.xml" DB=None ######################################################################### # # # MySQL database interfaces # # # ######################################################################### def createTable(db, name): global TABLES if db == None: return -1 if name == None: return -1 c = db.cursor() ret = c.execute("DROP TABLE IF EXISTS %s" % (name)) if ret == 1: print "Removed table %s" % (name) print "Creating table %s" % (name) try: ret = c.execute(TABLES[name]) except: print "Failed to create table %s" % (name) return -1 return ret def checkTables(db, verbose = 1): global TABLES if db == None: return -1 c = db.cursor() nbtables = c.execute("show tables") if verbose: print "Found %d tables" % (nbtables) tables = {} i = 0 while i < nbtables: l = c.fetchone() name = l[0] tables[name] = {} i = i + 1 for table in TABLES.keys(): if not tables.has_key(table): print "table %s missing" % (table) createTable(db, table) try: ret = c.execute("SELECT count(*) from %s" % table); row = c.fetchone() if verbose: print "Table %s contains %d records" % (table, row[0]) except: print "Troubles with table %s : repairing" % (table) ret = c.execute("repair table %s" % table); print "repairing returned %d" % (ret) ret = c.execute("SELECT count(*) from %s" % table); row = c.fetchone() print "Table %s contains %d records" % (table, row[0]) if verbose: print "checkTables finished" # make sure apache can access the tables read-only try: ret = c.execute("GRANT SELECT ON xmlsoft.* TO nobody@localhost") ret = c.execute("GRANT INSERT,SELECT,UPDATE ON xmlsoft.Queries TO nobody@localhost") except: pass return 0 def openMySQL(db="xmlsoft", passwd=None, verbose = 1): global DB if passwd == None: try: passwd = os.environ["MySQL_PASS"] except: print "No password available, set environment MySQL_PASS" sys.exit(1) DB = MySQLdb.connect(passwd=passwd, db=db) if DB == None: return -1 ret = checkTables(DB, verbose) return ret def updateWord(name, symbol, relevance): global DB if DB == None: openMySQL() if DB == None: return -1 if name == None: return -1 if symbol == None: return -1 c = DB.cursor() try: ret = c.execute( """INSERT INTO words (name, symbol, relevance) VALUES ('%s','%s', %d)""" % (name, symbol, relevance)) except: try: ret = c.execute( """UPDATE words SET relevance = %d where name = '%s' and symbol = '%s'""" % (relevance, name, symbol)) except: print "Update word (%s, %s, %s) failed command" % (name, symbol, relevance) print "UPDATE words SET relevance = %d where name = '%s' and symbol = '%s'" % (relevance, name, symbol) print sys.exc_type, sys.exc_value return -1 return ret def updateSymbol(name, module, type, desc): global DB updateWord(name, name, 50) if DB == None: openMySQL() if DB == None: return -1 if name == None: return -1 if module == None: return -1 if type == None: return -1 try: desc = string.replace(desc, "'", " ") l = string.split(desc, ".") desc = l[0] desc = desc[0:99] except: desc = "" c = DB.cursor() try: ret = c.execute( """INSERT INTO symbols (name, module, type, descr) VALUES ('%s','%s', '%s', '%s')""" % (name, module, type, desc)) except: try: ret = c.execute( """UPDATE symbols SET module='%s', type='%s', descr='%s' where name='%s'""" % (module, type, desc, name)) except: print "Update symbol (%s, %s, %s) failed command" % (name, module, type) print """UPDATE symbols SET module='%s', type='%s', descr='%s' where name='%s'""" % (module, type, desc, name) print sys.exc_type, sys.exc_value return -1 return ret def addFunction(name, module, desc = ""): return updateSymbol(name, module, 'function', desc) def addMacro(name, module, desc = ""): return updateSymbol(name, module, 'macro', desc) def addEnum(name, module, desc = ""): return updateSymbol(name, module, 'enum', desc) def addStruct(name, module, desc = ""): return updateSymbol(name, module, 'struct', desc) def addConst(name, module, desc = ""): return updateSymbol(name, module, 'const', desc) def addType(name, module, desc = ""): return updateSymbol(name, module, 'type', desc) def addFunctype(name, module, desc = ""): return updateSymbol(name, module, 'functype', desc) def addPage(resource, title): global DB if DB == None: openMySQL() if DB == None: return -1 if resource == None: return -1 c = DB.cursor() try: ret = c.execute( """INSERT INTO pages (resource, title) VALUES ('%s','%s')""" % (resource, title)) except: try: ret = c.execute( """UPDATE pages SET title='%s' WHERE resource='%s'""" % (title, resource)) except: print "Update symbol (%s, %s, %s) failed command" % (name, module, type) print """UPDATE pages SET title='%s' WHERE resource='%s'""" % (title, resource) print sys.exc_type, sys.exc_value return -1 return ret def updateWordHTML(name, resource, desc, id, relevance): global DB if DB == None: openMySQL() if DB == None: return -1 if name == None: return -1 if resource == None: return -1 if id == None: id = "" if desc == None: desc = "" else: try: desc = string.replace(desc, "'", " ") desc = desc[0:99] except: desc = "" c = DB.cursor() try: ret = c.execute( """INSERT INTO wordsHTML (name, resource, section, id, relevance) VALUES ('%s','%s', '%s', '%s', '%d')""" % (name, resource, desc, id, relevance)) except: try: ret = c.execute( """UPDATE wordsHTML SET section='%s', id='%s', relevance='%d' where name='%s' and resource='%s'""" % (desc, id, relevance, name, resource)) except: print "Update symbol (%s, %s, %d) failed command" % (name, resource, relevance) print """UPDATE wordsHTML SET section='%s', id='%s', relevance='%d' where name='%s' and resource='%s'""" % (desc, id, relevance, name, resource) print sys.exc_type, sys.exc_value return -1 return ret def checkXMLMsgArchive(url): global DB if DB == None: openMySQL() if DB == None: return -1 if url == None: return -1 c = DB.cursor() try: ret = c.execute( """SELECT ID FROM archives WHERE resource='%s'""" % (url)) row = c.fetchone() if row == None: return -1 except: return -1 return row[0] def addXMLMsgArchive(url, title): global DB if DB == None: openMySQL() if DB == None: return -1 if url == None: return -1 if title == None: title = "" else: title = string.replace(title, "'", " ") title = title[0:99] c = DB.cursor() try: cmd = """INSERT INTO archives (resource, title) VALUES ('%s','%s')""" % (url, title) ret = c.execute(cmd) cmd = """SELECT ID FROM archives WHERE resource='%s'""" % (url) ret = c.execute(cmd) row = c.fetchone() if row == None: print "addXMLMsgArchive failed to get the ID: %s" % (url) return -1 except: print "addXMLMsgArchive failed command: %s" % (cmd) return -1 return((int)(row[0])) def updateWordArchive(name, id, relevance): global DB if DB == None: openMySQL() if DB == None: return -1 if name == None: return -1 if id == None: return -1 c = DB.cursor() try: ret = c.execute( """INSERT INTO wordsArchive (name, id, relevance) VALUES ('%s', '%d', '%d')""" % (name, id, relevance)) except: try: ret = c.execute( """UPDATE wordsArchive SET relevance='%d' where name='%s' and ID='%d'""" % (relevance, name, id)) except: print "Update word archive (%s, %d, %d) failed command" % (name, id, relevance) print """UPDATE wordsArchive SET relevance='%d' where name='%s' and ID='%d'""" % (relevance, name, id) print sys.exc_type, sys.exc_value return -1 return ret ######################################################################### # # # Word dictionary and analysis routines # # # ######################################################################### # # top 100 english word without the one len < 3 + own set # dropWords = { 'the':0, 'this':0, 'can':0, 'man':0, 'had':0, 'him':0, 'only':0, 'and':0, 'not':0, 'been':0, 'other':0, 'even':0, 'are':0, 'was':0, 'new':0, 'most':0, 'but':0, 'when':0, 'some':0, 'made':0, 'from':0, 'who':0, 'could':0, 'after':0, 'that':0, 'will':0, 'time':0, 'also':0, 'have':0, 'more':0, 'these':0, 'did':0, 'was':0, 'two':0, 'many':0, 'they':0, 'may':0, 'before':0, 'for':0, 'which':0, 'out':0, 'then':0, 'must':0, 'one':0, 'through':0, 'with':0, 'you':0, 'said':0, 'first':0, 'back':0, 'were':0, 'what':0, 'any':0, 'years':0, 'his':0, 'her':0, 'where':0, 'all':0, 'its':0, 'now':0, 'much':0, 'she':0, 'about':0, 'such':0, 'your':0, 'there':0, 'into':0, 'like':0, 'may':0, 'would':0, 'than':0, 'our':0, 'well':0, 'their':0, 'them':0, 'over':0, 'down':0, 'net':0, 'www':0, 'bad':0, 'Okay':0, 'bin':0, 'cur':0, } wordsDict = {} wordsDictHTML = {} wordsDictArchive = {} def cleanupWordsString(str): str = string.replace(str, ".", " ") str = string.replace(str, "!", " ") str = string.replace(str, "?", " ") str = string.replace(str, ",", " ") str = string.replace(str, "'", " ") str = string.replace(str, '"', " ") str = string.replace(str, ";", " ") str = string.replace(str, "(", " ") str = string.replace(str, ")", " ") str = string.replace(str, "{", " ") str = string.replace(str, "}", " ") str = string.replace(str, "<", " ") str = string.replace(str, ">", " ") str = string.replace(str, "=", " ") str = string.replace(str, "/", " ") str = string.replace(str, "*", " ") str = string.replace(str, ":", " ") str = string.replace(str, "#", " ") str = string.replace(str, "\\", " ") str = string.replace(str, "\n", " ") str = string.replace(str, "\r", " ") str = string.replace(str, "\xc2", " ") str = string.replace(str, "\xa0", " ") return str def cleanupDescrString(str): str = string.replace(str, "'", " ") str = string.replace(str, "\n", " ") str = string.replace(str, "\r", " ") str = string.replace(str, "\xc2", " ") str = string.replace(str, "\xa0", " ") l = string.split(str) str = string.join(str) return str def splitIdentifier(str): ret = [] while str != "": cur = string.lower(str[0]) str = str[1:] if ((cur < 'a') or (cur > 'z')): continue while (str != "") and (str[0] >= 'A') and (str[0] <= 'Z'): cur = cur + string.lower(str[0]) str = str[1:] while (str != "") and (str[0] >= 'a') and (str[0] <= 'z'): cur = cur + str[0] str = str[1:] while (str != "") and (str[0] >= '0') and (str[0] <= '9'): str = str[1:] ret.append(cur) return ret def addWord(word, module, symbol, relevance): global wordsDict if word == None or len(word) < 3: return -1 if module == None or symbol == None: return -1 if dropWords.has_key(word): return 0 if ord(word[0]) > 0x80: return 0 if wordsDict.has_key(word): d = wordsDict[word] if d == None: return 0 if len(d) > 500: wordsDict[word] = None return 0 try: relevance = relevance + d[(module, symbol)] except: pass else: wordsDict[word] = {} wordsDict[word][(module, symbol)] = relevance return relevance def addString(str, module, symbol, relevance): if str == None or len(str) < 3: return -1 ret = 0 str = cleanupWordsString(str) l = string.split(str) for word in l: if len(word) > 2: ret = ret + addWord(word, module, symbol, 5) return ret def addWordHTML(word, resource, id, section, relevance): global wordsDictHTML if word == None or len(word) < 3: return -1 if resource == None or section == None: return -1 if dropWords.has_key(word): return 0 if ord(word[0]) > 0x80: return 0 section = cleanupDescrString(section) if wordsDictHTML.has_key(word): d = wordsDictHTML[word] if d == None: print "skipped %s" % (word) return 0 try: (r,i,s) = d[resource] if i != None: id = i if s != None: section = s relevance = relevance + r except: pass else: wordsDictHTML[word] = {} d = wordsDictHTML[word]; d[resource] = (relevance, id, section) return relevance def addStringHTML(str, resource, id, section, relevance): if str == None or len(str) < 3: return -1 ret = 0 str = cleanupWordsString(str) l = string.split(str) for word in l: if len(word) > 2: try: r = addWordHTML(word, resource, id, section, relevance) if r < 0: print "addWordHTML failed: %s %s" % (word, resource) ret = ret + r except: print "addWordHTML failed: %s %s %d" % (word, resource, relevance) print sys.exc_type, sys.exc_value return ret def addWordArchive(word, id, relevance): global wordsDictArchive if word == None or len(word) < 3: return -1 if id == None or id == -1: return -1 if dropWords.has_key(word): return 0 if ord(word[0]) > 0x80: return 0 if wordsDictArchive.has_key(word): d = wordsDictArchive[word] if d == None: print "skipped %s" % (word) return 0 try: r = d[id] relevance = relevance + r except: pass else: wordsDictArchive[word] = {} d = wordsDictArchive[word]; d[id] = relevance return relevance def addStringArchive(str, id, relevance): if str == None or len(str) < 3: return -1 ret = 0 str = cleanupWordsString(str) l = string.split(str) for word in l: i = len(word) if i > 2: try: r = addWordArchive(word, id, relevance) if r < 0: print "addWordArchive failed: %s %s" % (word, id) else: ret = ret + r except: print "addWordArchive failed: %s %s %d" % (word, id, relevance) print sys.exc_type, sys.exc_value return ret ######################################################################### # # # XML API description analysis # # # ######################################################################### def loadAPI(filename): doc = libxml2.parseFile(filename) print "loaded %s" % (filename) return doc def foundExport(file, symbol): if file == None: return 0 if symbol == None: return 0 addFunction(symbol, file) l = splitIdentifier(symbol) for word in l: addWord(word, file, symbol, 10) return 1 def analyzeAPIFile(top): count = 0 name = top.prop("name") cur = top.children while cur != None: if cur.type == 'text': cur = cur.next continue if cur.name == "exports": count = count + foundExport(name, cur.prop("symbol")) else: print "unexpected element %s in API doc <file name='%s'>" % (name) cur = cur.next return count def analyzeAPIFiles(top): count = 0 cur = top.children while cur != None: if cur.type == 'text': cur = cur.next continue if cur.name == "file": count = count + analyzeAPIFile(cur) else: print "unexpected element %s in API doc <files>" % (cur.name) cur = cur.next return count def analyzeAPIEnum(top): file = top.prop("file") if file == None: return 0 symbol = top.prop("name") if symbol == None: return 0 addEnum(symbol, file) l = splitIdentifier(symbol) for word in l: addWord(word, file, symbol, 10) return 1 def analyzeAPIConst(top): file = top.prop("file") if file == None: return 0 symbol = top.prop("name") if symbol == None: return 0 addConst(symbol, file) l = splitIdentifier(symbol) for word in l: addWord(word, file, symbol, 10) return 1 def analyzeAPIType(top): file = top.prop("file") if file == None: return 0 symbol = top.prop("name") if symbol == None: return 0 addType(symbol, file) l = splitIdentifier(symbol) for word in l: addWord(word, file, symbol, 10) return 1 def analyzeAPIFunctype(top): file = top.prop("file") if file == None: return 0 symbol = top.prop("name") if symbol == None: return 0 addFunctype(symbol, file) l = splitIdentifier(symbol) for word in l: addWord(word, file, symbol, 10) return 1 def analyzeAPIStruct(top): file = top.prop("file") if file == None: return 0 symbol = top.prop("name") if symbol == None: return 0 addStruct(symbol, file) l = splitIdentifier(symbol) for word in l: addWord(word, file, symbol, 10) info = top.prop("info") if info != None: info = string.replace(info, "'", " ") info = string.strip(info) l = string.split(info) for word in l: if len(word) > 2: addWord(word, file, symbol, 5) return 1 def analyzeAPIMacro(top): file = top.prop("file") if file == None: return 0 symbol = top.prop("name") if symbol == None: return 0 symbol = string.replace(symbol, "'", " ") symbol = string.strip(symbol) info = None cur = top.children while cur != None: if cur.type == 'text': cur = cur.next continue if cur.name == "info": info = cur.content break cur = cur.next l = splitIdentifier(symbol) for word in l: addWord(word, file, symbol, 10) if info == None: addMacro(symbol, file) print "Macro %s description has no <info>" % (symbol) return 0 info = string.replace(info, "'", " ") info = string.strip(info) addMacro(symbol, file, info) l = string.split(info) for word in l: if len(word) > 2: addWord(word, file, symbol, 5) return 1 def analyzeAPIFunction(top): file = top.prop("file") if file == None: return 0 symbol = top.prop("name") if symbol == None: return 0 symbol = string.replace(symbol, "'", " ") symbol = string.strip(symbol) info = None cur = top.children while cur != None: if cur.type == 'text': cur = cur.next continue if cur.name == "info": info = cur.content elif cur.name == "return": rinfo = cur.prop("info") if rinfo != None: rinfo = string.replace(rinfo, "'", " ") rinfo = string.strip(rinfo) addString(rinfo, file, symbol, 7) elif cur.name == "arg": ainfo = cur.prop("info") if ainfo != None: ainfo = string.replace(ainfo, "'", " ") ainfo = string.strip(ainfo) addString(ainfo, file, symbol, 5) name = cur.prop("name") if name != None: name = string.replace(name, "'", " ") name = string.strip(name) addWord(name, file, symbol, 7) cur = cur.next if info == None: print "Function %s description has no <info>" % (symbol) addFunction(symbol, file, "") else: info = string.replace(info, "'", " ") info = string.strip(info) addFunction(symbol, file, info) addString(info, file, symbol, 5) l = splitIdentifier(symbol) for word in l: addWord(word, file, symbol, 10) return 1 def analyzeAPISymbols(top): count = 0 cur = top.children while cur != None: if cur.type == 'text': cur = cur.next continue if cur.name == "macro": count = count + analyzeAPIMacro(cur) elif cur.name == "function": count = count + analyzeAPIFunction(cur) elif cur.name == "const": count = count + analyzeAPIConst(cur) elif cur.name == "typedef": count = count + analyzeAPIType(cur) elif cur.name == "struct": count = count + analyzeAPIStruct(cur) elif cur.name == "enum": count = count + analyzeAPIEnum(cur) elif cur.name == "functype": count = count + analyzeAPIFunctype(cur) else: print "unexpected element %s in API doc <files>" % (cur.name) cur = cur.next return count def analyzeAPI(doc): count = 0 if doc == None: return -1 root = doc.getRootElement() if root.name != "api": print "Unexpected root name" return -1 cur = root.children while cur != None: if cur.type == 'text': cur = cur.next continue if cur.name == "files": pass # count = count + analyzeAPIFiles(cur) elif cur.name == "symbols": count = count + analyzeAPISymbols(cur) else: print "unexpected element %s in API doc" % (cur.name) cur = cur.next return count ######################################################################### # # # Web pages parsing and analysis # # # ######################################################################### import glob def analyzeHTMLText(doc, resource, p, section, id): words = 0 try: content = p.content words = words + addStringHTML(content, resource, id, section, 5) except: return -1 return words def analyzeHTMLPara(doc, resource, p, section, id): words = 0 try: content = p.content words = words + addStringHTML(content, resource, id, section, 5) except: return -1 return words def analyzeHTMLPre(doc, resource, p, section, id): words = 0 try: content = p.content words = words + addStringHTML(content, resource, id, section, 5) except: return -1 return words def analyzeHTML(doc, resource, p, section, id): words = 0 try: content = p.content words = words + addStringHTML(content, resource, id, section, 5) except: return -1 return words def analyzeHTML(doc, resource): para = 0; ctxt = doc.xpathNewContext() try: res = ctxt.xpathEval("//head/title") title = res[0].content except: title = "Page %s" % (resource) addPage(resource, title) try: items = ctxt.xpathEval("//h1 | //h2 | //h3 | //text()") section = title id = "" for item in items: if item.name == 'h1' or item.name == 'h2' or item.name == 'h3': section = item.content if item.prop("id"): id = item.prop("id") elif item.prop("name"): id = item.prop("name") elif item.type == 'text': analyzeHTMLText(doc, resource, item, section, id) para = para + 1 elif item.name == 'p': analyzeHTMLPara(doc, resource, item, section, id) para = para + 1 elif item.name == 'pre': analyzeHTMLPre(doc, resource, item, section, id) para = para + 1 else: print "Page %s, unexpected %s element" % (resource, item.name) except: print "Page %s: problem analyzing" % (resource) print sys.exc_type, sys.exc_value return para def analyzeHTMLPages(): ret = 0 HTMLfiles = glob.glob("*.html") + glob.glob("tutorial/*.html") for html in HTMLfiles: if html[0:3] == "API": continue if html == "xml.html": continue try: doc = libxml2.parseFile(html) except: doc = libxml2.htmlParseFile(html, None) try: res = analyzeHTML(doc, html) print "Parsed %s : %d paragraphs" % (html, res) ret = ret + 1 except: print "could not parse %s" % (html) return ret ######################################################################### # # # Mail archives parsing and analysis # # # ######################################################################### import time def getXMLDateArchive(t = None): if t == None: t = time.time() T = time.gmtime(t) month = time.strftime("%B", T) year = T[0] url = "http://mail.gnome.org/archives/xml/%d-%s/date.html" % (year, month) return url def scanXMLMsgArchive(url, title, force = 0): if url == None or title == None: return 0 ID = checkXMLMsgArchive(url) if force == 0 and ID != -1: return 0 if ID == -1: ID = addXMLMsgArchive(url, title) if ID == -1: return 0 try: print "Loading %s" % (url) doc = libxml2.htmlParseFile(url, None); except: doc = None if doc == None: print "Failed to parse %s" % (url) return 0 addStringArchive(title, ID, 20) ctxt = doc.xpathNewContext() texts = ctxt.xpathEval("//pre//text()") for text in texts: addStringArchive(text.content, ID, 5) return 1 def scanXMLDateArchive(t = None, force = 0): global wordsDictArchive wordsDictArchive = {} url = getXMLDateArchive(t) print "loading %s" % (url) try: doc = libxml2.htmlParseFile(url, None); except: doc = None if doc == None: print "Failed to parse %s" % (url) return -1 ctxt = doc.xpathNewContext() anchors = ctxt.xpathEval("//a[@href]") links = 0 newmsg = 0 for anchor in anchors: href = anchor.prop("href") if href == None or href[0:3] != "msg": continue try: links = links + 1 msg = libxml2.buildURI(href, url) title = anchor.content if title != None and title[0:4] == 'Re: ': title = title[4:] if title != None and title[0:6] == '[xml] ': title = title[6:] newmsg = newmsg + scanXMLMsgArchive(msg, title, force) except: pass return newmsg ######################################################################### # # # Main code: open the DB, the API XML and analyze it # # # ######################################################################### def analyzeArchives(t = None, force = 0): global wordsDictArchive ret = scanXMLDateArchive(t, force) print "Indexed %d words in %d archive pages" % (len(wordsDictArchive), ret) i = 0 skipped = 0 for word in wordsDictArchive.keys(): refs = wordsDictArchive[word] if refs == None: skipped = skipped + 1 continue; for id in refs.keys(): relevance = refs[id] updateWordArchive(word, id, relevance) i = i + 1 print "Found %d associations in HTML pages" % (i) def analyzeHTMLTop(): global wordsDictHTML ret = analyzeHTMLPages() print "Indexed %d words in %d HTML pages" % (len(wordsDictHTML), ret) i = 0 skipped = 0 for word in wordsDictHTML.keys(): refs = wordsDictHTML[word] if refs == None: skipped = skipped + 1 continue; for resource in refs.keys(): (relevance, id, section) = refs[resource] updateWordHTML(word, resource, section, id, relevance) i = i + 1 print "Found %d associations in HTML pages" % (i) def analyzeAPITop(): global wordsDict global API try: doc = loadAPI(API) ret = analyzeAPI(doc) print "Analyzed %d blocks" % (ret) doc.freeDoc() except: print "Failed to parse and analyze %s" % (API) print sys.exc_type, sys.exc_value sys.exit(1) print "Indexed %d words" % (len(wordsDict)) i = 0 skipped = 0 for word in wordsDict.keys(): refs = wordsDict[word] if refs == None: skipped = skipped + 1 continue; for (module, symbol) in refs.keys(): updateWord(word, symbol, refs[(module, symbol)]) i = i + 1 print "Found %d associations, skipped %d words" % (i, skipped) def usage(): print "Usage index.py [--force] [--archive] [--archive-year year] [--archive-month month] [--API] [--docs]" sys.exit(1) def main(): try: openMySQL() except: print "Failed to open the database" print sys.exc_type, sys.exc_value sys.exit(1) args = sys.argv[1:] force = 0 if args: i = 0 while i < len(args): if args[i] == '--force': force = 1 elif args[i] == '--archive': analyzeArchives(None, force) elif args[i] == '--archive-year': i = i + 1; year = args[i] months = ["January" , "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]; for month in months: try: str = "%s-%s" % (year, month) T = time.strptime(str, "%Y-%B") t = time.mktime(T) + 3600 * 24 * 10; analyzeArchives(t, force) except: print "Failed to index month archive:" print sys.exc_type, sys.exc_value elif args[i] == '--archive-month': i = i + 1; month = args[i] try: T = time.strptime(month, "%Y-%B") t = time.mktime(T) + 3600 * 24 * 10; analyzeArchives(t, force) except: print "Failed to index month archive:" print sys.exc_type, sys.exc_value elif args[i] == '--API': analyzeAPITop() elif args[i] == '--docs': analyzeHTMLTop() else: usage() i = i + 1 else: usage() if __name__ == "__main__": main()
Simpan
Batal
Isi Zip:
Unzip
Create
Buat Folder
Buat File
Terminal / Execute
Run
Chmod Bulk
All File
All Folder
All File dan Folder
Apply