openlayers/tools/exampleparser.py

   1 #!/usr/bin/env python
   2
   3 import sys
   4 import os
   5 import re
   6 import urllib2
   7 import time
   8 from xml.dom.minidom import Document
   9
  10 try:
  11     import xml.etree.ElementTree as ElementTree
  12 except ImportError:
  13     try:
  14         import cElementTree as ElementTree
  15     except ImportError:
  16         try:
  17             import elementtree.ElementTree as ElementTree
  18         except ImportError:
  19             import lxml.etree as ElementTree
  20
  21 missing_deps = False
  22 try:
  23     import simplejson
  24     from BeautifulSoup import BeautifulSoup
  25 except ImportError, E:
  26     missing_deps = E
  27
# File name of the Atom feed; the script writes it as '../examples/<feedName>'.
feedName = "example-list.xml"
# Base url that is prepended to the feed name and to each example file name
# when the links and ids inside the feed are built.
feedPath = "http://openlayers.org/dev/examples/"
  30
  31 def getListOfOnlineExamples(baseUrl):
  32     """
  33     useful if you want to get a list of examples a url. not used by default.
  34     """
  35     html = urllib2.urlopen(baseUrl)
  36     soup = BeautifulSoup(html)
  37     examples = soup.findAll('li')
  38     examples = [example.find('a').get('href') for example in examples]
  39     examples = [example for example in examples if example.endswith('.html')]
  40     examples = [example for example in examples]
  41     return examples
  42
  43 def getListOfExamples(relPath):
  44     """
  45     returns list of .html filenames within a given path - excludes example-list.html
  46     """
  47     examples = os.listdir(relPath)
  48     examples = [example for example in examples if example.endswith('.html') and example != "example-list.html"]
  49     return examples
  50
  51
  52 def getExampleHtml(location):
  53     """
  54     returns html of a specific example that is available online or locally
  55     """
  56     print '.',
  57     if location.startswith('http'):
  58         return urllib2.urlopen(location).read()
  59     else:
  60         f = open(location)
  61         html = f.read()
  62         f.close()
  63         return html
  64
  65
  66 def extractById(soup, tagId, value=None):
  67     """
  68     returns full contents of a particular tag id
  69     """
  70     beautifulTag = soup.find(id=tagId)
  71     if beautifulTag:
  72         if beautifulTag.contents:
  73             value = str(beautifulTag.renderContents()).strip()
  74             value = value.replace('\t','')
  75             value = value.replace('\n','')
  76     return value
  77
  78 def getRelatedClasses(html):
  79     """
  80     parses the html, and returns a list of all OpenLayers Classes
  81     used within (ie what parts of OL the javascript uses).
  82     """
  83     rawstr = r'''(?P<class>OpenLayers\..*?)\('''
  84     return re.findall(rawstr, html)
  85
  86 def parseHtml(html,ids):
  87     """
  88     returns dictionary of items of interest
  89     """
  90     soup = BeautifulSoup(html)
  91     d = {}
  92     for tagId in ids:
  93         d[tagId] = extractById(soup,tagId)
  94     #classes should eventually be parsed from docs - not automatically created.
  95     classes = getRelatedClasses(html)
  96     d['classes'] = classes
  97     return d
  98
  99 def getSvnInfo(path):
 100     h = os.popen("svn info %s --xml" % path)
 101     tree = ElementTree.fromstring(h.read())
 102     h.close()
 103     d = {
 104         'url': tree.findtext('entry/url'),
 105         'author': tree.findtext('entry/commit/author'),
 106         'date': tree.findtext('entry/commit/date')
 107     }
 108     return d
 109
 110 def createFeed(examples):
 111     doc = Document()
 112     atomuri = "http://www.w3.org/2005/Atom"
 113     feed = doc.createElementNS(atomuri, "feed")
 114     feed.setAttribute("xmlns", atomuri)
 115     title = doc.createElementNS(atomuri, "title")
 116     title.appendChild(doc.createTextNode("OpenLayers Examples"))
 117     feed.appendChild(title)
 118     link = doc.createElementNS(atomuri, "link")
 119     link.setAttribute("rel", "self")
 120     link.setAttribute("href", feedPath + feedName)
 121
 122     modtime = time.strftime("%Y-%m-%dT%I:%M:%SZ", time.gmtime())
 123     id = doc.createElementNS(atomuri, "id")
 124     id.appendChild(doc.createTextNode("%s%s#%s" % (feedPath, feedName, modtime)))
 125     feed.appendChild(id)
 126
 127     updated = doc.createElementNS(atomuri, "updated")
 128     updated.appendChild(doc.createTextNode(modtime))
 129     feed.appendChild(updated)
 130
 131     examples.sort(key=lambda x:x["modified"])
 132     for example in sorted(examples, key=lambda x:x["modified"], reverse=True):
 133         entry = doc.createElementNS(atomuri, "entry")
 134
 135         title = doc.createElementNS(atomuri, "title")
 136         title.appendChild(doc.createTextNode(example["title"] or example["example"]))
 137         entry.appendChild(title)
 138
 139         link = doc.createElementNS(atomuri, "link")
 140         link.setAttribute("href", "%s%s" % (feedPath, example["example"]))
 141         entry.appendChild(link)
 142
 143         summary = doc.createElementNS(atomuri, "summary")
 144         summary.appendChild(doc.createTextNode(example["shortdesc"] or example["example"]))
 145         entry.appendChild(summary)
 146
 147         updated = doc.createElementNS(atomuri, "updated")
 148         updated.appendChild(doc.createTextNode(example["modified"]))
 149         entry.appendChild(updated)
 150
 151         author = doc.createElementNS(atomuri, "author")
 152         name = doc.createElementNS(atomuri, "name")
 153         name.appendChild(doc.createTextNode(example["author"]))
 154         author.appendChild(name)
 155         entry.appendChild(author)
 156
 157         id = doc.createElementNS(atomuri, "id")
 158         id.appendChild(doc.createTextNode("%s%s#%s" % (feedPath, example["example"], example["modified"])))
 159         entry.appendChild(id)
 160
 161         feed.appendChild(entry)
 162
 163     doc.appendChild(feed)
 164     return doc
 165
 166 def wordIndex(examples):
 167     """
 168     Create an inverted index based on words in title and shortdesc.  Keys are
 169     lower cased words.  Values are dictionaries with example index keys and
 170     count values.
 171     """
 172     index = {}
 173     unword = re.compile("\\W+")
 174     keys = ["shortdesc", "title"]
 175     for i in range(len(examples)):
 176         for key in keys:
 177             text = examples[i][key]
 178             if text:
 179                 words = unword.split(text)
 180                 for word in words:
 181                     if word:
 182                         word = word.lower()
 183                         if index.has_key(word):
 184                             if index[word].has_key(i):
 185                                 index[word][i] += 1
 186                             else:
 187                                 index[word][i] = 1
 188                         else:
 189                             index[word] = {i: 1}
 190     return index
 191
 192 if __name__ == "__main__":
 193
 194     if missing_deps:
 195         print "This script requires simplejson and BeautifulSoup. You don't have them. \n(%s)" % E
 196         sys.exit()
 197
 198     if len(sys.argv) > 1:
 199         outFile = open(sys.argv[1],'w')
 200     else:
 201         outFile = open('../examples/example-list.js','w')
 202
 203     examplesLocation = '../examples'
 204     print 'Reading examples from %s and writing out to %s' % (examplesLocation, outFile.name)
 205
 206     exampleList = []
 207     docIds = ['title','shortdesc']
 208
 209     #comment out option to create docs from online resource
 210     #examplesLocation = 'http://svn.openlayers.org/sandbox/docs/examples/'
 211     #examples = getListOfOnlineExamples(examplesLocation)
 212
 213     examples = getListOfExamples(examplesLocation)
 214
 215     modtime = time.strftime("%Y-%m-%dT%I:%M:%SZ", time.gmtime())
 216
 217     for example in examples:
 218         url = os.path.join(examplesLocation,example)
 219         html = getExampleHtml(url)
 220         tagvalues = parseHtml(html,docIds)
 221         tagvalues['example'] = example
 222         # add in svn info
 223         d = getSvnInfo(url)
 224         tagvalues["modified"] = d["date"] or modtime
 225         tagvalues["author"] = d["author"] or "anonymous"
 226         tagvalues['link'] = example
 227
 228         exampleList.append(tagvalues)
 229
 230     print
 231
 232     exampleList.sort(key=lambda x:x['example'].lower())
 233
 234     index = wordIndex(exampleList)
 235
 236     json = simplejson.dumps({"examples": exampleList, "index": index})
 237     #give the json a global variable we can use in our js.  This should be replaced or made optional.
 238     json = 'var info=' + json
 239     outFile.write(json)
 240     outFile.close()
 241
 242     print "writing feed to ../examples/%s " % feedName
 243     atom = open('../examples/%s' % feedName, 'w')
 244     doc = createFeed(exampleList)
 245     atom.write(doc.toxml())
 246     atom.close()
 247
 248
 249     print 'complete'
 250
 251