| 1 |
|
|---|
| 2 |
|
|---|
| 3 |
|
|---|
| 4 |
|
|---|
| 5 |
|
|---|
| 6 |
|
|---|
| 7 |
|
|---|
| 8 |
|
|---|
| 9 |
|
|---|
| 10 |
|
|---|
| 11 |
|
|---|
| 12 |
|
|---|
| 13 |
|
|---|
| 14 |
|
|---|
| 15 |
|
|---|
| 16 |
|
|---|
| 17 |
|
|---|
| 18 |
from urllib2 import urlopen, URLError |
|---|
| 19 |
import sha |
|---|
| 20 |
from HTMLParser import HTMLParser, HTMLParseError |
|---|
| 21 |
|
|---|
| 22 |
def _sha1_hexdigest(value):
    """Return the hexadecimal SHA-1 digest of value, UTF-8 encoding text first."""
    # Function-scope import keeps this block self-contained; hashlib replaces
    # the deprecated ``sha`` module and yields the same digests.
    import hashlib

    if not isinstance(value, bytes):
        value = value.encode("utf-8")
    return hashlib.sha1(value).hexdigest()


def _uri_scheme(uri):
    """Return the part of uri that precedes the first ':'."""
    # On Python 3 a bytes URI must be decoded before it can be split on a
    # text separator; on Python 2 ``bytes is str`` and this branch is skipped.
    if isinstance(uri, bytes) and not isinstance(uri, str):
        uri = uri.decode("utf-8")
    return uri.split(":", 1)[0]


def microid(c, p, newstyle = True, hash = "sha1"):
    """
    Generate a MicroID digest based on a claimer URI and a property URI.

    The digest is the hash of the concatenated hex digests of both URIs:
    sha1(sha1(c) + sha1(p)).

    c        -- claimer URI (e.g. a mailto: or http: URI)
    p        -- property URI
    newstyle -- when true, prefix the digest with
                "<scheme of c>+<scheme of p>:<hash>:"; when false, return
                the bare hex digest
    hash     -- name of the hash algorithm; only "sha1" is supported

    Returns the MicroID as a string.
    Raises ValueError (a subclass of Exception) for an unsupported hash.

    >>> microid("http://nicolast.be", "http://www.eikke.com", False)
    'a6a18b671b0239ed85a32543ec487f443582e926'
    >>> microid("http://nicolast.be", "http://blog.eikke.com")
    'http+http:sha1:207ec367c4f64dc9d37c47fb490ed9c2f686f875'
    >>> microid("mailto:spambox@nicolast.be", "http://nicolast.be")
    'mailto+http:sha1:6646f18368bc40c4095092d61807ae98de9ef19a'
    """
    if hash != "sha1":
        raise ValueError("Only sha1 hashing is supported for now")

    digest = _sha1_hexdigest(_sha1_hexdigest(c) + _sha1_hexdigest(p))

    if newstyle:
        digest = "%s+%s:%s:%s" % (_uri_scheme(c), _uri_scheme(p), hash, digest)
    return digest
|---|
| 46 |
|
|---|
| 47 |
class MicroIDParser(HTMLParser):
    """
    HTML parser that collects the MicroIDs advertised in a document's <head>.

    Every <meta name="microid" content="..."> element seen between <head>
    and </head> contributes its content value to the list returned by
    get_microids(). The comparison of the name attribute is case-insensitive.
    """

    # Class-level default so the flag is readable before any tag is parsed.
    in_head = False

    def __init__(self, *args, **kwargs):
        # Explicit base-class call rather than super(): the Python 2
        # HTMLParser is an old-style class.
        HTMLParser.__init__(self, *args, **kwargs)
        self.in_head = False
        # Per-instance list: a class-level list would be shared by every
        # parser, leaking the MicroIDs of one document into the next.
        self.microids = []

    def handle_starttag(self, tag, attrs):
        """Track entry into <head> and record microid <meta> elements."""
        tag = tag.lower()
        if tag == "head":
            self.in_head = True
            return
        if not self.in_head or tag != "meta":
            return

        name = self.get_attr(attrs, "name")
        if not name or name.lower() != "microid":
            return

        content = self.get_attr(attrs, "content")
        # A microid <meta> without a content attribute carries no MicroID.
        if content is not None:
            self.microids.append(content)

    def handle_endtag(self, tag):
        """Stop collecting once </head> is reached."""
        if tag.lower() == "head":
            self.in_head = False

    def get_microids(self):
        """Return the list of MicroID strings collected so far."""
        return self.microids

    def get_attr(self, attrs, name):
        """
        Return the value of the first attribute called name, or None.

        attrs is the list of (name, value) pairs HTMLParser passes to
        handle_starttag.
        """
        for attr_name, attr_value in attrs:
            if attr_name == name:
                return attr_value
        return None
|---|
| 69 |
|
|---|
| 70 |
def find_microid(uri):
    """
    Fetch uri and return the MicroIDs found in the document.

    The response is read in fixed-size chunks and fed to a MicroIDParser.

    Returns the list reported by MicroIDParser.get_microids(), or None when
    fetching or parsing fails with URLError, HTMLParseError or ValueError.
    This is a deliberate best-effort lookup: those errors are not propagated.
    """
    parser = MicroIDParser()
    buffsize = 4096
    try:
        handle = urlopen(uri)
        try:
            chunk = handle.read(buffsize)
            while chunk:
                parser.feed(chunk)
                chunk = handle.read(buffsize)
        finally:
            # Always release the connection, even when parsing fails midway.
            handle.close()
    except (URLError, HTMLParseError, ValueError):
        # Tuple form is valid on Python 2.6+ and Python 3 alike.
        return None

    return parser.get_microids()
|---|