-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhtmlparser.py
More file actions
38 lines (35 loc) · 1.55 KB
/
htmlparser.py
File metadata and controls
38 lines (35 loc) · 1.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#coding:utf-8
# Python 3 equivalent of the import below: from html.parser import HTMLParser
# Written for Python 2.7.10.
from HTMLParser import HTMLParser
# Sample HTML fragment used as parser input (it is passed to feed() at the
# bottom of this file). It is a run of <a href="..."> links; the last <a> is
# never closed.
# NOTE(review): the link texts look like mis-decoded (mojibake) characters --
# confirm the intended source encoding.
page ='''<sada>가가가</sada><a href="http://click.union.360buy.com/JdClick /?unionId=75" class="f1" style="padding-left:13px; padding-right:14px">쑴땜냘</a></td><td><a href="http://www.letao.com /?source=hao123" class="f1">있慶貢糾냘</a></td><td><a href="http://www.lashou.com/cl_today/w_3001" class="f2">윗癎考뭔</a></td><td><a href="http://www.amazon.cn/?tag=2009hao123famousdaohang" class="f2">饑쯩祈</a></td><td><a href="http://www.vancl.com/?source=hao123mp" class="f1">럴와넒틔</a></td><td><a href="http://reg.jiayuan.com/st/?id=3237&url=/st /main.php" class="f1">各셩솅鍍'''
class hp(HTMLParser):
    """Collect the ``href`` targets and the text content of ``<a>`` elements.

    After ``feed()`` has been called:

    * ``links`` holds every ``href`` value found on an ``<a>`` start tag,
      in document order.
    * ``data_value`` holds every text chunk reported while an ``<a>``
      element was open.
    """

    # True between an <a> start tag and the next </a> end tag.
    a_text = False

    def __init__(self):
        # Explicit base-class call rather than super(): under Python 2 the
        # HTMLParser base is an old-style class, which super() rejects.
        HTMLParser.__init__(self)
        self.links = []
        self.data_value = []

    def handle_starttag(self, tag, attr):
        """Start capturing text on ``<a>`` and record its ``href`` values.

        ``attr`` is the list of ``(name, value)`` pairs supplied by the
        parser; it may be empty.
        """
        if tag != 'a':
            return
        self.a_text = True
        # Looping over an empty attribute list is already a no-op, so no
        # separate emptiness check is needed.
        for name, value in attr:
            if name == "href":
                self.links.append(value)

    def handle_endtag(self, tag):
        """Stop capturing text once an ``</a>`` end tag is seen."""
        if tag == 'a':
            self.a_text = False

    def handle_data(self, data):
        """Store ``data`` when it occurs inside an open ``<a>`` element."""
        if self.a_text:
            self.data_value.append(data)
#if __name__ == "__main__":
# Demo run: parse the sample page, then print the collected hrefs followed by
# the collected link texts (one list per line).
yk = hp()
yk.feed(page)
yk.close()
for _collected in (yk.links, yk.data_value):
    print(_collected)