ミニ練習:SGMLParserを使ってURLリンクを取得します。
2295 ワード
#
#coding:utf-8
from sgmllib import SGMLParser
class urlparser(SGMLParser):
def reset(self):
self.result=[]
SGMLParser.reset(self)
def start_a(self,attrs):
#
href=[v for k,v in attrs if k=='href']
if href:
self.result.extend(href)
if __name__ == '__main__':
    # Demo: extract every link target from a small HTML fragment and print
    # one URL per line.
    urls='''
<tr>
<td height="207" colspan="2" align="left" valign="top" class="normal">
<p>Damien Rice - 《0》 </p>
<a href="http://galeki.xy568.net/music/Delicate.mp3">1. Delicate</a><br />
<a href="http://galeki.xy568.net/music/Volcano.mp3">2. Volcano</a><br />
<a href="http://galeki.xy568.net/music/The Blower's Daughter.mp3">3. The Blower's Daughter</a><br />
<a href="http://galeki.xy568.net/music/Cannonball.mp3">4. Cannonball </a><br />
<a href="http://galeki.xy568.net/music/Older Chests.mp3">5. Order Chests</a><br />
<a href="http://galeki.xy568.net/music/Amie.mp3">6. Amie</a><br />
<a href="http://galeki.xy568.net/music/Cheers Darlin'.mp3">7. Cheers Darling</a><br />
<a href="http://galeki.xy568.net/music/Cold Water.mp3">8. Cold water</a><br />
<a href="http://galeki.xy568.net/music/I Remember.mp3">9. I remember</a><br />
<a href="http://galeki.xy568.net/music/Eskimo.mp3">10. Eskimo</a></p>
</td>
</tr>
'''
    upr = urlparser()
    upr.feed(urls)
    # close() makes the parser process any data it is still buffering, so it
    # has to run before the results are read, not after they are printed.
    upr.close()
    for url in upr.result:
        # Parenthesised single-argument form prints the same text under the
        # Python 2 print statement and is also valid as a function call.
        print(url)
結果:http://galeki.xy568.net/music/Delicate.mp3 http://galeki.xy568.net/music/Volcano.mp3 http://galeki.xy568.net/music/The Blower's Daughter.mp3 http://galeki.xy568.net/music/Cannonball.mp3 http://galeki.xy568.net/music/Older Chests.mp3 http://galeki.xy568.net/music/Amie.mp3 http://galeki.xy568.net/music/Cheers Darlin'.mp3 http://galeki.xy568.net/music/Cold Water.mp3 http://galeki.xy568.net/music/I Remember.mp3 http://galeki.xy568.net/music/Eskimo.mp3