http://localhost:8080/_ah/admin/interactive より実行
from google.appengine.api import urlfetch
from elementtree.ElementTree import *
import re
rss1 = 'http://movies.go.com/xml/rss/intheaters.xml'
url = rss1
rss = urlfetch.Fetch(url)
print rss
tree = ElementTree(fromstring(rss.content))
img_re = re.compile('<img src="([a-zA-Z0-9/:._\-]+)" ')
for item in tree.findall('.//item'):
title = item.find('title').text.strip()
description = item.find('description').text.strip()
imageMatch = img_re.search(description)
if (imageMatch):
picture_url = imageMatch.groups()[0]
print title
print picture_url
---
Speed Racer
http://movies.go.com/i/movies/895881/895881.jpg
--- 特定のページの src = .jpg 検出 ---
from google.appengine.api import urlfetch
from elementtree.ElementTree import *
import re
url = 'http://webdba.blogspot.com/2008/05/urlfetch-rss-img.html'
content = urlfetch.Fetch(url).content
img_re = re.compile('<img src="([a-zA-Z0-9/:._\-]+)" ',re.I)
img_re = re.compile('src="([a-zA-Z0-9/:._\-]+).jpg" ',re.I)
img_re = re.compile('src="([a-zA-Z0-9/:._\-]+[jpg|gif|png])" ',re.I)
imageMatch = img_re.search(content)
imageMatch = img_re.findall(content)
print imageMatch
for i in imageMatch:
print i
---
['http://code.google.com/appengine/images/appengine_lowres.jpg']
http://code.google.com/appengine/images/appengine_lowres.jpg