Python处理Last-Modified
用Python检查某个URI的内容是否有修改时,可以在请求中带上If-Modified-Since头:如果服务器返回304,说明内容未修改;否则可以读取HTTP Response的Last-Modified字段
import urllib2
import datetime
from urllib2 import HTTPError
import datetime
def check_modified(url, last_modified, field_name="Last-Modified"):
    """Print whether *url* has changed since *last_modified*.

    Sends a request for *url* carrying an ``If-Modified-Since`` header and
    prints the outcome:

    * the value of the *field_name* response header when the request succeeds,
    * a "Not Modified Since" line when the server answers 304,
    * the status code for any other HTTP error.

    Args:
        url: Address to request.
        last_modified: String sent verbatim as the ``If-Modified-Since`` value.
        field_name: Response header whose value is printed on success.

    Returns:
        None. Results are only printed.
    """
    request = urllib2.Request(url)
    request.add_header('If-Modified-Since', last_modified)
    opener = urllib2.build_opener()

    # Keep the try body minimal: only opening the URL raises HTTPError.
    try:
        resp = opener.open(request)
    except HTTPError as e:
        if 304 == e.code:
            print("Not Modified Since: " + last_modified)
        else:
            # Report other HTTP failures instead of dropping them silently.
            print("HTTP Error %d: %s" % (e.code, url))
        return

    try:
        value = resp.headers.get(field_name)
        if value is None:
            # The header is optional; concatenating None would raise TypeError.
            print("Last-Modified: (no %s header in response)" % field_name)
        else:
            print("Last-Modified: " + value)
    finally:
        # Release the underlying connection once the headers have been read.
        resp.close()
# Build the If-Modified-Since value in HTTP-date form,
# e.g. "Tue, 15 Nov 2011 00:00:00 GMT".
# "%b" is the documented abbreviated-month directive; the "%h" used before is a
# platform-specific alias that is not available on every system.
# NOTE(review): %a and %b follow the process locale; HTTP-dates need English
# names, so this assumes the default C/English locale -- confirm.
last_modified = datetime.datetime(2011, 11, 15, 0, 0).strftime("%a, %d %b %Y %H:%M:%S GMT")
check_modified('http://www.kuaishubao.com', last_modified)
有的服务器并不返回Last-Modified,所以需要看情况处理,比如改为读取Date字段(注意Date是响应生成的时间,并不是资源的修改时间):
# Same check, but print the "date" response header instead of Last-Modified.
check_modified('http://sunliwen.com', last_modified, "date")