本帖最后由 GoGo闯 于 2014-5-28 18:34 编辑
- #coding:utf-8
- #批量查询360关键词指数
- import urllib
- import urllib2
- import re
- import time
- import linecache
- req = re.compile('{"query":"(.*?)","data":{.*?"week_index":([0-9]+),"month_index":([0-9]+)}}')
- wordindex_360 = open('360wordindex.txt','a')
- nu = 1
- for word in open('360word.txt'):
- url = 'http://index.so.com/index.php?a=overviewJson&q=%s&area=全国' % urllib.quote_plus(word)
- headers = {
- ......
- }
- postData = {
- "a":"overiewJsom",
- "q":"x",
- "area":"word"
- }
- postData = urllib.urlencode(postData)
-
- time.sleep(0.25)
- #请求并发送制定的构造数据
- request = urllib2.Request(url, postData, headers)
- response = urllib2.urlopen(request)
- text = response.read()
-
-
- y = linecache.getline(r'daili.txt',nu)
- if "360指数_访问异常出错" in text:
- print "切换代理IP:"+y
- proxy_support = urllib2.ProxyHandler({'http':y})
- opener = urllib2.build_opener(proxy_support, urllib2.HTTPHandler)
- urllib2.install_opener(opener)
- request = urllib2.Request(url, postData, headers)
- text = urllib2.urlopen(request).read()
- # time.sleep(600)
- nu = nu + 1
- continue
- else:
- word_date = re.findall(req,text)
- for x in word_date:
- data_list = list(x)
- data_str = ','.join(data_list).decode('unicode_escape').encode('utf-8').replace(',',' ') #unicode编码转中文
- wordindex_360.write(data_str+'\n')
- print data_str
-
复制代码
运行结果:
PS:注意不要混用空格和 Tab。代码粘贴过去后再手动调整缩进,有时会出问题;改完之后请用编辑器把整个文件重新格式化一遍。
|