【伪原创】方便懒人用 闯哥莫怪
【其实我也想混点分。。。。。。。】
# encoding=utf-8
# NOTE: Python only honours the encoding declaration above when it is on the
# first or second line of the file.
#
# Python 2 script. At import time it logs in to Baidu Passport and installs a
# cookie-aware global urllib2 opener, so that every later urllib2.urlopen()
# call in this file sends the session cookies.
import urllib2
import urllib
import re
import cookielib
import sys

# Python 2 only: make UTF-8 the process-wide default string encoding.
reload(sys)
sys.setdefaultencoding('utf-8')

URL_BAIDU_INDEX = u'http://www.baidu.com/'
URL_BAIDU_TOKEN = 'https://passport.baidu.com/v2/api/?getapi&tpl=pp&apiver=v3&class=login'
URL_BAIDU_LOGIN = 'https://passport.baidu.com/v2/api/?login'

# Account name and password (placeholder values; replace before running).
username = '账号'
password = '密码'

# Cookie handling: the CookieJar stores and resends cookies automatically,
# nothing has to be set by hand.
cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
urllib2.install_opener(opener)
# Presumably fetched first so the jar holds the initial Baidu cookies before
# the token request -- TODO confirm this is still required.
reqReturn = urllib2.urlopen(URL_BAIDU_INDEX)

# Fetch the login token.
tokenReturn = urllib2.urlopen(URL_BAIDU_TOKEN)
matchVal = re.search(u'"token" : "(?P<tokenVal>.*?)"', tokenReturn.read())
if matchVal is None:
    # Fail with a clear message instead of an AttributeError on None.
    raise RuntimeError('login token not found in response from %s' % URL_BAIDU_TOKEN)
tokenVal = matchVal.group('tokenVal')

# Build the login form. The field set was obtained by capturing the traffic
# of a request to https://passport.baidu.com/v2/api/?login
postData = {
    'username': username,
    'password': password,
    'u': 'https://passport.baidu.com/',
    'tpl': 'pp',
    'token': tokenVal,
    'staticpage': 'https://passport.baidu.com/static/passpc-account/html/v3Jump.html',
    'isPhone': 'false',
    'charset': 'UTF-8',
    'callback': 'parent.bd__pcbs__ra48vi',
}
postData = urllib.urlencode(postData)

# Send the login request.
loginRequest = urllib2.Request(URL_BAIDU_LOGIN, postData)
loginRequest.add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8')
# NOTE: urllib2 does not decompress responses itself; the login response body
# is not read below, so a compressed reply does no harm here.
loginRequest.add_header('Accept-Encoding', 'gzip,deflate,sdch')
loginRequest.add_header('Accept-Language', 'zh-CN,zh;q=0.8')
loginRequest.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko')
loginRequest.add_header('Content-Type', 'application/x-www-form-urlencoded')
sendPost = urllib2.urlopen(loginRequest)

# Optional login check (disabled): request a page of the webmaster backend
# and print the HTTP status code.
# houtai = 'http://zhanzhang.baidu.com/crawltools/index'
# content = urllib2.urlopen(houtai)
# print content.getcode()
# Submit rule data.
def psot(http, prefix, suffix):
    """POST one (prefix, suffix) rule to the Baidu webmaster index tool.

    The request goes through urllib2.urlopen(), i.e. through whatever opener
    is globally installed; this file installs a cookie-aware one at import
    time.

    Args:
        http: Site URL, interpolated as-is into the ``site=`` query parameter
            of both the request URL and the Referer header.
        prefix: Value sent as the ``prefix`` form field.
        suffix: Value sent as the ``suffix`` form field.

    Returns:
        The raw response body on success, or None when the request failed
        (the error is printed and not re-raised).
    """
    login_data = {
        "prefix": prefix,
        "suffix": suffix,
    }
    print(login_data)
    url = 'http://zhanzhang.baidu.com/indexs/addrule?site=%s' % http
    headers = {
        'X-Request-By': 'baidu.ajax',  # can also be omitted
        'X-Requested-With': 'XMLHttpRequest',
        'Referer': 'http://zhanzhang.baidu.com/indexs/index?site=%s' % http,
    }

    data = urllib.urlencode(login_data)
    req = urllib2.Request(url, data, headers)
    try:
        reason = urllib2.urlopen(req)
        try:
            return reason.read()
        finally:
            # Close the connection even if read() raises.
            reason.close()
    except Exception as e:
        # Best effort: report the failure and let the caller continue with
        # the next rule.
        print(e)
        return None
def main(http, path='c://1//post_index.txt'):
    """Submit every rule listed in a tab-separated file for one site.

    Prepare a file (by default 'post_index.txt') with two tab-separated
    columns per line; they are the two parts that the Baidu index-volume
    tool asks for when adding a URL rule. Example lines (<TAB> marks the
    separator):

        *<TAB>abc*p*.html
        *<TAB>a*.html
        *<TAB>b*c*/?*

    Args:
        http: Site URL passed through to psot() for every rule.
        path: Location of the rule file.

    Blank lines are skipped. Lines with fewer than two columns are reported
    and skipped. Columns after the second are ignored.
    """
    with open(path, 'r') as rule_file:
        for line_no, line in enumerate(rule_file, 1):
            stripped = line.strip()
            if not stripped:
                continue
            fields = stripped.split('\t')
            if len(fields) < 2:
                print('skipping malformed line %d: %r' % (line_no, line))
                continue
            psot(http, fields[0], fields[1])
if __name__ == '__main__':
    # Placeholder URL; replace with the site whose rules should be submitted.
    main(http="http://网址")
复制代码 |