Python模拟163登陆获取邮件列表

开发 后端
近期看了《python模拟登陆163邮箱并获取通讯录》一文,受到启发,试着对收件箱、发件箱等进行了分析,并列出了所有邮件列表及状态,包括发件人、收件人、主题、发信时间、已读未读等状态。

利用cookielib和urllib2模块模拟登陆163的例子有很多,近期看了《python模拟登陆163邮箱并获取通讯录》一文,受到启发,试着对收件箱、发件箱等进行了分析,并列出了所有邮件列表及状态,包括发件人、收件人、主题、发信时间、已读未读等状态。

1、参考代码:http://hi.baidu.com/fc_lamp/blog/item/2466d1096fcc532de8248839.html

  1. #-*- coding:UTF-8 -*-  
  2. import urllib,urllib2,cookielib  
  3. import xml.etree.ElementTree as etree #xml解析类  
  4.  
  5. class Login163:  
  6.    #伪装browser  
  7.     header = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}  
  8.     username = '' 
  9.     passwd = '' 
  10.     cookie = None #cookie对象  
  11.     cookiefile = './cookies.dat' #cookie临时存放地  
  12.     user = '' 
  13.       
  14.     def __init__(self,username,passwd):  
  15.         self.username = username  
  16.         self.passwd = passwd  
  17.         #cookie设置  
  18.         self.cookie = cookielib.LWPCookieJar() #自定义cookie存放  
  19.         opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie))  
  20.         urllib2.install_opener(opener)  
  21.  
  22.    #登陆      
  23.     def login(self):         
  24.  
  25.         #请求参数设置  
  26.         postdata = {  
  27.             'username':self.username,  
  28.             'password':self.passwd,  
  29.             'type':1 
  30.             }  
  31.         postdata = urllib.urlencode(postdata)  
  32.  
  33.         #发起请求  
  34.         req = urllib2.Request(  
  35.                 url='http://reg.163.com/logins.jsp?type=1&product=mail163&url=http://entry.mail.163.com/coremail/fcg/ntesdoor2?lightweight%3D1%26verifycookie%3D1%26language%3D-1%26style%3D1',  
  36.                 data= postdata,#请求数据  
  37.                 headers = self.header #请求头  
  38.             )  
  39.  
  40.         result = urllib2.urlopen(req).read()  
  41.         result = str(result)  
  42.         self.user = self.username.split('@')[0]  
  43.  
  44.         self.cookie.save(self.cookiefile)#保存cookie  
  45.           
  46.         if '登录成功,正在跳转...' in result:  
  47.             #print("%s 你已成功登陆163邮箱。---------\n" %(user))  
  48.             flag = True 
  49.         else:  
  50.             flag = '%s 登陆163邮箱失败。'%(self.user)  
  51.              
  52.         return flag  
  53.  
  54.    #获取通讯录  
  55.     def address_list(self):  
  56.  
  57.         #获取认证sid  
  58.         auth = urllib2.Request(  
  59.                 url='http://entry.mail.163.com/coremail/fcg/ntesdoor2?username='+self.user+'&lightweight=1&verifycookie=1&language=-1&style=1',  
  60.                 headers = self.header  
  61.             )  
  62.         auth = urllib2.urlopen(auth).read()  
  63.         for i,sid in enumerate(self.cookie):#enumerate()用于同时返数字索引与数值,实际上是一个元组:((0,test[0]),(1,test[1]).......)这有点像php里的foreach 语句的作用  
  64.             sid = str(sid)  
  65.             if 'sid' in sid:  
  66.                 sid = sid.split()[1].split('=')[1]  
  67.                 break 
  68.         self.cookie.save(self.cookiefile)  
  69.           
  70.         #请求地址  
  71.         url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=global:sequential&showAd=false&userType=browser&uid='+self.username  
  72.         #参数设定(var 变量是必需要的,不然就只能看到:<code>S_OK</code><messages/>这类信息)  
  73.         #这里参数也是在firebug下查看的。  
  74.         postdata = {  
  75.             'func':'global:sequential',  
  76.             'showAd':'false',  
  77.             'sid':sid,  
  78.             'uid':self.username,  
  79.             'userType':'browser',  
  80.             'var':'<?xml version="1.0"?><object><array name="items"><object><string name="func">pab:searchContacts</string><object name="var"><array name="order"><object><string name="field">FN</string><boolean name="desc">false</boolean><boolean name="ignoreCase">true</boolean></object></array></object></object><object><string name="func">pab:getAllGroups</string></object></array></object>' 
  81.             }  
  82.         postdata = urllib.urlencode(postdata)  
  83.           
  84.         #组装请求  
  85.         req = urllib2.Request(  
  86.             url = url,  
  87.             data = postdata,  
  88.             headers = self.header  
  89.             )  
  90.         res = urllib2.urlopen(req).read()  
  91.           
  92.         #解析XML,转换成json  
  93.         #说明:由于这样请求后163给出的是xml格式的数据,  
  94.         #为了返回的数据能方便使用最好是转为JSON  
  95.         json = []  
  96.         tree = etree.fromstring(res)  
  97.         obj = None 
  98.         for child in tree:  
  99.             if child.tag == 'array':  
  100.                 obj = child              
  101.                 break 
  102.         #这里多参考一下,etree元素的方法属性等,包括attrib,text,tag,getchildren()等  
  103.         obj = obj[0].getchildren().pop()  
  104.         for child in obj:  
  105.             for x in child:  
  106.                 attr = x.attrib  
  107.                 if attr['name']== 'EMAIL;PREF':  
  108.                     value = {'email':x.text}  
  109.                     json.append(value)  
  110.         return json  
  111.           
  112. #Demo  
  113. print("Requesting......\n\n")  
  114. login = Login163('xxxx@163.com','xxxxx')  
  115. flag = login.login()  
  116. if type(flag) is bool:  
  117.     print("Successful landing,Resolved contacts......\n\n")  
  118.     res = login.address_list()  
  119.     for x in res:  
  120.         print(x['email'])  
  121. else:  
  122.     print(flag) 

#p#

2、分析收件箱、发件箱等网址

在参考代码中,获取通讯录的url为

url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=global:sequential&showAd=false&userType=browser&uid='+self.username

通过对邮箱地址的分析,发现收件箱、发件箱等的url为:

url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=mbox:listMessages&showAd=false&userType=browser&uid='+self.username

其中func=mbox:listMessages。收件箱与发件箱的具体区分体现在下面的postdata中,具体为:

(1)收件箱

  1. postdata = {  
  2. 'func':'global:sequential',  
  3. 'showAd':'false',  
  4. 'sid':'qACVwiwOfuumHPdcYqOOUTAjEXNbBeAr',  
  5. 'uid':self.username,  
  6. 'userType':'browser',  
  7. 'var':'<?xml version="1.0"?><object><int name="fid">1</int><string name="order">date</string><boolean name="desc">true</boolean><boolean name="topFirst">false</boolean><int name="start">0</int><int name="limit">20</int></object>' 
  8. }

(2)发件箱

  1. postdata = {  
  2. 'func':'global:sequential',  
  3. 'showAd':'false',  
  4. 'sid':'qACVwiwOfuumHPdcYqOOUTAjEXNbBeAr',  
  5. 'uid':self.username,  
  6. 'userType':'browser',  
  7. 'var':'<?xml version="1.0"?><object><int name="fid">3</int><string name="order">date</string><boolean name="desc">true</boolean><boolean name="topFirst">false</boolean><int name="start">0</int><int name="limit">20</int></object>' 
  8. }

可以看出,两段代码的不同之处就是fid的取值不同,其中收件箱为1,发件箱为3,草稿箱为2。

#p#

3、xml解析

利用ElementTree 类来进行xml到字典的转换。在获取通讯录的实例中,主要使用了这一方法。本例子(具体代码见后文)在收取邮件列表时,并没有用这一方法,仍然使用的是字符串的处理方法。但这里还是列一下ElementTree 类对xml的处理。如(参考地址:http://hi.baidu.com/fc_lamp/blog/item/8ed2d53ada4586f714cecb3d.html):

  1. -<result>  
  2.    <code>S_OK</code>  
  3.  -<array name="var">  
  4.   -<object>  
  5.      <string name="code">S_OK</string>  
  6.     -<array name="var">  
  7.      +<object></object>  
  8.      +<object></object>  
  9.      +<object></object>  
  10.      +<object></object>  
  11.      +<object></object>  
  12.      +<object></object>  
  13.      +<object></object>  
  14.      +<object></object>  
  15.      +<object></object>  
  16.      +<object></object>  
  17.      +<object></object>  
  18.      +<object></object>  
  19.      +<object></object>  
  20.      +<object></object>  
  21.      +<object></object>  
  22.      +<object></object>  
  23.      </array>  
  24.    </object>  
  25.   +<object></object>  
  26.   </array>  
  27.  </result> 

解决方法:

  1. #-*- coding:UTF-8 -*-  
  2.  
  3. import xml.etree.ElementTree as etree #xml解析类  
  4. def xml2json(xml):  
  5.     json = []  
  6.     tree = etree.fromstring(xml) #如果是文件可用parse(source)  
  7.     obj = None 
  8.     for child in tree:  
  9.         if child.tag == 'array':  
  10.             obj = child              
  11.             break 
  12.     #这里多参考一下,etree元素的方法属性等,包括attrib,text,tag,getchildren()等  
  13.     obj = obj[0].getchildren().pop()  
  14.     for child in obj:  
  15.         for x in child:  
  16.             attr = x.attrib  
  17.             if attr['name']== 'EMAIL;PREF':  
  18.                 value = {'email':x.text}  
  19.                 json.append(value)  
  20.     return json 

#p#

4、收件箱邮件列表

本例子只列出了收件箱邮件列表,如果需要,可根据以上介绍调整fid值,列出发件箱、草稿箱等的邮件列表。程序在Windows XP、Python 2.6环境下调试通过,运行后,会在当前目录下生成三个文件:inboxlistfile.txt记录收件箱邮件列表,addfile.txt记录通讯录,cookies.dat记录cookies。具体代码如下:

  1. #-*- coding:UTF-8 -*-  
  2. import urllib,urllib2,cookielib  
  3. import xml.etree.ElementTree as etree #xml解析类  
  4.  
  5. class Login163:  
  6.    #伪装browser  
  7.     header = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}  
  8.     username = '' 
  9.     passwd = '' 
  10.     cookie = None #cookie对象  
  11.     cookiefile = './cookies.dat' #cookie临时存放地  
  12.     user = '' 
  13.       
  14.     def __init__(self,username,passwd):  
  15.         self.username = username  
  16.         self.passwd = passwd  
  17.         #cookie设置  
  18.         self.cookie = cookielib.LWPCookieJar() #自定义cookie存放  
  19.         opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie))  
  20.         urllib2.install_opener(opener)  
  21.  
  22.    #登陆      
  23.     def login(self):         
  24.  
  25.         #请求参数设置  
  26.         postdata = {  
  27.             'username':self.username,  
  28.             'password':self.passwd,  
  29.             'type':1 
  30.             }  
  31.         postdata = urllib.urlencode(postdata)  
  32.  
  33.         #发起请求  
  34.         req = urllib2.Request(  
  35.                 url='http://reg.163.com/logins.jsp?type=1&product=mail163&url=http://entry.mail.163.com/coremail/fcg/ntesdoor2?lightweight%3D1%26verifycookie%3D1%26language%3D-1%26style%3D1',  
  36.                 data= postdata,#请求数据  
  37.                 headers = self.header #请求头  
  38.             )  
  39.  
  40.         result = urllib2.urlopen(req).read()  
  41.         result = str(result)  
  42.         #print result  
  43.         self.user = self.username.split('@')[0]  
  44.  
  45.         self.cookie.save(self.cookiefile)#保存cookie  
  46.           
  47.         if '登录成功,正在跳转...' in result:  
  48.             #print("%s 你已成功登陆163邮箱。---------n" %(user))  
  49.             flag = True 
  50.         else:  
  51.             flag = '%s 登陆163邮箱失败。'%(self.user)  
  52.              
  53.         return flag  
  54.  
  55.    #获取通讯录  
  56.     def address_list(self):  
  57.  
  58.         #获取认证sid  
  59.         auth = urllib2.Request(  
  60.                 url='http://entry.mail.163.com/coremail/fcg/ntesdoor2?username='+self.user+'&lightweight=1&verifycookie=1&language=-1&style=1',  
  61.                 headers = self.header  
  62.             )  
  63.         auth = urllib2.urlopen(auth).read()  
  64.  
  65.         #authstr=str(auth)  
  66.         #print authstr  
  67.           
  68.         for i,sid in enumerate(self.cookie):  
  69.             sid = str(sid)  
  70.             #print 'sid:%s' %sid  
  71.             if 'sid' in sid:  
  72.                 sid = sid.split()[1].split('=')[1]  
  73.                 break 
  74.         self.cookie.save(self.cookiefile)  
  75.           
  76.         #请求地址  
  77.         url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=global:sequential&showAd=false&userType=browser&uid='+self.username  
  78.         #参数设定(var 变量是必需要的,不然就只能看到:<code>S_OK</code><messages>这类信息)  
  79.         #这里参数也是在firebug下查看的。  
  80.         postdata = {  
  81.             'func':'global:sequential',  
  82.             'showAd':'false',  
  83.             'sid':'qACVwiwOfuumHPdcYqOOUTAjEXNbBeAr',  
  84.             'uid':self.username,  
  85.             'userType':'browser',  
  86.             'var':'<!--?xml version="1.0"?--><object><array name="items"><object><string name="func">pab:searchContacts</string><object name="var"><array name="order"><object><string name="field">FN</string><boolean name="desc">false</boolean><boolean name="ignoreCase">true</boolean></object></array></object></object><object><string name="func">pab:getAllGroups</string></object></array></object>' 
  87.             }  
  88.         postdata = urllib.urlencode(postdata)  
  89.           
  90.         #组装请求  
  91.         req = urllib2.Request(  
  92.             url = url,  
  93.             data = postdata,  
  94.             headers = self.header  
  95.             )  
  96.         res = urllib2.urlopen(req).read()  
  97.  
  98.         #print str(res)  
  99.           
  100.         #解析XML,转换成json  
  101.         #说明:由于这样请求后163给出的是xml格式的数据,  
  102.         #为了返回的数据能方便使用最好是转为JSON  
  103.         json = []  
  104.         tree = etree.fromstring(res)  
  105.  
  106.           
  107.           
  108.         obj = None 
  109.         for child in tree:  
  110.             if child.tag == 'array':  
  111.                 obj = child              
  112.                 break 
  113.         #这里多参考一下,etree元素的方法属性等,包括attrib,text,tag,getchildren()等  
  114.         obj = obj[0].getchildren().pop()  
  115.         for child in obj:  
  116.             for x in child:  
  117.                 attr = x.attrib  
  118.                 if attr['name']== 'EMAIL;PREF':  
  119.                     value = {'email':x.text}  
  120.                     json.append(value)  
  121.         return json  
  122. #获取收件箱  
  123.     def minbox(self):  
  124.         #获取认证sid  
  125.         auth = urllib2.Request(  
  126.                 url='http://entry.mail.163.com/coremail/fcg/ntesdoor2?username='+self.user+'&lightweight=1&verifycookie=1&language=-1&style=1',  
  127.                 headers = self.header  
  128.             )  
  129.         auth = urllib2.urlopen(auth).read()  
  130.  
  131.         #authstr=str(auth)  
  132.         #print authstr  
  133.           
  134.         for i,sid in enumerate(self.cookie):  
  135.             sid = str(sid)  
  136.             #print 'sid:%s' %sid  
  137.             if 'sid' in sid:  
  138.                 sid = sid.split()[1].split('=')[1]  
  139.                 break 
  140.         self.cookie.save(self.cookiefile)  
  141.           
  142.           
  143.         url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=mbox:listMessages&showAd=false&userType=browser&uid='+self.username  
  144.           
  145.         postdata = {  
  146.             'func':'global:sequential',  
  147.             'showAd':'false',  
  148.             'sid':'qACVwiwOfuumHPdcYqOOUTAjEXNbBeAr',  
  149.             'uid':self.username,  
  150.             'userType':'browser',  
  151.             'var':'<!--?xml version="1.0"?--><object><int name="fid">1</int><string name="order">date</string><boolean name="desc">true</boolean><boolean name="topFirst">false</boolean><int name="start">0</int><int name="limit">20</int></object>' 
  152.             }  
  153.         postdata = urllib.urlencode(postdata)  
  154.           
  155.         #组装请求  
  156.         req = urllib2.Request(  
  157.             url = url,  
  158.             data = postdata,  
  159.             headers = self.header  
  160.             )  
  161.         res = urllib2.urlopen(req).read()  
  162.  
  163.         liststr=str(res).split('<object>')#用object进行分割  
  164.         inboxlistcount=len(liststr)-1#记录邮件封数  
  165.         inboxlistfile=open('inboxlistfile.txt','w')  
  166.         t=0  #记录当前第几封信  
  167.         for i in liststr:  
  168.             if 'xml' in i and ' version=' in i:  
  169.                 inboxlistfile.write('inbox 共'+str(inboxlistcount)+'信')  
  170.                 inboxlistfile.write('\n')  
  171.             if 'name="id"' in i:  
  172.                 t=t+1 
  173.                 inboxlistfile.write('第'+str(t)+'封:')  
  174.                 inboxlistfile.write('\n')  
  175.                 #写入from  
  176.                 beginnum=i.find('name="from"')  
  177.                 endnum=i.find('</string>',beginnum)  
  178.                 inboxlistfile.write('From:'+i[beginnum+12:endnum])  
  179.                 inboxlistfile.write('\n')  
  180.                 #写入to  
  181.                 beginnum=i.find('name="to"')  
  182.                 endnum=i.find('</string>',beginnum)  
  183.                 inboxlistfile.write('TO:'+i[beginnum+10:endnum])  
  184.                 inboxlistfile.write('\n')  
  185.                 #写入subject  
  186.                 beginnum=i.find('name="subject"')  
  187.                 endnum=i.find('</string>',beginnum)  
  188.                 inboxlistfile.write('Subject:'+i[beginnum+15:endnum])  
  189.                 inboxlistfile.write('\n')  
  190.                 #写入date:  
  191.                 beginnum=i.find('name="sentDate"')  
  192.                 endnum=i.find('</date>',beginnum)  
  193.                 inboxlistfile.write('Date:'+i[beginnum+16:endnum])  
  194.                 inboxlistfile.write('\n')  
  195.                 if 'name="read">true' in i:  
  196.                     inboxlistfile.write('邮件状态:已读')  
  197.                     inboxlistfile.write('\n')  
  198.                 else:  
  199.                     inboxlistfile.write('邮件状态:未读')  
  200.                     inboxlistfile.write('\n')  
  201.                 #写用邮件尺寸  
  202.                 beginnum=i.find('name="size"')  
  203.                 endnum=i.find('</int>',beginnum)  
  204.                 inboxlistfile.write('邮件尺寸:'+i[beginnum+12:endnum])  
  205.                 inboxlistfile.write('\n')  
  206.                 #写入邮件编号,用于下载邮件  
  207.                 beginnum=i.find('name="id"')  
  208.                 endnum=i.find('</string>',beginnum)  
  209.                 inboxlistfile.write('邮件编号:'+i[beginnum+10:endnum])  
  210.                 inboxlistfile.write('\n\n')  
  211.                   
  212.         inboxlistfile.close()  
  213.                   
  214.           
  215.           
  216. if __name__=='__main__':  
  217.     print("Edit @xiaowuyi V1.0  http://www.cnblogs.com/xiaowuyi")  
  218.     login = Login163('XXXX@163.com','AAAAA')  
  219.     flag = login.login()  
  220.     if type(flag) is bool:  
  221.       
  222.     #login.letterdown()  
  223.         print("登陆成功,正在下载列表和通讯录………………")  
  224.         login.minbox()  
  225.         res = login.address_list()  
  226.         addfile=open('addfile.txt','w')  
  227.         for x in res:  
  228.             addfile.write(x['email'])  
  229.         addfile.close()  
  230.         print("已完成")  
  231.     else:  
  232.         print(flag) 

原文链接:http://www.cnblogs.com/xiaowuyi/archive/2012/05/21/2511428.html

责任编辑:张伟 来源: 小五义的博客
相关推荐

2018-07-13 08:56:16

编程语言Python

2021-06-02 14:45:52

远程服务器Python

2011-07-20 09:27:37

Scala

2021-10-09 08:07:56

Python 3.11Microsoft S应用商店

2011-03-08 13:52:25

Proftpd

2010-04-21 17:20:03

Unix远程

2009-07-16 15:14:27

WebWork用户登陆

2012-11-07 10:09:11

组件技术OAuth授权登陆

2012-02-14 10:46:15

WP Marketpl杂志月刊

2010-08-18 08:21:49

Adobe AIRAndroid

2009-12-15 17:28:58

戴尔互联课堂

2009-02-18 22:19:24

AD用户登陆实现限制

2011-02-25 17:07:25

2020-03-01 17:04:17

Python数据运维

2009-08-14 09:40:55

MyEclipse整合

2010-11-22 17:41:39

Ubuntu OneWindows

2009-01-12 17:34:11

服务器虚拟化VMware

2017-11-14 08:25:36

数据库MySQL安全登陆

2009-08-16 20:24:59

linux命令行登陆linux命令行linux命令

2012-01-10 11:52:39

宏碁AcerCloud宏基Web服务器
点赞
收藏

51CTO技术栈公众号