- A+
所属分类:python笔记
需要统计代理商那边每月刷的好评数,手动统计太慢了,写了个Python脚本来干这活儿,每月跑一次就行了,没啥技术含量,只是为了代码备忘。
目前只能抓取 OPPO 和魅族(MEIZU)的评论;小米的评论需要在手机上抓包,还没有去做;腾讯的评论通过 AJAX 加载,不好抓取,已放弃。
#coding=utf-8 #!/usr/local/bin/python import requests,time,random,pycurl,json,StringIO,datetime,re,threading,urllib,Queue import os import sys from urlparse import * from lxml import etree import hashlib import jieba import jieba.analyse import codecs #为了生成gbk编码的文件 import chardet # from lxml import * from multiprocessing.dummy import Pool as ThreadPool #import MySQLdb as mdb reload(sys) sys.setdefaultencoding('utf-8') today = datetime.date.today() #############oppo############# #############oppo############# #############oppo############# f = open('oppo.txt','w') for i in xrange(1,36): r = requests.get('http://store.oppomobile.com/comment/list.json?id=11004487&page=%s'%(i)) html = r.text html_json = json.loads(html) commentsList = html_json['commentsList'] for a in commentsList: id = a['id'] word = a['word'].replace('\n',' ') # print i,id,word userGrade = a['userGrade'] version = a['version'] userNickName = a['userNickName'] source = a['source'] createDate = a['createDate'] print i,id,createDate,userGrade,version,userNickName,source,word f.write('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n'%(i,id,createDate,userGrade,version,userNickName,source,word)) f.flush() #############meizu############# #############meizu############# #############meizu############# def convert_to_cn(text): # 需要将 × 这种先做补全,× text = re.sub(r'&#x([A-F0-9]{2});', r'�\1;', text) return text.replace('&#x', '\u').replace(';', '').decode('unicode-escape').encode('utf-8') f = open('meizu.txt','w') for i in xrange(1,246): try: i2 = i*10 r = requests.get('http://app.meizu.com/apps/public/evaluate/list?app_id=536002&start=%s&max=10'%(i2)) html = r.text html_json = json.loads(html) # print html_json commentsList = html_json['value']['list'] for a in commentsList: create_time = a['create_time'] star = a['star'] version_name = a['version_name'] user_name = convert_to_cn(a['user_name']) comment = convert_to_cn(a['comment']) print i,create_time,star,version_name,user_name,comment 
f.write('%s\t%s\t%s\t%s\t%s\t%s\n'%(i,create_time,star,version_name,user_name,comment)) f.flush() except: pass