Scraping Ctrip reviews with Python
The script below posts paged requests to Ctrip's `getCommentCollapseList` endpoint and appends each review's comment ID and text to `xiecheng.csv`:

```python
import requests
import json
import time

pagesize = 300  # crawl up to 299 pages of 10 reviews each
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36',
}
posturl = "https://m.ctrip.com/restapi/soa2/13444/json/getCommentCollapseList?_fxpcqlniredt=09031099112624127484"


def getdata():
    j = 1  # running count of reviews saved (not otherwise used)
    for i in range(1, pagesize):
        request = {
            'arg': {'channelType': '2', 'collapseType': '0', 'commentTagId': '0',
                    'pageIndex': str(i), 'pageSize': '10', 'poiId': '75916',
                    'sortType': '3', 'sourceType': '1', 'starType': '0'},
            'head': {'auth': "", 'cid': "09031099112624127484", 'ctok': "", 'cver': "1.0",
                     'extension': [], 'lang': "01", 'sid': "8888", 'syscode': "09", 'xsid': ""}
        }
        time.sleep(3)  # pause between requests to avoid hammering the API
        html = requests.post(posturl, data=json.dumps(request), headers=headers)
        html1 = json.loads(html.text)
        print('Scraping page ' + str(i))
        items = html1['result']['items']
        # append each review's ID and text to a tab-separated file;
        # GB18030 keeps the Chinese text readable when opened in Excel
        with open("xiecheng.csv", "a", newline='', encoding='GB18030') as f:
            for k in items:
                f.write(str(k['commentId']))
                f.write("\t")
                f.write(k['content'])
                f.write("\n")
                j += 1


if __name__ == '__main__':
    getdata()
```
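Review text can contain tabs or line breaks, which would break the tab-separated output above, and crawling a fixed 300 pages wastes requests once the POI runs out of reviews. Below is a minimal sketch of a variant that uses `csv.writer` for proper quoting and stops early. It reuses `posturl` and `headers` from the script above; the function name `getdata_csv` is made up for illustration, and the early-exit condition assumes the API returns an empty `result.items` list once the pages are exhausted.

```python
import csv
import json
import time

import requests


def getdata_csv(poi_id="75916", outfile="xiecheng.csv", max_pages=300):
    """Sketch: same request loop as getdata(), but with csv.writer and an early stop."""
    # posturl and headers are the ones defined in the script above
    with open(outfile, "a", newline='', encoding='GB18030') as f:
        writer = csv.writer(f, delimiter='\t')
        for page in range(1, max_pages):
            request = {
                'arg': {'channelType': '2', 'collapseType': '0', 'commentTagId': '0',
                        'pageIndex': str(page), 'pageSize': '10', 'poiId': poi_id,
                        'sortType': '3', 'sourceType': '1', 'starType': '0'},
                'head': {'auth': "", 'cid': "09031099112624127484", 'ctok': "", 'cver': "1.0",
                         'extension': [], 'lang': "01", 'sid': "8888", 'syscode': "09", 'xsid': ""}
            }
            time.sleep(3)  # be polite: pause between requests
            resp = requests.post(posturl, data=json.dumps(request), headers=headers)
            items = resp.json().get('result', {}).get('items') or []
            if not items:
                # Assumption: an empty items list means there are no more pages.
                print('No comments on page ' + str(page) + ', stopping.')
                break
            for item in items:
                # csv.writer quotes fields, so tabs/newlines inside the review
                # text no longer break the output file
                writer.writerow([item['commentId'], item['content']])
            print('Saved page ' + str(page) + ' (' + str(len(items)) + ' comments)')
```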