import csv
import json
import time

import requests
# Exclusive upper bound of the page loop in getdata(): pages 1 .. pagesize - 1
# are requested.
pagesize = 300

# Endpoint that getdata() POSTs each page request to.
posturl = (
    "https://m.ctrip.com/restapi/soa2/13444/json/getCommentCollapseList"
    "?_fxpcqlniredt=09031099112624127484"
)

# HTTP headers sent with every request; only a desktop Chrome User-Agent is set.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/94.0.4606.71 Safari/537.36"
    ),
}

def _comment_request(page_index):
    """Return the JSON-serialisable request body for comment page *page_index*."""
    return {
        'arg': {
            'channelType': '2',
            'collapseType': '0',
            'commentTagId': '0',
            'pageIndex': str(page_index),
            'pageSize': '10',
            'poiId': '75916',
            'sortType': '3',
            'sourceType': '1',
            'starType': '0',
        },
        'head': {
            'auth': "",
            'cid': "09031099112624127484",
            'ctok': "",
            'cver': "1.0",
            'extension': [],
            'lang': "01",
            'sid': "8888",
            'syscode': "09",
            'xsid': "",
        },
    }


def _fetch_items(page_index):
    """POST the request for one page and return its list of comment dicts.

    Returns an empty list when the response has no ``result`` object or its
    ``items`` entry is missing or null.
    """
    response = requests.post(
        posturl,
        data=json.dumps(_comment_request(page_index)),
        headers=headers,
        # Without a timeout a stalled connection would block the crawl forever.
        timeout=30,
    )
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    result = response.json().get('result') or {}
    return result.get('items') or []


def getdata():
    """Download comment pages and append them to ``xiecheng.csv``.

    Pages ``1 .. pagesize - 1`` are requested one at a time from ``posturl``
    (for ``poiId`` 75916), sleeping three seconds before each request.  For
    every comment in a page's ``result.items`` list, one tab-separated row
    ``commentId<TAB>content`` is appended to the GB18030-encoded output file.
    Rows are written through ``csv.writer`` so that content containing tabs,
    line breaks or quote characters is quoted and stays a single record.

    The crawl stops at the first page whose response carries no items.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status.
        ValueError: if a response body is not valid JSON.
        KeyError: if a comment lacks ``commentId`` or ``content``.
    """
    for page in range(1, pagesize):
        # Throttle: wait before every request, including the first one.
        time.sleep(3)
        items = _fetch_items(page)
        print('正在爬取第'+str(page)+'页')
        if not items:
            # No comments on this page: nothing left to write, so stop
            # instead of requesting the remaining pages.
            break
        # Append per page so already-downloaded pages are on disk even if a
        # later request fails.
        with open("xiecheng.csv", "a", newline='', encoding='GB18030') as f:
            writer = csv.writer(f, delimiter='\t', lineterminator='\n')
            writer.writerows(
                (item['commentId'], item['content']) for item in items
            )

# Start the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    getdata()
# Logo
#
# 开放原子开发者工作坊旨在鼓励更多人参与开源活动,与志同道合的开发者们相互交流开发经验、分享开发心得、获取前沿技术趋势。工作坊有多种形式的开发者活动,如meetup、训练营等,主打技术交流,干货满满,真诚地邀请各位开发者共同参与!
#
# 更多推荐