Python 爬取人人车网站数据

使用 Python 对人人车网站进行爬取。# 导入库 requests、re、xlwt —— import requests；import re；import xlwt；header={"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987...

广州测试

1889人浏览 · 2020-02-23 16:30:12

广州测试 · 2020-02-23 16:30:12 发布

使用 Python 对人人车网站进行爬取

# 导入库requests、re、xlwt
import requests
import re
import xlwt
# Request settings: the User-Agent header sent with the request and the
# listing page that is scraped.
header={"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.110 Safari/537.36"}
car_url="https://www.renrenche.com/ganzhou/ershouche/le-suv_chaozhi/?le=suv&tag=gr&plog_id=a78e0769211c88d019cac53778147f92"

# Header row of the spreadsheet, in column order.
colName=['车型品牌','路程数','首付价格','成交价格']

# Patterns are compiled once instead of on every loop iteration.
# re.S lets '.' span newlines, because the markup is multi-line.
ITEM_PATTERN = re.compile(r'<li class="span6 list-item car-item " data-is-near="0" style="">(.*?)</li>', re.S)
NAME_PATTERN = re.compile(r'<h3 class="rrcttff6fc32688c0c8524aa2cf4b5c01d508b">(.*?)</h3>', re.S)
MILEAGE_PATTERN = re.compile(r'<em class="separator">/</em>(.*?)</span>', re.S)
DOWN_PAYMENT_PATTERN = re.compile(r'<div class="down-payment">首付<div class="m-l">(.*?)</div>万</div>', re.S)
PRICE_PATTERN = re.compile(r'"price">(.*?)<span>万</span>', re.S)


def first_match(pattern, text, strip=True):
    """Return group 1 of the first match of *pattern* in *text*.

    Args:
        pattern: Compiled regular expression with one capturing group.
        text: String to search.
        strip: When true, surrounding whitespace is removed from the result.

    Returns:
        The captured text, or ``''`` when the pattern does not match.
        Returning an empty string instead of raising keeps one incomplete
        listing from aborting the whole run.
    """
    found = pattern.search(text)
    if found is None:
        return ''
    value = found.group(1)
    return value.strip() if strip else value


def parse_car(item_html):
    """Extract one listing's fields from the HTML inside its ``<li>`` element.

    Args:
        item_html: Inner HTML of a single car list item.

    Returns:
        A ``(name, mileage, down_payment, price)`` tuple of strings, in the
        same order as ``colName``. A field that is absent from the markup is
        returned as ``''``. The name is returned exactly as captured; the
        other three fields are whitespace-stripped.
    """
    return (
        # Only the first <h3> match is used: a row has a single name cell.
        first_match(NAME_PATTERN, item_html, strip=False),
        first_match(MILEAGE_PATTERN, item_html),
        first_match(DOWN_PAYMENT_PATTERN, item_html),
        first_match(PRICE_PATTERN, item_html),
    )


def parse_listing(page_html):
    """Return a list of ``parse_car`` tuples, one per car item in *page_html*."""
    return [parse_car(item) for item in ITEM_PATTERN.findall(page_html)]


def main():
    """Download the listing page, print each car and save all rows to Excel.

    Raises:
        requests.RequestException: If the request times out, fails, or the
            server answers with an HTTP error status.
    """
    # A timeout prevents the script from hanging forever on a stalled
    # connection; raise_for_status stops an error page from being parsed
    # into an empty spreadsheet.
    CarData = requests.get(car_url, headers=header, timeout=10)
    CarData.raise_for_status()
    CarData.encoding = 'utf-8'  # avoid garbled text (utf-8 or gbk)

    workBook = xlwt.Workbook(encoding='utf-8')  # create the Excel workbook
    workSheet = workBook.add_sheet('res')       # create the worksheet
    for col, title in enumerate(colName):
        workSheet.write(0, col, title)

    # Row 0 holds the header, so data rows start at 1.
    for row, car in enumerate(parse_listing(CarData.text), start=1):
        name, mileage, down_payment, price = car
        print(name)
        print(mileage)
        print('首付', down_payment, '万')
        print(price, '万')
        for col, value in enumerate(car):
            workSheet.write(row, col, value)

    workBook.save(r'C:\Users\Administrator\Desktop\人人车.xls')


if __name__ == "__main__":
    main()

效果图：