Fiddler安装和设置

安装

Fiddler 安装包可以从这里获取,如果失效了可以自己网上找一个安装。

链接:https://pan.baidu.com/s/10tYQ-uL6HMddkOcIKnWEKQ?pwd=d1io 

然后就是点击安装就好了,没什么好多说的。

启用HTTPS捕获

进入软件界面,点击 Tools -> Options -> HTTPS 启用捕获 https 请求并解密。

证书信任

设置信任根证书,不然进行抓包捕获时,其他网页就访问不了了。

证书安装

有时候,如果证书安装不正确,可能导致抓取 https 失败。如果你发现上面已经设置以后,仍然抓取不到 https 的话,可以尝试使用工具重新生成证书。

可以下载 fiddlercertmaker.exe 自动生成证书,具体安装过程可参考 CSDN 博客文章《Fiddler死活抓不了HTTPS包解决办法》。

链接:https://pan.baidu.com/s/19G6aBHtxQU4ViSicWw2NOw?pwd=y3uh 

设置自动转发

设置指定 url 自动转发到本地,我这里是自动把请求转发到了我本地一个 Flask 搭建的服务,设置好以后进行保存(转发地址记得和你服务的地址保持一致)。

设置自动转发 https://search.weixin.qq.com/cgi-bin/wxaweb/wxindexfluctuations 的目的主要是为了获取数据请求参数中的 openid 和 search_key,因为我需要这两个请求参数去构造新的 body。

Unmatched requests passthrough 一定要勾选上——也就是不影响其他未匹配的请求

开启捕获

可以从 File -> Capture Traffic 开启捕获,也可以用 F12 快捷键开启捕获,当左下角有 Capturing 字样时,表示捕获已开启。

然后就可以正常捕获抓取 https 请求了

数据抓取处理

搭建并启动本地服务

可以自己在本地简单写一个服务,接收 Fiddler 转发过来的请求并处理。我这里构造了两个 body,去分别获取「指数趋势」和「数据来源」两类数据。

如果出现 Your proxy appears to only use HTTP and not HTTPS 报错,把转发 url 修改成 http 即可。

# coding:utf-8
import csv
import datetime
import json
import os
import traceback

import pygal
from pygal.style import Style

import requests
import urllib3
from flask import Flask, request

app = Flask(__name__)

# Maps the API's time-series field names to the Chinese CSV column headers
# used by ResultHandler.write_csv.
time_indexes_map = {
    "time": "日期",
    "score": "指数"
}

# Maps the API's per-channel score field names to Chinese CSV column headers.
channel_scores_map = {
    "finder_score": "视频号",
    "live_score": "直播",
    "mpdoc_score": "公众号",
    "query_score": "搜一搜",
    "extlink_score": "网页",
    "ad_score": "其他",
    "total_score": "总计",
    "score_exp": "score_exp",  # no Chinese meaning found for this field; keep the raw key as the label
}

# Request headers replayed from a Fiddler capture of the WeChat Index mini
# program; the User-Agent/Referer identify the mini program to the backend.
headers = {'Host': 'search.weixin.qq.com',
           'Connection': 'keep-alive',
           # 'Content-Length': '182',
           'xweb_xhr': '1',
           'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI MiniProgramEnv/Windows WindowsWechat/WMPF WindowsWechat(0x6309092b) XWEB/9129',
           'Content-Type': 'application/json',
           'Accept': '*/*',
           'Sec-Fetch-Site': 'cross-site',
           'Sec-Fetch-Mode': 'cors',
           'Sec-Fetch-Dest': 'empty',
           'Referer': 'https://servicewechat.com/wxc026e7662ec26a3a/53/page-frame.html',
           'Accept-Encoding': 'gzip, deflate, br',
           'Accept-Language': 'zh-CN,zh;q=0.9',
           }


# class ValueColors(pygal.style.Style):
#     value_colors = ("#f6c443", "#ff6146", "#7c160", "#4fadf8", "#a9e87a", "#eda150")


class ResultHandler:
    """Renders WeChat Index results as SVG charts and writes them to CSV.

    All output files (``line.svg``, ``pie.svg``, ``<title>_<yyyymmdd>.csv``)
    are placed under ``file_save_dir``, which is created on construction.
    """

    def __init__(self, file_save_dir):
        self.file_save_dir = file_save_dir
        self._init_file_save_dir()

    def _init_file_save_dir(self):
        # exist_ok makes this idempotent across repeated requests on the same day.
        os.makedirs(self.file_save_dir, exist_ok=True)

    def draw_line(self, title, time_indexes, last_day=7):
        """Draw a stacked line chart of the last ``last_day`` daily scores.

        ``time_indexes`` is a list of ``{"time": yyyymmdd, "score": int}``
        dicts, assumed to be in chronological order — TODO confirm with caller.
        """
        time_indexes = time_indexes[-last_day:]
        date_chart = pygal.StackedLine(fill=True, interpolate='hermite', x_label_rotation=-20, style=pygal.style.LightGreenStyle)
        # Drop the year prefix (yyyymmdd -> mmdd) to keep the x labels short.
        date_chart.x_labels = [str(x["time"])[4:] for x in time_indexes]
        date_chart.add(title, [x["score"] for x in time_indexes])
        file_path = os.path.join(self.file_save_dir, "line.svg")
        date_chart.render_to_file(file_path)

    def draw_pie(self, title, channel_scores, last_day=7):
        """Draw a donut chart of each channel's percentage share summed over
        the last ``last_day`` days.

        ``channel_scores`` is a list of per-day dicts keyed like
        ``channel_scores_map``.
        """
        # Chart colors can be matched to WeChat's by sampling pixels
        # (e.g. with pyautogui's getpixel) if an exact look is wanted.
        window = channel_scores[-last_day:]
        if not window:
            # Nothing to draw; the original code would crash popping an empty list.
            return
        # Sum each channel across the window. Copy the seed dict: the slice
        # above copies the list but NOT the dicts, so accumulating into an
        # element directly would mutate the caller's data.
        channel_score = dict(window[-1])
        for day in window[:-1]:
            for key, score in day.items():
                channel_score[key] += score
        pie_chart = pygal.Pie(inner_radius=0.5)
        pie_chart.title = title
        total_score = channel_score["total_score"]
        for key, score in channel_score.items():
            # Skip non-channel entries: total_score is the denominator and
            # score_exp has no known meaning.
            if key in ("score_exp", "total_score"):
                continue
            percent = float("{:.2f}".format(100 * score / total_score))
            pie_chart.add(channel_scores_map[key], percent)
        file_path = os.path.join(self.file_save_dir, "pie.svg")
        pie_chart.render_to_file(file_path)

    def write_csv(self, title, rows: list):
        """Write ``rows`` (homogeneous dicts) to ``<title>_<yyyymmdd>.csv``.

        Known schemas (time indexes / channel scores) get a Chinese header row
        in a fixed column order; any other schema falls back to the dicts' own
        keys. Returns True on success (including empty ``rows``), False on error.
        """
        if not rows:
            return True
        # Shortest keys first; for unknown schemas this fixes a stable order.
        fieldnames = sorted(rows[0].keys(), key=len)
        file = title + "_" + datetime.datetime.now().strftime("%Y%m%d") + ".csv"
        file_path = os.path.join(self.file_save_dir, file)
        try:
            with open(file_path, 'w', newline='', encoding='utf-8') as f:
                if set(fieldnames) == set(time_indexes_map):
                    writer = csv.DictWriter(f, time_indexes_map.keys())
                    writer.writerow(time_indexes_map)  # Chinese header row
                elif set(fieldnames) == set(channel_scores_map):
                    writer = csv.DictWriter(f, channel_scores_map.keys())
                    writer.writerow(channel_scores_map)  # Chinese header row
                else:
                    writer = csv.DictWriter(f, fieldnames)
                    writer.writeheader()
                writer.writerows(rows)
        except Exception:
            # BUG FIX: the original discarded traceback.format_exc(); actually
            # print the full traceback so failures are diagnosable.
            print(traceback.format_exc())
            return False
        return True


@app.route('/post_data', methods=['POST'])
def post():
    """Handle the request Fiddler forwards from the WeChat Index mini program.

    Expects a JSON body containing "openid", "search_key" and "query", replays
    the WeChat Index API for the past 365 days, and saves charts + CSVs under
    ``./files/<today>/``. Always returns an empty JSON object.
    """
    today = datetime.datetime.now().strftime("%Y%m%d")
    file_save_dir = f"./files/{today}"
    result_handler = ResultHandler(file_save_dir)
    # verify=False is used below; silence urllib3's InsecureRequestWarning noise.
    urllib3.disable_warnings()

    data = request.get_json()
    openid = data.get("openid")
    search_key = data.get("search_key")
    query = [data.get("query")]
    end_ymd = today  # same yyyymmdd string; no need to re-format datetime.now()
    start_ymd = (datetime.datetime.now() - datetime.timedelta(365)).strftime("%Y%m%d")
    # Plain http, not https: the https form triggers
    # "Your proxy appears to only use HTTP and not HTTPS" through Fiddler.
    forward_url = 'http://search.weixin.qq.com/cgi-bin/wxaweb/wxindex'

    def call_index_api(cgi_name, extra=None):
        # One round trip to the WeChat Index backend for the given CGI name;
        # shared credentials/date range are filled in here (DRY).
        body = {'openid': openid, 'search_key': search_key, 'cgi_name': cgi_name,
                'query': query, 'start_ymd': start_ymd, 'end_ymd': end_ymd}
        if extra:
            body.update(extra)
        response = requests.post(forward_url, json=body, headers=headers, verify=False)
        return response.json()

    # Index trend: daily scores for the past year.
    response_data = call_index_api('GetDefaultIndex', {'compound_word': []})
    resp_item = response_data["content"]["resp_list"][0]
    title = resp_item["query"]
    time_indexes = resp_item["indexes"][0]["time_indexes"]
    print(time_indexes[:2])
    title_indexes = title + "_指数趋势"
    result_handler.draw_line(title_indexes, time_indexes, 30)
    result_handler.write_csv(title_indexes, time_indexes)

    # Data sources: per-channel score breakdown.
    response_data = call_index_api('GetMultiChannel')
    result_list = response_data["content"]["result_list"]
    channel_scores = [c["channel_score"] for c in result_list]
    print(channel_scores[:2])
    title_scores = title + "_数据来源"
    result_handler.draw_pie(title_scores, channel_scores, 30)
    result_handler.write_csv(title_scores, channel_scores)
    return {}


if __name__ == '__main__':
    # Bind to localhost only: Fiddler's AutoResponder/forward rule on this
    # machine is what sends traffic here. debug=True enables auto-reload.
    app.run(host="127.0.0.1", debug=True)

小程序搜索关键字

  • 进入电脑端微信
  • 搜索 微信指数 小程序
  • 进入小程序,输入想要搜索的关键词(比如:和平精英)

数据图表展示

微信图表展示如下:

我们自己使用 pygal 画的图如下(svg 图用浏览器打开),对比发现,除了插值导致的光滑度不一样,图的整体走势是一致的:

Logo

开放原子开发者工作坊旨在鼓励更多人参与开源活动,与志同道合的开发者们相互交流开发经验、分享开发心得、获取前沿技术趋势。工作坊有多种形式的开发者活动,如meetup、训练营等,主打技术交流,干货满满,真诚地邀请各位开发者共同参与!

更多推荐