From 92c8cdf9b20dc7b53b8f4c1b48c3313ae1b841c5 Mon Sep 17 00:00:00 2001 From: DELL Date: Tue, 20 Jan 2026 16:36:44 +0800 Subject: [PATCH] =?UTF-8?q?[=E5=BE=AE=E5=8D=9A]redis=20=E6=B7=BB=E5=8A=A0?= =?UTF-8?q?=20cookie=20=E6=88=90=E5=8A=9F=E8=AF=B7=E6=B1=82=E8=8E=B7?= =?UTF-8?q?=E5=8F=96=E8=BF=94=E5=8F=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../MediaSpiders/spiders/WeiboUserSpider.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/spiders/MediaSpiders/MediaSpiders/spiders/WeiboUserSpider.py b/spiders/MediaSpiders/MediaSpiders/spiders/WeiboUserSpider.py index d433a4a..f0fed88 100644 --- a/spiders/MediaSpiders/MediaSpiders/spiders/WeiboUserSpider.py +++ b/spiders/MediaSpiders/MediaSpiders/spiders/WeiboUserSpider.py @@ -2,10 +2,12 @@ import json import time +import redis import scrapy from redisbloom.client import Client from MediaSpiders.items import MediaspidersItem +from MediaSpiders.spiders.TwitterUserSpider import form_cookie_dict from MediaSpiders.utils.http_utils import http_post from MediaSpiders.utils.string_utils import find_text from MediaSpiders.utils.time_utils import get_time_stamp, get_current_timestamp @@ -59,6 +61,11 @@ class WeiboSpider(scrapy.Spider): account_rsp = json.loads( http_post(account_query_api, json.dumps(post_data), headers={"Content-Type": "application/json"}).text) self.simhash_filter_key = self.settings['WEIBO_SIMHASH_FILTER_KEY'] + # 从 redis 中 获取 微博所需的 cookie + cookie_string = redis.Redis(host=self.settings['REDIS_HOST'], port=self.settings['REDIS_PORT'], + password=self.settings['REDIS_PWD']).get("MediaSpiders:Weibo_Cookies").decode() + self.cookie_dict = form_cookie_dict(cookie_string) + all_user_info = [] if account_rsp['code'] == 200: all_user_info = account_rsp['content'] @@ -67,7 +74,10 @@ class WeiboSpider(scrapy.Spider): if uid[:6] != '107603': uid = f'107603{uid}' yield scrapy.Request('https://m.weibo.cn/api/container/getIndex?containerid=%s' % uid, - callback=self.parse, meta={'currentCount': 0, 'uid': uid}) + callback=self.parse, + meta={'currentCount': 0, 'uid': uid}, + cookies=self.cookie_dict + ) def parse(self, response): rsp = json.loads(response.text)