[微博]redis 添加 cookie 成功请求获取返参

This commit is contained in:
DELL 2026-01-20 16:36:44 +08:00
parent 0008e619d1
commit 92c8cdf9b2

View File

@ -2,10 +2,12 @@
import json
import time
import redis
import scrapy
from redisbloom.client import Client
from MediaSpiders.items import MediaspidersItem
from MediaSpiders.spiders.TwitterUserSpider import form_cookie_dict
from MediaSpiders.utils.http_utils import http_post
from MediaSpiders.utils.string_utils import find_text
from MediaSpiders.utils.time_utils import get_time_stamp, get_current_timestamp
@ -59,6 +61,11 @@ class WeiboSpider(scrapy.Spider):
account_rsp = json.loads(
http_post(account_query_api, json.dumps(post_data), headers={"Content-Type": "application/json"}).text)
self.simhash_filter_key = self.settings['WEIBO_SIMHASH_FILTER_KEY']
# Fetch the cookie string required by Weibo from redis
cookie_string = redis.Redis(host=self.settings['REDIS_HOST'], port=self.settings['REDIS_PORT'],
password=self.settings['REDIS_PWD']).get("MediaSpiders:Weibo_Cookies").decode()
self.cookie_dict = form_cookie_dict(cookie_string)
all_user_info = []
if account_rsp['code'] == 200:
all_user_info = account_rsp['content']
@ -67,7 +74,10 @@ class WeiboSpider(scrapy.Spider):
if uid[:6] != '107603':
uid = f'107603{uid}'
yield scrapy.Request('https://m.weibo.cn/api/container/getIndex?containerid=%s' % uid,
callback=self.parse, meta={'currentCount': 0, 'uid': uid})
callback=self.parse,
meta={'currentCount': 0, 'uid': uid},
cookies=self.cookie_dict
)
def parse(self, response):
rsp = json.loads(response.text)