[微博] 从 redis 读取 cookie 并附加到请求，成功获取返回参数

This commit is contained in:
DELL 2026-01-20 16:36:44 +08:00
parent 0008e619d1
commit 92c8cdf9b2

View File

@ -2,10 +2,12 @@
import json import json
import time import time
import redis
import scrapy import scrapy
from redisbloom.client import Client from redisbloom.client import Client
from MediaSpiders.items import MediaspidersItem from MediaSpiders.items import MediaspidersItem
from MediaSpiders.spiders.TwitterUserSpider import form_cookie_dict
from MediaSpiders.utils.http_utils import http_post from MediaSpiders.utils.http_utils import http_post
from MediaSpiders.utils.string_utils import find_text from MediaSpiders.utils.string_utils import find_text
from MediaSpiders.utils.time_utils import get_time_stamp, get_current_timestamp from MediaSpiders.utils.time_utils import get_time_stamp, get_current_timestamp
@ -59,6 +61,11 @@ class WeiboSpider(scrapy.Spider):
account_rsp = json.loads( account_rsp = json.loads(
http_post(account_query_api, json.dumps(post_data), headers={"Content-Type": "application/json"}).text) http_post(account_query_api, json.dumps(post_data), headers={"Content-Type": "application/json"}).text)
self.simhash_filter_key = self.settings['WEIBO_SIMHASH_FILTER_KEY'] self.simhash_filter_key = self.settings['WEIBO_SIMHASH_FILTER_KEY']
# 从 redis 中获取微博所需的 cookie
cookie_string = redis.Redis(host=self.settings['REDIS_HOST'], port=self.settings['REDIS_PORT'],
password=self.settings['REDIS_PWD']).get("MediaSpiders:Weibo_Cookies").decode()
self.cookie_dict = form_cookie_dict(cookie_string)
all_user_info = [] all_user_info = []
if account_rsp['code'] == 200: if account_rsp['code'] == 200:
all_user_info = account_rsp['content'] all_user_info = account_rsp['content']
@ -67,7 +74,10 @@ class WeiboSpider(scrapy.Spider):
if uid[:6] != '107603': if uid[:6] != '107603':
uid = f'107603{uid}' uid = f'107603{uid}'
yield scrapy.Request('https://m.weibo.cn/api/container/getIndex?containerid=%s' % uid, yield scrapy.Request('https://m.weibo.cn/api/container/getIndex?containerid=%s' % uid,
callback=self.parse, meta={'currentCount': 0, 'uid': uid}) callback=self.parse,
meta={'currentCount': 0, 'uid': uid},
cookies=self.cookie_dict
)
def parse(self, response): def parse(self, response):
rsp = json.loads(response.text) rsp = json.loads(response.text)