diff --git a/spiders/MediaSpiders/MediaSpiders/spiders/WeiboUserSpider.py b/spiders/MediaSpiders/MediaSpiders/spiders/WeiboUserSpider.py index d433a4a..f0fed88 100644 --- a/spiders/MediaSpiders/MediaSpiders/spiders/WeiboUserSpider.py +++ b/spiders/MediaSpiders/MediaSpiders/spiders/WeiboUserSpider.py @@ -2,10 +2,12 @@ import json import time +import redis import scrapy from redisbloom.client import Client from MediaSpiders.items import MediaspidersItem +from MediaSpiders.spiders.TwitterUserSpider import form_cookie_dict from MediaSpiders.utils.http_utils import http_post from MediaSpiders.utils.string_utils import find_text from MediaSpiders.utils.time_utils import get_time_stamp, get_current_timestamp @@ -59,6 +61,11 @@ class WeiboSpider(scrapy.Spider): account_rsp = json.loads( http_post(account_query_api, json.dumps(post_data), headers={"Content-Type": "application/json"}).text) self.simhash_filter_key = self.settings['WEIBO_SIMHASH_FILTER_KEY'] + # Fetch the cookie string Weibo requires from redis. NOTE(review): .get() presumably returns None when the key is absent, in which case .decode() would raise - confirm the key is always populated. + cookie_string = redis.Redis(host=self.settings['REDIS_HOST'], port=self.settings['REDIS_PORT'], + password=self.settings['REDIS_PWD']).get("MediaSpiders:Weibo_Cookies").decode() + self.cookie_dict = form_cookie_dict(cookie_string) + all_user_info = [] if account_rsp['code'] == 200: all_user_info = account_rsp['content'] @@ -67,7 +74,10 @@ class WeiboSpider(scrapy.Spider): if uid[:6] != '107603': uid = f'107603{uid}' yield scrapy.Request('https://m.weibo.cn/api/container/getIndex?containerid=%s' % uid, - callback=self.parse, meta={'currentCount': 0, 'uid': uid}) + callback=self.parse, + meta={'currentCount': 0, 'uid': uid}, + cookies=self.cookie_dict + ) def parse(self, response): rsp = json.loads(response.text)