同步提交
This commit is contained in:
parent
36fa73bd12
commit
0e89a4f642
@ -77,6 +77,7 @@
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.cloud</groupId>
|
||||
<artifactId>spring-cloud-stream-test-support</artifactId>
|
||||
|
||||
@ -477,4 +477,114 @@ public class DatabaseConnector {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 新闻导出
|
||||
*/
|
||||
public void exportToXlsxTest(String startTime) {
|
||||
try {
|
||||
Path dirPath = Paths.get(hotSearchExcelOutputPath);
|
||||
if (!Files.exists(dirPath)) {
|
||||
Files.createDirectories(dirPath);
|
||||
}
|
||||
String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd"));
|
||||
String fileName = "data_hot_search-" + timestamp + "-001.xlsx";
|
||||
Path filePath = dirPath.resolve(fileName);
|
||||
|
||||
List<EsDataHotSearchView> esDataHotSearchViewList = esDataHotSearchRepository.findAllByEsLoadtimeAfter(startTime);
|
||||
if (!esDataHotSearchViewList.isEmpty()) {
|
||||
Field[] fields = esDataHotSearchViewList.get(0).getClass().getDeclaredFields(); // 通过反射获取类的成员信息,并使用这些类成员为后续生成的excel表头做准备
|
||||
try (Workbook workbook = new XSSFWorkbook();
|
||||
ByteArrayOutputStream out = new ByteArrayOutputStream()) {
|
||||
|
||||
Sheet sheet = workbook.createSheet("data");
|
||||
|
||||
// 创建表头
|
||||
Row headerRow = sheet.createRow(0);
|
||||
CellStyle headerStyle = workbook.createCellStyle(); // 创建单元格
|
||||
headerStyle.setFillForegroundColor(IndexedColors.LIGHT_BLUE.getIndex());
|
||||
headerStyle.setFillPattern(FillPatternType.SOLID_FOREGROUND);
|
||||
|
||||
for (int i = 0; i < fields.length; i++) {
|
||||
Cell cell = headerRow.createCell(i);
|
||||
String formField = formField(fields[i]);
|
||||
cell.setCellValue(formField);
|
||||
cell.setCellStyle(headerStyle);
|
||||
}
|
||||
// 填充数据
|
||||
int rowNum = 1;
|
||||
for (EsDataHotSearchView item : esDataHotSearchViewList) {
|
||||
Row row = sheet.createRow(rowNum++);
|
||||
logger.debug("导出excel第" + rowNum + "行");
|
||||
// 0: esSid
|
||||
row.createCell(0).setCellValue(item.getEsSid() != null ? item.getEsSid() : "");
|
||||
// 1: esUrltime
|
||||
row.createCell(1).setCellValue(item.getEsUrltime() != null ? item.getEsUrltime() : "");
|
||||
|
||||
// 2: esCarriertype
|
||||
row.createCell(2).setCellValue(item.getEsCarriertype() != null ? item.getEsCarriertype() : "");
|
||||
|
||||
// 3: esSitename
|
||||
row.createCell(3).setCellValue(item.getEsSitename() != null ? item.getEsSitename() : "");
|
||||
|
||||
// 4: esSimrank
|
||||
row.createCell(4).setCellValue(item.getEsSimrank() != null ? String.valueOf(Float.valueOf(item.getEsSimrank()).intValue()) : "");
|
||||
|
||||
// 5: esUrltitle
|
||||
String esUrltitle = item.getEsUrltitle();
|
||||
if (esUrltitle != null && esUrltitle.length() > 10000) {
|
||||
row.createCell(5).setCellValue(esUrltitle.substring(0, 10000));
|
||||
} else {
|
||||
row.createCell(5).setCellValue(esUrltitle != null ? esUrltitle : "");
|
||||
}
|
||||
|
||||
// 6: esUrlcontent
|
||||
String esUrlcontent = item.getEsUrlcontent();
|
||||
if (esUrlcontent != null && esUrlcontent.length() > 10000) {
|
||||
row.createCell(6).setCellValue(esUrlcontent.substring(0, 10000));
|
||||
} else {
|
||||
row.createCell(6).setCellValue(esUrlcontent != null ? esUrlcontent : "");
|
||||
}
|
||||
|
||||
// 7: esUrlname
|
||||
row.createCell(7).setCellValue(item.getEsUrlname() != null ? item.getEsUrlname() : "");
|
||||
|
||||
// 8: esHkey
|
||||
row.createCell(8).setCellValue(item.getEsHkey() != null ? item.getEsHkey() : "");
|
||||
|
||||
// 9: esLasttime
|
||||
String esLasttime = extractFilenamesFromJsonArray(item.getEsLasttime());
|
||||
row.createCell(9).setCellValue(esLasttime);
|
||||
|
||||
|
||||
// 10: esHeat
|
||||
row.createCell(10).setCellValue(item.getEsHeat() != null ? item.getEsHeat() : "");
|
||||
|
||||
// 11: esLasttime
|
||||
String esLoadtime = extractFilenamesFromJsonArray(item.getEsLoadtime());
|
||||
row.createCell(11).setCellValue(esLoadtime);
|
||||
}
|
||||
logger.info("完成excel数据写入,共" + rowNum + "行");
|
||||
|
||||
// 自动调整列宽
|
||||
for (int i = 0; i < fields.length; i++) {
|
||||
sheet.autoSizeColumn(i);
|
||||
}
|
||||
|
||||
workbook.write(out);
|
||||
|
||||
try (FileOutputStream fos = new FileOutputStream(filePath.toFile())) {
|
||||
workbook.write(fos);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
logger.info("excel导出完成!");
|
||||
} else logger.info("获取数据为空,excel未导出");
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -3,6 +3,7 @@
|
||||
BOT_NAME = 'MediaSpiders'
|
||||
|
||||
LOG_LEVEL = 'INFO'
|
||||
# LOG_LEVEL = 'DEBUG'
|
||||
|
||||
SPIDER_MODULES = ['MediaSpiders.spiders']
|
||||
NEWSPIDER_MODULE = 'MediaSpiders.spiders'
|
||||
@ -110,8 +111,8 @@ CUSTOM_USER_AGENT = [
|
||||
|
||||
# 部署在外网采集fb时使用selenium_chrome
|
||||
SELENIUM_DRIVER_NAME = 'chrome'
|
||||
# SELENIUM_DRIVER_EXECUTABLE_PATH = 'local'
|
||||
SELENIUM_DRIVER_EXECUTABLE_PATH = 'http://144.34.185.108:28098'
|
||||
SELENIUM_DRIVER_EXECUTABLE_PATH = 'local'
|
||||
# SELENIUM_DRIVER_EXECUTABLE_PATH = 'http://144.34.185.108:28098'
|
||||
SELENIUM_DRIVER_ARGUMENTS = [
|
||||
'--headless',
|
||||
'--no-sandbox',
|
||||
|
||||
@ -76,14 +76,15 @@ class WechatLinksFetcherSpider(scrapy.Spider):
|
||||
|
||||
def parse(self, response):
|
||||
driver = response.request.meta['driver']
|
||||
cookie_list = self.redis_client.lrange("MediaSpiders:WeChatLinksFetcher_Cookies", 0, -1)
|
||||
cookies_key = "MediaSpiders:WeChatLinksFetcher_Cookies"
|
||||
cookie_list = self.redis_client.lrange(cookies_key, 0, -1)
|
||||
cookie_parts = [
|
||||
item.decode('utf-8') if isinstance(item, bytes) else str(item)
|
||||
for item in cookie_list
|
||||
]
|
||||
|
||||
# 尝试使用 Redis 中的 cookie 登录
|
||||
for item in cookie_parts:
|
||||
# 遍历cookies,记录当前索引
|
||||
for cookie_index, item in enumerate(cookie_parts):
|
||||
try:
|
||||
driver.delete_all_cookies()
|
||||
driver.get('https://mp.weixin.qq.com/')
|
||||
@ -99,7 +100,7 @@ class WechatLinksFetcherSpider(scrapy.Spider):
|
||||
else:
|
||||
logger.warning(f"跳过 cookie: {name}")
|
||||
|
||||
logger.info(f"成功添加 {success_count}/{len(cookie_dict)} 个 cookie")
|
||||
logger.info(f"成功添加 {success_count}/{len(cookie_dict)} 个 cookie (索引: {cookie_index})")
|
||||
|
||||
# 验证 cookie 是否有效
|
||||
driver.refresh()
|
||||
@ -204,6 +205,9 @@ class WechatLinksFetcherSpider(scrapy.Spider):
|
||||
if err_msg == "freq control" or err_msg == "invalid session":
|
||||
logger.info("接口频率限制,稍后再试,本次获取结束")
|
||||
break_flag = True
|
||||
|
||||
# 删除当前使用的cookie
|
||||
self._remove_invalid_cookie(cookies_key, cookie_index)
|
||||
break
|
||||
|
||||
if not break_flag:
|
||||
@ -222,12 +226,32 @@ class WechatLinksFetcherSpider(scrapy.Spider):
|
||||
if rsp_body['base_resp']['err_msg'] == "freq control":
|
||||
logger.info("接口频率限制,稍后再试,本次获取结束")
|
||||
break_flag = True
|
||||
|
||||
# 删除当前使用的cookie
|
||||
self._remove_invalid_cookie(cookies_key, cookie_index)
|
||||
break
|
||||
except Exception as e:
|
||||
logger.info(repr(e))
|
||||
self.redis_client.close()
|
||||
driver.quit()
|
||||
|
||||
def _remove_invalid_cookie(self, cookies_key, cookie_index):
|
||||
"""删除无效的cookie"""
|
||||
try:
|
||||
# 方法1:标记并删除
|
||||
self.redis_client.lset(cookies_key, cookie_index, "__invalid__")
|
||||
self.redis_client.lrem(cookies_key, 1, "__invalid__")
|
||||
logger.info(f"已删除无效的cookie,索引: {cookie_index}")
|
||||
|
||||
# 方法2:或者直接删除整个列表(如果cookie全部无效)
|
||||
# cookie_count = self.redis_client.llen(cookies_key)
|
||||
# if cookie_count <= 1:
|
||||
# self.redis_client.delete(cookies_key)
|
||||
# logger.info(f"已删除所有cookies: {cookies_key}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"删除cookie失败: {e}")
|
||||
|
||||
|
||||
def parse_cookie_string(cookie_str):
|
||||
"""解析 cookie 字符串为 dict"""
|
||||
@ -280,3 +304,6 @@ def add_cookie_smart(driver, name, value, target_domain='mp.weixin.qq.com'):
|
||||
# logger.warning(f"✗ {name} failed: {e}")
|
||||
return False
|
||||
return False # 所有 domain 都失败
|
||||
|
||||
|
||||
|
||||
|
||||
@ -2,3 +2,5 @@
|
||||
#
|
||||
# Please refer to the documentation for information on how to create and manage
|
||||
# your spiders.
|
||||
|
||||
|
||||
|
||||
@ -0,0 +1,353 @@
|
||||
import time
|
||||
import logging as logger
|
||||
|
||||
from selenium.webdriver.common.by import By
|
||||
import redis
|
||||
import requests
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
|
||||
from MediaSpiders.settings import REDIS_HOST, REDIS_PORT, REDIS_PWD
|
||||
|
||||
chrome_options = Options()
|
||||
# 指定 chrome.exe 的完整路径
|
||||
chrome_options.binary_location = r"D:\chrome-win64\chrome.exe"
|
||||
# chrome_options.use_chromium = True
|
||||
driver = webdriver.Chrome(
|
||||
executable_path=r"D:\chromedriver-win64\chromedriver.exe",
|
||||
options=chrome_options
|
||||
)
|
||||
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
|
||||
"source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
|
||||
})
|
||||
|
||||
# Redis连接
|
||||
redis_client = redis.Redis(host=REDIS_HOST, port=REDIS_PORT, password=REDIS_PWD, decode_responses=True)
|
||||
COOKIE_KEY = "MediaSpiders:WeChatLinksFetcher_Cookies"
|
||||
|
||||
|
||||
def parse_cookie_string(cookie_str):
|
||||
"""解析 cookie 字符串为 dict"""
|
||||
cookie_dict = {}
|
||||
for item in cookie_str.split(';'):
|
||||
if '=' in item:
|
||||
name, value = item.split('=', 1)
|
||||
cookie_dict[name.strip()] = value.strip()
|
||||
return cookie_dict
|
||||
|
||||
|
||||
def add_cookie_smart(driver, name, value, target_domain='mp.weixin.qq.com'):
|
||||
"""
|
||||
智能添加 cookie:先试目标域名,失败则试父域,再失败则跳过
|
||||
"""
|
||||
# 微信核心 cookie 必须用 mp.weixin.qq.com
|
||||
wechat_critical = ['wxuin', 'slave_sid', 'slave_user', 'bizuin', 'data_ticket', 'token']
|
||||
|
||||
# 腾讯通用 cookie 可尝试父域
|
||||
tencent_common = ['ptui_loginuin', 'RK', 'ptcz', 'ua_id']
|
||||
|
||||
# 策略 1: 核心 cookie → 精确域名
|
||||
if name in wechat_critical:
|
||||
domains_to_try = [target_domain]
|
||||
# 策略 2: 腾讯通用 cookie → 先试目标域,再试父域
|
||||
elif name in tencent_common:
|
||||
domains_to_try = [target_domain, '.weixin.qq.com', '.qq.com']
|
||||
# 策略 3: 其他 cookie → 默认 host-only(不传 domain)
|
||||
else:
|
||||
domains_to_try = [None, target_domain]
|
||||
|
||||
for domain in domains_to_try:
|
||||
cookie = {
|
||||
'name': name,
|
||||
'value': value,
|
||||
'path': '/',
|
||||
'secure': True
|
||||
}
|
||||
if domain:
|
||||
cookie['domain'] = domain
|
||||
|
||||
try:
|
||||
driver.add_cookie(cookie)
|
||||
# logger.debug(f"✓ {name} added with domain={domain or 'host-only'}")
|
||||
return True
|
||||
except Exception as e:
|
||||
if 'invalid cookie domain' in str(e):
|
||||
continue # 尝试下一个 domain
|
||||
else:
|
||||
# logger.warning(f"✗ {name} failed: {e}")
|
||||
return False
|
||||
return False # 所有 domain 都失败
|
||||
|
||||
|
||||
def is_cookie_exists(cookie_str):
|
||||
"""
|
||||
判断cookie是否已存在Redis中
|
||||
返回: (exists, duplicate_index)
|
||||
"""
|
||||
try:
|
||||
# 解析新cookie
|
||||
new_cookie_dict = parse_cookie_string(cookie_str)
|
||||
|
||||
# 获取Redis中所有现有cookie
|
||||
existing_cookies = redis_client.lrange(COOKIE_KEY, 0, -1)
|
||||
|
||||
for idx, existing_cookie in enumerate(existing_cookies):
|
||||
try:
|
||||
existing_dict = parse_cookie_string(existing_cookie)
|
||||
|
||||
# 检查关键字段是否匹配
|
||||
# 微信cookie的关键识别字段
|
||||
key_fields = ['wxuin', 'slave_sid', 'slave_user', 'bizuin']
|
||||
|
||||
matches = 0
|
||||
for field in key_fields:
|
||||
if field in new_cookie_dict and field in existing_dict:
|
||||
if new_cookie_dict[field] == existing_dict[field]:
|
||||
matches += 1
|
||||
|
||||
# 如果匹配到2个以上关键字段,认为是同一个cookie
|
||||
if matches >= 2:
|
||||
return True, idx
|
||||
|
||||
# 或者检查slave_sid(最独特的标识)
|
||||
if 'slave_sid' in new_cookie_dict and 'slave_sid' in existing_dict:
|
||||
if new_cookie_dict['slave_sid'] == existing_dict['slave_sid']:
|
||||
return True, idx
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"解析现有cookie时出错: {e}")
|
||||
continue
|
||||
|
||||
return False, -1
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"判断cookie是否存在时出错: {e}")
|
||||
return False, -1
|
||||
|
||||
|
||||
def save_cookie_to_redis(cookie_str, force_save=False):
|
||||
"""
|
||||
保存cookie到Redis,自动去重
|
||||
|
||||
Args:
|
||||
cookie_str: cookie字符串
|
||||
force_save: 是否强制保存(即使存在也保存)
|
||||
|
||||
Returns:
|
||||
bool: 是否保存成功
|
||||
"""
|
||||
try:
|
||||
# 检查是否已存在
|
||||
exists, idx = is_cookie_exists(cookie_str)
|
||||
|
||||
if exists and not force_save:
|
||||
logger.info(f"Cookie已存在 (索引: {idx}),跳过保存")
|
||||
return False
|
||||
|
||||
if exists and force_save:
|
||||
# 删除旧的,保存新的
|
||||
redis_client.lset(COOKIE_KEY, idx, cookie_str)
|
||||
logger.info(f"已更新现有cookie (索引: {idx})")
|
||||
return True
|
||||
else:
|
||||
# 添加新cookie
|
||||
redis_client.rpush(COOKIE_KEY, cookie_str)
|
||||
logger.info(f"已添加新cookie,当前总数: {redis_client.llen(COOKIE_KEY)}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"保存cookie到Redis失败: {e}")
|
||||
return False
|
||||
|
||||
def cookie_dict_to_string(cookie_dict):
|
||||
"""将cookie字典转换为字符串"""
|
||||
return '; '.join([f"{k}={v}" for k, v in cookie_dict.items()])
|
||||
|
||||
def manual_login_and_get_cookie():
|
||||
"""
|
||||
手动扫码登录并获取cookie
|
||||
"""
|
||||
logger.info("开始手动扫码登录流程...")
|
||||
|
||||
try:
|
||||
# 访问微信公众平台
|
||||
driver.get("https://mp.weixin.qq.com/")
|
||||
time.sleep(3)
|
||||
|
||||
# 等待页面加载
|
||||
WebDriverWait(driver, 10).until(
|
||||
EC.presence_of_element_located((By.TAG_NAME, "body"))
|
||||
)
|
||||
|
||||
# 检查是否已登录(通过URL是否包含token)
|
||||
if "token=" in driver.current_url:
|
||||
logger.info("检测到已登录状态,直接获取cookie")
|
||||
else:
|
||||
logger.info("请手动扫描二维码登录...")
|
||||
logger.info("等待登录完成...")
|
||||
|
||||
# 等待登录成功(等待URL变化或特定元素出现)
|
||||
try:
|
||||
# 等待URL中包含token
|
||||
WebDriverWait(driver, 120).until(
|
||||
lambda d: "token=" in d.current_url
|
||||
)
|
||||
logger.info("检测到登录成功!")
|
||||
time.sleep(3)
|
||||
except Exception as e:
|
||||
logger.error("等待登录超时")
|
||||
return None
|
||||
|
||||
# 获取当前页面所有cookies
|
||||
cookies = driver.get_cookies()
|
||||
|
||||
if not cookies:
|
||||
logger.error("未获取到cookies")
|
||||
return None
|
||||
|
||||
# 转换为字符串格式
|
||||
cookie_dict = {}
|
||||
for cookie in cookies:
|
||||
cookie_dict[cookie['name']] = cookie['value']
|
||||
|
||||
cookie_string = cookie_dict_to_string(cookie_dict)
|
||||
|
||||
# 获取token信息
|
||||
token = None
|
||||
if "token=" in driver.current_url:
|
||||
token_index = driver.current_url.rfind('token=')
|
||||
token = driver.current_url[token_index + 6:]
|
||||
logger.info(f"获取到token: {token}")
|
||||
|
||||
logger.info(f"获取到 {len(cookie_dict)} 个cookie")
|
||||
|
||||
return {
|
||||
'cookie_dict': cookie_dict,
|
||||
'cookie_string': cookie_string,
|
||||
'token': token,
|
||||
'raw_cookies': cookies
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"手动登录失败: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def verify_cookie_valid(cookie_dict, token=None):
|
||||
"""
|
||||
验证cookie是否有效
|
||||
|
||||
Args:
|
||||
cookie_dict: cookie字典
|
||||
token: token字符串
|
||||
|
||||
Returns:
|
||||
bool: cookie是否有效
|
||||
"""
|
||||
try:
|
||||
if not token:
|
||||
# 如果没有token,尝试从cookie中构建
|
||||
pass
|
||||
|
||||
# 构建请求头
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
|
||||
'Referer': f'https://mp.weixin.qq.com/',
|
||||
}
|
||||
|
||||
# 尝试访问一个需要登录的接口
|
||||
test_api = f'https://mp.weixin.qq.com/cgi-bin/bizlogin?action=validate&lang=zh_CN'
|
||||
|
||||
response = requests.get(test_api, cookies=cookie_dict, headers=headers, timeout=10)
|
||||
|
||||
# 检查响应
|
||||
if response.status_code == 200:
|
||||
try:
|
||||
data = response.json()
|
||||
if 'base_resp' in data:
|
||||
err_msg = data['base_resp'].get('err_msg', '')
|
||||
if err_msg in ['ok', '']:
|
||||
logger.info("cookie验证有效")
|
||||
return True
|
||||
else:
|
||||
logger.warning(f"cookie验证返回错误: {err_msg}")
|
||||
return False
|
||||
except:
|
||||
# 如果不是JSON响应,可能仍然有效
|
||||
logger.info("cookie可能有效")
|
||||
return True
|
||||
else:
|
||||
logger.warning(f"cookie验证请求失败: {response.status_code}")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"验证cookie时出错: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def main(type):
|
||||
"""主函数"""
|
||||
logger.info("微信公众号Cookie获取工具")
|
||||
|
||||
try:
|
||||
# 1. 手动登录获取cookie
|
||||
result = manual_login_and_get_cookie()
|
||||
|
||||
if not result:
|
||||
logger.error("获取cookie失败")
|
||||
return
|
||||
|
||||
cookie_string = result['cookie_string']
|
||||
cookie_dict = result['cookie_dict']
|
||||
token = result['token']
|
||||
|
||||
# 2. 验证cookie有效性
|
||||
logger.info("正在验证cookie有效性...")
|
||||
is_valid = verify_cookie_valid(cookie_dict, token)
|
||||
|
||||
if not is_valid:
|
||||
logger.warning("cookie可能无效,但仍会保存")
|
||||
|
||||
# 3. 检查cookie是否已存在并保存
|
||||
logger.info("正在检查cookie是否已存在...")
|
||||
exists, idx = is_cookie_exists(cookie_string)
|
||||
|
||||
if exists:
|
||||
logger.info(f"Cookie已存在 (索引: {idx})")
|
||||
|
||||
# 询问是否覆盖
|
||||
choice = type
|
||||
if choice == 'y':
|
||||
saved = save_cookie_to_redis(cookie_string, force_save=True)
|
||||
if saved:
|
||||
logger.info("已覆盖更新cookie")
|
||||
else:
|
||||
logger.info("取消保存")
|
||||
else:
|
||||
# 保存新cookie
|
||||
saved = save_cookie_to_redis(cookie_string)
|
||||
if saved:
|
||||
logger.info("新cookie保存成功")
|
||||
|
||||
# 4. 显示当前cookie列表状态
|
||||
total_cookies = redis_client.llen(COOKIE_KEY)
|
||||
logger.info(f"当前Redis中cookie总数: {total_cookies}")
|
||||
|
||||
|
||||
except KeyboardInterrupt:
|
||||
logger.info("用户中断程序")
|
||||
except Exception as e:
|
||||
logger.error(f"程序执行出错: {e}")
|
||||
finally:
|
||||
driver.quit()
|
||||
logger.info("程序结束")
|
||||
|
||||
if __name__ == "__main__":
|
||||
# 是否覆盖原有cookie? y:覆盖、n:不覆盖
|
||||
type = 'y'
|
||||
# 运行主程序
|
||||
main(type)
|
||||
|
||||
|
||||
@ -20,11 +20,11 @@ def http_get(url):
|
||||
return rsp
|
||||
|
||||
|
||||
def http_post(url, data, headers=None):
|
||||
def http_post(url, data, headers=None, timeout=60):
|
||||
if headers:
|
||||
rsp = requests.post(url, data=data, headers=headers)
|
||||
rsp = requests.post(url, data=data, headers=headers, timeout=timeout)
|
||||
else:
|
||||
rsp = requests.post(url, data=data, headers={'User-Agent': ua})
|
||||
rsp = requests.post(url, data=data, headers={'User-Agent': ua}, timeout=timeout)
|
||||
return rsp
|
||||
|
||||
|
||||
|
||||
@ -26,8 +26,8 @@ def check_session(drive_path):
|
||||
api = drive_path + '/graphql'
|
||||
post_body = '{"query": "{ grid { maxSession, sessionCount } }"}'
|
||||
try:
|
||||
# 添加超时控制,5分钟 = 300秒
|
||||
response = http_post(api, post_body, timeout=300)
|
||||
# 添加超时控制,1分钟 = 600秒
|
||||
response = http_post(api, post_body, timeout=60)
|
||||
data_body = json.loads(response.content.decode())
|
||||
session_info = data_body['data']['grid']
|
||||
return session_info
|
||||
|
||||
@ -9,4 +9,4 @@ dirpath = os.path.dirname(os.path.abspath(__file__))
|
||||
sys.path.append(dirpath)
|
||||
|
||||
if __name__ == "__main__":
|
||||
execute(['scrapy', 'crawl', 'website_info_common', '-a', 'params={"job_id":"801","clusterName":"star_4"}'])
|
||||
execute(['scrapy', 'crawl', 'Website_report_list', '-a', 'params={"job_id":"801","clusterName":"star_4"}'])
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user