2026-01-19 09:17:10 +08:00
|
|
|
|
import os
from typing import Dict, List, Tuple, Optional

import pymysql
|
|
|
|
|
|
|
|
|
|
|
|
# ================== 配置区 ==================
|
|
|
|
|
|
|
|
|
|
|
|
# Keyword arguments passed to pymysql.connect() in main().
# Each value can be overridden through an environment variable so the script
# can be pointed at another database without editing this file; the literals
# are the original defaults and are used when the variable is not set.
# NOTE(review): a root password committed to source is a credential leak --
# rotate it and supply DSP_DB_PASSWORD from the environment instead.
DB_CONFIG = {
    'host': os.environ.get('DSP_DB_HOST', '47.113.231.200'),
    'port': int(os.environ.get('DSP_DB_PORT', '28089')),
    'user': os.environ.get('DSP_DB_USER', 'root'),
    'password': os.environ.get('DSP_DB_PASSWORD', 'passok123A'),
    'database': os.environ.get('DSP_DB_NAME', 'dsp'),
    'charset': os.environ.get('DSP_DB_CHARSET', 'utf8mb4'),
}
|
|
|
|
|
|
|
|
|
|
|
|
# Only used to select which records (by es_srcname) are processed.
# The list may be left empty, since no replacement is performed any more;
# main() exits early when it is empty.
TARGET_SRCNAMES: List[str] = [
    "http://www.kcna.kp/cn/category/articles/q/5394b80bdae203fadef02522cfb578c0.kcmsf",
    # Add the site names you need to process here.
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ================== 工具函数 ==================
|
|
|
|
|
|
|
|
|
|
|
|
def get_suffix_32(url: str) -> Optional[str]:
    """Return the last 32 characters of *url*.

    Returns ``None`` when *url* is empty/``None`` or shorter than 32
    characters, so callers can skip records that cannot be matched.
    """
    if not url or len(url) < 32:
        return None
    return url[-32:]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def find_foreign_by_suffix(cursor, suffix: str, exclude_id: int) -> Optional[Tuple[str, str]]:
    """Look up another record whose ``es_urlname`` ends with *suffix*.

    Args:
        cursor: An open DB-API cursor used to run the query.
        suffix: The 32-character URL suffix to match against
            ``RIGHT(es_urlname, 32)``.
        exclude_id: ``es_sid`` of the record being processed; it is excluded
            so a record never matches itself.

    Returns:
        The ``(es_urltitle, es_urlcontent)`` row of one matching record, or
        ``None`` when no other record shares the suffix.
    """
    # NOTE(review): LIMIT 1 without ORDER BY means that, if several records
    # share the suffix, which one is returned is up to the database.
    query = """
        SELECT es_urltitle, es_urlcontent
        FROM indeximos
        WHERE
            es_sid != %s
            AND es_urlname IS NOT NULL
            AND CHAR_LENGTH(es_urlname) >= 32
            AND RIGHT(es_urlname, 32) = %s
        LIMIT 1
    """
    cursor.execute(query, (exclude_id, suffix))
    result = cursor.fetchone()
    return result if result else None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def update_chinese_record(cursor, record_id: int, title: str, content: str):
    """Write *title* and *content* into one record's ``es_title`` / ``es_content``.

    Args:
        cursor: An open DB-API cursor used to run the UPDATE.
        record_id: ``es_sid`` of the record to update.
        title: New value for ``es_title``.
        content: New value for ``es_content``.

    The statement is only executed here; committing is left to the caller.
    """
    update_query = """
        UPDATE indeximos
        SET es_title = %s, es_content = %s
        WHERE es_sid = %s
    """
    cursor.execute(update_query, (title, content, record_id))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ================== 主逻辑 ==================
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Copy title/content from suffix-matched records onto target-site records.

    Loads every ``indeximos`` row whose ``es_srcname`` is listed in
    ``TARGET_SRCNAMES`` (with a non-empty ``es_urlname`` and an ``es_loadtime``
    after the hard-coded cut-off), finds another row whose ``es_urlname`` ends
    with the same 32 characters, and writes that row's ``es_urltitle`` /
    ``es_urlcontent`` into the target row's ``es_title`` / ``es_content``.

    All updates are committed together after the loop. On any error the
    transaction is rolled back and the exception is re-raised. The cursor and
    connection are always closed.
    """
    if not TARGET_SRCNAMES:
        print("⚠️ 未指定任何目标 es_srcname,程序退出。")
        return

    conn = pymysql.connect(**DB_CONFIG)
    cursor = conn.cursor()

    try:
        # Fetch the records of all target sites. One %s placeholder is
        # generated per source name so the values stay parameterized.
        placeholders = ','.join(['%s'] * len(TARGET_SRCNAMES))
        query = f"""
            SELECT es_sid, es_srcname, es_urlname
            FROM indeximos
            WHERE es_srcname IN ({placeholders})
              AND es_urlname IS NOT NULL
              AND es_urlname != ''
              AND es_loadtime > '2026-01-16 10:40:00'
        """
        cursor.execute(query, TARGET_SRCNAMES)
        records = cursor.fetchall()
        total = len(records)
        print(f"共加载 {total} 条来自 {TARGET_SRCNAMES} 的记录用于匹配...")

        updated_count = 0
        skipped_short = 0

        for idx, (record_id, es_srcname, es_urlname) in enumerate(records, 1):
            suffix = get_suffix_32(es_urlname)
            if suffix is None:
                # URL shorter than 32 characters: nothing to match on.
                skipped_short += 1
                continue

            foreign_data = find_foreign_by_suffix(cursor, suffix, record_id)
            if foreign_data:
                title, content = foreign_data
                update_chinese_record(cursor, record_id, title, content)
                updated_count += 1
                print(f"[{idx}/{total}] ✅ 已更新 ID={record_id} | src={es_srcname}")

        # Single commit: either every update lands or none does.
        conn.commit()
        print("\n" + "=" * 50)
        print("✅ 匹配完成!")
        print(f" - 成功更新: {updated_count} 条")
        print(f" - 因 URL 长度 <32 跳过: {skipped_short} 条")
        print(f" - 总处理: {total} 条")

    except Exception as e:
        conn.rollback()
        print(f"❌ 发生错误,已回滚: {e}")
        raise
    finally:
        cursor.close()
        conn.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Run the job only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|