119 lines
3.5 KiB
Python
119 lines
3.5 KiB
Python
import os
from typing import Dict, List, Tuple, Optional

import pymysql
|
||
|
||
# ================== 配置区 ==================
|
||
|
||
# Connection settings passed verbatim to pymysql.connect(**DB_CONFIG).
#
# NOTE(review): the host, root user and password used to be available only as
# literals in this file. Every value can now be overridden through the
# environment variable named next to it; the literals are kept solely as
# backward-compatible fallbacks. The embedded password should be rotated and
# its fallback removed once the deployment supplies DSP_DB_PASSWORD.
DB_CONFIG = {
    'host': os.environ.get('DSP_DB_HOST', '47.113.231.200'),
    # The port must stay an int, so the environment value is converted.
    'port': int(os.environ.get('DSP_DB_PORT', '28089')),
    'user': os.environ.get('DSP_DB_USER', 'root'),
    'password': os.environ.get('DSP_DB_PASSWORD', 'passok123A'),
    'database': os.environ.get('DSP_DB_NAME', 'dsp'),
    'charset': os.environ.get('DSP_DB_CHARSET', 'utf8mb4'),
}
|
||
|
||
# es_srcname values whose rows are selected for processing. The list is used
# only as the IN (...) filter of the SELECT in main(); when it is empty,
# main() prints a notice and returns without opening a database connection.
TARGET_SRCNAMES: List[str] = [
    "http://www.kcna.kp/cn/category/articles/q/5394b80bdae203fadef02522cfb578c0.kcmsf",
    # Add further es_srcname values to process here.
]
|
||
|
||
|
||
# ================== 工具函数 ==================
|
||
|
||
def get_suffix_32(url: str) -> Optional[str]:
    """Return the last 32 characters of ``url``.

    Returns ``None`` when ``url`` is falsy (empty string or ``None``) or has
    fewer than 32 characters.
    """
    if url and len(url) >= 32:
        return url[-32:]
    return None
|
||
|
||
|
||
def find_foreign_by_suffix(cursor, suffix: str, exclude_id: int) -> Optional[Tuple[str, str]]:
    """Find another ``indeximos`` row whose ``es_urlname`` ends with ``suffix``.

    The row whose ``es_sid`` equals ``exclude_id`` is skipped so that a record
    never matches itself. Rows with a NULL ``es_urlname`` or one shorter than
    32 characters are ignored.

    NOTE(review): the query does not filter by language or source, so any
    other row with the same 32-character suffix qualifies. ``LIMIT 1`` is
    used without ``ORDER BY``, so when several rows match, which one comes
    back is left to the database.

    Args:
        cursor: DB-API cursor used to run the query.
        suffix: Value compared against ``RIGHT(es_urlname, 32)``.
        exclude_id: ``es_sid`` of the record being processed.

    Returns:
        The ``(es_urltitle, es_urlcontent)`` row, or ``None`` if nothing
        matched.
    """
    sql = """
        SELECT es_urltitle, es_urlcontent
        FROM indeximos
        WHERE
            es_sid != %s
            AND es_urlname IS NOT NULL
            AND CHAR_LENGTH(es_urlname) >= 32
            AND RIGHT(es_urlname, 32) = %s
        LIMIT 1
    """
    cursor.execute(sql, (exclude_id, suffix))
    row = cursor.fetchone()
    if not row:
        return None
    return row
|
||
|
||
|
||
def update_chinese_record(cursor, record_id: int, title: str, content: str):
    """Overwrite ``es_title`` and ``es_content`` of one ``indeximos`` row.

    Only issues the UPDATE; committing is left to the caller.

    Args:
        cursor: DB-API cursor used to run the statement.
        record_id: ``es_sid`` of the row to update.
        title: New value for ``es_title``.
        content: New value for ``es_content``.
    """
    params = (title, content, record_id)
    sql = """
        UPDATE indeximos
        SET es_title = %s, es_content = %s
        WHERE es_sid = %s
    """
    cursor.execute(sql, params)
|
||
|
||
|
||
# ================== 主逻辑 ==================
|
||
|
||
def main():
    """Copy title/content from suffix-matched rows into the target records.

    For every ``indeximos`` row whose ``es_srcname`` is listed in
    ``TARGET_SRCNAMES`` and whose ``es_urlname`` is non-empty, the last 32
    characters of ``es_urlname`` are used to look up another row with the same
    suffix. When one is found, its ``es_urltitle`` / ``es_urlcontent`` are
    written to the target row's ``es_title`` / ``es_content``.

    All updates are committed together at the end. On any error the
    transaction is rolled back, a message is printed, and the exception is
    re-raised.
    """
    if not TARGET_SRCNAMES:
        print("⚠️ 未指定任何目标 es_srcname,程序退出。")
        return

    conn = pymysql.connect(**DB_CONFIG)
    try:
        # Create the cursor inside the outer try so the connection is closed
        # even if cursor creation itself fails.
        cursor = conn.cursor()
        try:
            # Load every candidate record for the configured sources.
            placeholders = ','.join(['%s'] * len(TARGET_SRCNAMES))
            query = f"""
                SELECT es_sid, es_srcname, es_urlname
                FROM indeximos
                WHERE es_srcname IN ({placeholders})
                  AND es_urlname IS NOT NULL
                  AND es_urlname != ''
            """
            cursor.execute(query, TARGET_SRCNAMES)
            records = cursor.fetchall()
            total = len(records)
            print(f"共加载 {total} 条来自 {TARGET_SRCNAMES} 的记录用于匹配...")

            updated_count = 0
            skipped_short = 0

            for idx, (record_id, es_srcname, es_urlname) in enumerate(records, 1):
                suffix = get_suffix_32(es_urlname)
                if suffix is None:
                    # URL shorter than 32 characters: nothing to match on.
                    skipped_short += 1
                    continue

                foreign_data = find_foreign_by_suffix(cursor, suffix, record_id)
                if foreign_data:
                    title, content = foreign_data
                    update_chinese_record(cursor, record_id, title, content)
                    updated_count += 1
                    print(f"[{idx}/{total}] ✅ 已更新 ID={record_id} | src={es_srcname}")

            # Single commit: either every update lands or none does.
            conn.commit()
            print("\n" + "=" * 50)
            print("✅ 匹配完成!")
            print(f" - 成功更新: {updated_count} 条")
            print(f" - 因 URL 长度 <32 跳过: {skipped_short} 条")
            print(f" - 总处理: {total} 条")

        except Exception as e:
            conn.rollback()
            print(f"❌ 发生错误,已回滚: {e}")
            raise
        finally:
            cursor.close()
    finally:
        conn.close()
|
||
|
||
|
||
# Run the job only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|