from typing import Dict, List, Tuple, Optional

import pymysql

# ================== Configuration ==================
# NOTE(review): the connection credentials are hard-coded in source; consider
# loading them from the environment or an untracked config file.
DB_CONFIG = {
    'host': '47.113.231.200',
    'port': 28089,
    'user': 'root',
    'password': 'passok123A',
    'database': 'dsp',
    'charset': 'utf8mb4',
}

# Only used to select which es_srcname values identify the records to process
# (per the original note: values may be empty because no replacement is done
# any more).
TARGET_SRCNAMES: List[str] = [
    "http://www.kcna.kp/cn/category/articles/q/5394b80bdae203fadef02522cfb578c0.kcmsf",
    # Add the site names you need to process here.
]


# ================== Helpers ==================
def get_suffix_32(url: str) -> Optional[str]:
    """Return the last 32 characters of *url*.

    Returns:
        The 32-character suffix, or ``None`` when *url* is empty/``None`` or
        shorter than 32 characters.
    """
    if not url or len(url) < 32:
        return None
    return url[-32:]


def find_foreign_by_suffix(cursor, suffix: str, exclude_id: int) -> Optional[Tuple[str, str]]:
    """Look up another record whose URL ends with *suffix*.

    The original author describes the match as the "foreign-language" record.
    The query itself does not filter by language: it returns any row of
    ``indeximos`` other than *exclude_id* whose ``es_urlname`` has at least
    32 characters and ends with *suffix*.

    Args:
        cursor: An open DB-API cursor on the target database.
        suffix: The 32-character URL suffix to match.
        exclude_id: ``es_sid`` of the record being processed, so it cannot
            match itself.

    Returns:
        ``(es_urltitle, es_urlcontent)`` of one matching row, or ``None`` when
        nothing matches.
    """
    # NOTE(review): LIMIT 1 without ORDER BY -- if several rows share the
    # suffix, which one is returned is left to the database.
    query = """
        SELECT es_urltitle, es_urlcontent
        FROM indeximos
        WHERE es_sid != %s
          AND es_urlname IS NOT NULL
          AND CHAR_LENGTH(es_urlname) >= 32
          AND RIGHT(es_urlname, 32) = %s
        LIMIT 1
    """
    cursor.execute(query, (exclude_id, suffix))
    return cursor.fetchone() or None


def update_chinese_record(cursor, record_id: int, title: str, content: str) -> None:
    """Set ``es_title`` and ``es_content`` on the record with the given id.

    The statement is only executed here; committing is left to the caller.

    Args:
        cursor: An open DB-API cursor on the target database.
        record_id: ``es_sid`` of the row to update.
        title: New value for ``es_title``.
        content: New value for ``es_content``.
    """
    update_query = """
        UPDATE indeximos
        SET es_title = %s, es_content = %s
        WHERE es_sid = %s
    """
    cursor.execute(update_query, (title, content, record_id))


# ================== Main logic ==================
def main() -> None:
    """Copy title/content onto target records from URL-suffix matches.

    Loads every ``indeximos`` row whose ``es_srcname`` is listed in
    ``TARGET_SRCNAMES`` and has a non-empty ``es_urlname``. For each one it
    looks up another row sharing the same 32-character URL suffix and, when
    found, writes that row's ``es_urltitle``/``es_urlcontent`` into the target
    row's ``es_title``/``es_content``.

    All updates are committed together at the end. On any error the
    transaction is rolled back and the exception is re-raised.
    """
    if not TARGET_SRCNAMES:
        print("⚠️ 未指定任何目标 es_srcname,程序退出。")
        return

    conn = pymysql.connect(**DB_CONFIG)
    try:
        # The cursor is created inside the try block so that the connection is
        # still closed (see ``finally``) if cursor creation itself fails.
        with conn.cursor() as cursor:
            # Load all records belonging to the target sources. Only "%s"
            # placeholders are interpolated into the SQL text; the values
            # themselves are passed as bound parameters.
            placeholders = ','.join(['%s'] * len(TARGET_SRCNAMES))
            query = f"""
                SELECT es_sid, es_srcname, es_urlname
                FROM indeximos
                WHERE es_srcname IN ({placeholders})
                  AND es_urlname IS NOT NULL
                  AND es_urlname != ''
            """
            cursor.execute(query, TARGET_SRCNAMES)
            records = cursor.fetchall()
            total = len(records)
            print(f"共加载 {total} 条来自 {TARGET_SRCNAMES} 的记录用于匹配...")

            updated_count = 0
            skipped_short = 0

            for idx, (record_id, es_srcname, es_urlname) in enumerate(records, 1):
                suffix = get_suffix_32(es_urlname)
                if suffix is None:
                    # URL shorter than 32 characters: nothing to match on.
                    skipped_short += 1
                    continue

                foreign_data = find_foreign_by_suffix(cursor, suffix, record_id)
                if not foreign_data:
                    continue

                title, content = foreign_data
                update_chinese_record(cursor, record_id, title, content)
                updated_count += 1
                print(f"[{idx}/{total}] ✅ 已更新 ID={record_id} | src={es_srcname}")

        # Single commit: either every update lands or none does.
        conn.commit()

        print("\n" + "=" * 50)
        print("✅ 匹配完成!")
        print(f" - 成功更新: {updated_count} 条")
        print(f" - 因 URL 长度 <32 跳过: {skipped_short} 条")
        print(f" - 总处理: {total} 条")
    except Exception as e:
        conn.rollback()
        print(f"❌ 发生错误,已回滚: {e}")
        raise
    finally:
        conn.close()


if __name__ == "__main__":
    main()