gen_table_comments.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. 根据 database 模块下的 md 文档和 _raw_schema.txt 生成表/字段注释的 ALTER SQL。
  5. 用法: python gen_table_comments.py [kucoin|copytrade|all]
  6. 默认 all:同时生成 kucoin 和 copytrade 两个库的 SQL
  7. """
  8. import re
  9. import os
  10. import sys
  11. DB_DIR = os.path.dirname(os.path.abspath(__file__))
  12. SCHEMA_FILE = os.path.join(DB_DIR, '_raw_schema.txt')
  13. COPYTRADE_SCHEMA_FILE = os.path.join(DB_DIR, '_copytrade_dump.sql')
  14. COPYTRADE_TABLES_FILE = os.path.join(DB_DIR, 'copytrade_tables.txt')
  15. SKIP_TABLES = {'activity_copy1', 'country1', 'country2', 'cz'}
  16. def parse_mysqldump_schema(dump_path):
  17. """解析 mysqldump 导出的 schema,返回与 parse_schema 相同结构"""
  18. with open(dump_path, 'r', encoding='utf-8') as f:
  19. content = f.read()
  20. tables = {}
  21. for m in re.finditer(r"CREATE TABLE `([a-z0-9_]+)`\s*\((.*?)\)\s*ENGINE=[^;]+;", content, re.DOTALL):
  22. table_name, block = m.group(1), m.group(2)
  23. # 从 ) ENGINE=... 到 ; 之间提取 COMMENT
  24. tail = content[m.start():m.end()]
  25. tc = re.search(r"COMMENT='([^']*)'", tail)
  26. table_comment = tc.group(1) if tc else None
  27. cols = {}
  28. for line in block.split('\n'):
  29. sline = line.strip()
  30. if sline.startswith(('PRIMARY', 'KEY ', 'UNIQUE ', 'CONSTRAINT ', 'FOREIGN ')):
  31. break
  32. col_m = re.match(r'\s*`(\w+)`\s+(.+)', line)
  33. if not col_m:
  34. continue
  35. col_name, col_def = col_m.group(1), col_m.group(2).strip().rstrip(',')
  36. if col_def.strip().startswith('('):
  37. continue
  38. col_comment = None
  39. cm = re.search(r"\s*COMMENT\s+'([^']*)'", col_def)
  40. if cm:
  41. col_comment = cm.group(1).strip()
  42. col_def_clean = re.sub(r"\s*COMMENT\s+'[^']*'", '', col_def).strip()
  43. cols[col_name] = {'def': col_def_clean, 'comment': col_comment}
  44. tables[table_name] = {'columns': cols, 'table_comment': table_comment}
  45. return tables
  46. def load_copytrade_tables():
  47. """加载 copytrade 库的表清单(仅生成这些表的 SQL)"""
  48. if os.path.exists(COPYTRADE_TABLES_FILE):
  49. tables = set()
  50. with open(COPYTRADE_TABLES_FILE, 'r', encoding='utf-8') as f:
  51. for line in f:
  52. line = line.strip().split('#')[0].strip()
  53. if line and re.match(r'^[a-z_][a-z0-9_]*$', line):
  54. tables.add(line)
  55. if tables:
  56. return tables
  57. # 默认清单:跟单模块核心表及依赖
  58. return {
  59. 'admin', 'admin_access_log', 'admin_permission', 'admin_role', 'admin_role_permission',
  60. 'coin', 'contract_coin', 'contract_order_entrust', 'contract_reward_record',
  61. 'department', 'follow_lock_config', 'follow_sub_wallet', 'follow_wallet',
  62. 'follow_wallet_item', 'funding_rate_history', 'member', 'member_contract_position',
  63. 'member_contract_wallet', 'member_level', 'member_transaction', 'member_wallet',
  64. 'wallet_trans_record',
  65. }
  66. def parse_schema(schema_path):
  67. """解析 _raw_schema.txt"""
  68. with open(schema_path, 'r', encoding='utf-8') as f:
  69. content = f.read()
  70. tables = {}
  71. for m in re.finditer(r'=== TABLE: ([a-z0-9_]+) \(rows: [^)]+\) ===\s*\n(.*?)(?=\n=== TABLE:|\Z)', content, re.DOTALL):
  72. table_name, block = m.group(1), m.group(2)
  73. m2 = re.search(r"\)\s*ENGINE=\S+[^;]*COMMENT='([^']*)'", block)
  74. table_comment = m2.group(1) if m2 else None
  75. cols = {}
  76. # 按行解析,每行格式: `col_name` type_definition,
  77. for line in block.split('\n'):
  78. # 跳过 KEY、PRIMARY、CONSTRAINT 等索引/约束行
  79. sline = line.strip()
  80. if sline.startswith(('PRIMARY', 'KEY ', 'UNIQUE ', 'CONSTRAINT ', 'FOREIGN ')):
  81. break
  82. m = re.match(r'\s*`(\w+)`\s+(.+)', line)
  83. if not m:
  84. continue
  85. col_name, col_def = m.group(1), m.group(2).strip().rstrip(',')
  86. # 跳过索引定义如 KEY `member_id` (`member_id`)
  87. if col_def.strip().startswith('('):
  88. continue
  89. # 提取列定义中的 COMMENT 作为补充来源(md 优先)
  90. col_comment = None
  91. cm = re.search(r"\s*COMMENT\s+'([^']*)'", col_def)
  92. if cm:
  93. col_comment = cm.group(1).strip()
  94. col_def_clean = re.sub(r"\s*COMMENT\s+'[^']*'", '', col_def).strip()
  95. cols[col_name] = {'def': col_def_clean, 'comment': col_comment}
  96. tables[table_name] = {'columns': cols, 'table_comment': table_comment}
  97. return tables
  98. def parse_md_files(db_dir):
  99. """解析 md 文档,提取表名和字段注释"""
  100. result = {}
  101. for fn in sorted(os.listdir(db_dir)):
  102. if not fn.endswith('.md') or fn == 'README.md':
  103. continue
  104. path = os.path.join(db_dir, fn)
  105. with open(path, 'r', encoding='utf-8') as f:
  106. content = f.read()
  107. # 1. 解析模块概述表格: | 表名 | 说明 | 获取表注释
  108. in_overview = False
  109. for line in content.split('\n'):
  110. line = line.strip()
  111. if not line.startswith('|'):
  112. in_overview = False
  113. continue
  114. cells = [c.strip().replace('`', '') for c in line.split('|') if c.strip()]
  115. if len(cells) < 2:
  116. continue
  117. first = cells[0].lower()
  118. if first == '表名':
  119. in_overview = True
  120. continue
  121. if in_overview and re.match(r'^-+$', first):
  122. continue
  123. if in_overview and re.match(r'^[a-z_][a-z0-9_]*$', cells[0], re.I):
  124. tbl, desc = cells[0], cells[1]
  125. if tbl not in result:
  126. result[tbl] = {'table_comment': None, 'columns': {}}
  127. # 仅当尚无表注释时写入,避免「关联扩展表」等覆盖主模块说明
  128. if desc and len(desc) < 150 and result[tbl]['table_comment'] is None:
  129. result[tbl]['table_comment'] = desc
  130. # 2. 匹配表章节: ## 1. table_name — / ### 1. table_name()
  131. for m in re.finditer(r'^#{2,3}\s*(?:\d+\.\s+)?([a-z_][a-z0-9_]*)\s*(?:[—\-—]\s*([^#\n]+?)(?:\s*$)|[((]([^)\)]+)[))])\s*$', content, re.MULTILINE):
  132. table_name = m.group(1)
  133. # 从标题提取表注释:— 后面的文字 或 ()内的文字(overview 说明优先,不覆盖)
  134. title_comment = (m.group(2) or m.group(3) or '').strip().lstrip('— -')
  135. if table_name not in result:
  136. result[table_name] = {'table_comment': None, 'columns': {}}
  137. if title_comment and result[table_name]['table_comment'] is None:
  138. result[table_name]['table_comment'] = title_comment
  139. start = m.end()
  140. # 截取到下一个表章节:## N. 或 ### N. (避免混入其他表)
  141. end = re.search(r'\n^#{2,3}\s*\d+\.\s+', content[start:], re.MULTILINE)
  142. section = content[start:start + end.start()] if end else content[start:start+15000]
  143. if table_name not in result:
  144. result[table_name] = {'table_comment': None, 'columns': {}}
  145. # 表注释: > 表注释:xxx(overview 说明优先,表注释仅在没有 overview 时补充)
  146. tc = re.search(r'表注释[::]\s*[`]?([^`\n]+)', section)
  147. if tc:
  148. val = tc.group(1).strip()
  149. if val and '无(' not in val and '未设置' not in val:
  150. # 仅当尚无 overview 说明时使用 表注释
  151. if result[table_name]['table_comment'] is None:
  152. result[table_name]['table_comment'] = val
  153. # 字段表格: 只解析「字段说明」表(跳过 索引说明、枚举值 等表)
  154. in_table = False
  155. in_field_table = False # 仅在 字段名/列名 开头的表内才采集
  156. for line in section.split('\n'):
  157. line = line.strip()
  158. if not line.startswith('|'):
  159. in_table = False
  160. in_field_table = False
  161. continue
  162. cells = [c.strip() for c in line.split('|') if c.strip()]
  163. if len(cells) < 2:
  164. continue
  165. first = cells[0].replace('`', '').strip().lower()
  166. if re.match(r'^-+$', first):
  167. in_table = True
  168. continue
  169. if first in ('字段', '字段名', 'field', '列名'):
  170. in_table = True
  171. in_field_table = True
  172. continue
  173. if first in ('索引名', '索引', '枚举值', '数据库值'):
  174. in_field_table = False
  175. in_table = True
  176. continue
  177. if not in_table and len(cells) >= 3:
  178. in_table = True
  179. if not in_field_table:
  180. continue
  181. col_name = cells[0].replace('`', '').strip()
  182. if not re.match(r'^[a-z_][a-z0-9_]*$', col_name, re.I):
  183. continue
  184. comment = cells[-1].strip() if len(cells) >= 2 else ''
  185. if comment and comment not in ('说明', '') and len(comment) < 200:
  186. result[table_name]['columns'][col_name] = comment
  187. return result
  188. def escape_sql(s):
  189. """MySQL 字符串转义:单引号用 '' 转义"""
  190. return s.replace("\\", "\\\\").replace("'", "''")
  191. # 常见列名的推断注释(无文档时的兜底)
  192. INFER_COMMENTS = {
  193. 'id': '主键ID',
  194. 'create_time': '创建时间',
  195. 'update_time': '更新时间',
  196. 'create_date': '创建日期',
  197. 'update_date': '更新日期',
  198. 'status': '状态',
  199. 'type': '类型',
  200. 'amount': '数量',
  201. 'balance': '余额',
  202. 'frozen_balance': '冻结余额',
  203. 'remark': '备注',
  204. 'sort': '排序',
  205. 'enable': '是否启用',
  206. 'is_show': '是否显示',
  207. 'is_default': '是否默认',
  208. 'is_top': '是否置顶',
  209. 'name': '名称',
  210. 'title': '标题',
  211. 'content': '内容',
  212. 'description': '描述',
  213. 'symbol': '交易对/符号',
  214. 'price': '价格',
  215. 'fee': '手续费',
  216. 'total': '合计',
  217. 'count': '数量',
  218. 'number': '数量',
  219. 'username': '用户名',
  220. 'password': '密码',
  221. 'phone': '手机号',
  222. 'email': '邮箱',
  223. 'address': '地址',
  224. 'url': 'URL',
  225. 'image_url': '图片URL',
  226. 'link': '链接',
  227. 'code': '编码/代码',
  228. 'key': '键',
  229. 'value': '值',
  230. 'start_time': '开始时间',
  231. 'end_time': '结束时间',
  232. 'deal_time': '成交时间',
  233. 'order_id': '订单ID',
  234. 'coin_id': '币种ID',
  235. 'member_id': '用户ID',
  236. 'admin_id': '管理员ID',
  237. 'parent_id': '父级ID',
  238. 'source_member_id': '来源用户ID',
  239. 'activity_id': '活动ID',
  240. 'role_id': '角色ID',
  241. 'contract_id': '合约ID',
  242. 'position_id': '持仓ID',
  243. 'unit': '单位',
  244. }
  245. def infer_comment(col_name):
  246. """根据列名推断简短注释(兜底用)"""
  247. lower = col_name.lower()
  248. if lower in INFER_COMMENTS:
  249. return INFER_COMMENTS[lower]
  250. if lower.endswith('_id') and lower != 'id':
  251. prefix = lower[:-3].replace('_', ' ')
  252. return f'关联{prefix}的ID'
  253. if lower.endswith('_time'):
  254. return lower[:-5].replace('_', ' ') + '时间'
  255. if lower.endswith('_amount'):
  256. return lower[:-7].replace('_', ' ') + '数量'
  257. return None
  258. def generate_sql(output_path, db_name):
  259. """生成指定库的表/字段注释 SQL"""
  260. table_filter = None
  261. if db_name == 'copytrade' and os.path.exists(COPYTRADE_SCHEMA_FILE):
  262. # copytrade 使用从实际库导出的 schema
  263. schema = parse_mysqldump_schema(COPYTRADE_SCHEMA_FILE)
  264. else:
  265. schema = parse_schema(SCHEMA_FILE)
  266. if db_name == 'copytrade':
  267. table_filter = load_copytrade_tables()
  268. md_data = parse_md_files(DB_DIR)
  269. lines = [
  270. f'-- 根据 database 模块文档生成的表/字段注释 SQL(库: {db_name})',
  271. '-- 连接: mysql -h rm-3nsx177900f04a7gvwo.mysql.rds.aliyuncs.com -u root -p',
  272. '-- 执行前请备份',
  273. f'USE {db_name};',
  274. '',
  275. ]
  276. for table_name in sorted(schema.keys()):
  277. if table_filter is not None and table_name not in table_filter:
  278. continue
  279. if table_name in SKIP_TABLES or 'bak' in table_name.lower():
  280. continue
  281. s = schema[table_name]
  282. md = md_data.get(table_name, {})
  283. tc = md.get('table_comment') or s.get('table_comment')
  284. if tc and ('无(' in tc or '未设置' in tc):
  285. tc = None
  286. if not tc:
  287. tc = table_name.replace('_', ' ') # 无文档时用表名兜底
  288. lines.append(f"ALTER TABLE `{table_name}` COMMENT '{escape_sql(tc)}';")
  289. for col_name, col_info in s.get('columns', {}).items():
  290. if isinstance(col_info, str):
  291. col_def, schema_comment = col_info, None
  292. else:
  293. col_def, schema_comment = col_info.get('def', ''), col_info.get('comment')
  294. comment = md.get('columns', {}).get(col_name) or schema_comment or infer_comment(col_name)
  295. if not comment:
  296. comment = col_name.replace('_', ' ')
  297. full_def = re.sub(r"\s*COMMENT\s+'[^']*'", '', col_def) if col_def else ''
  298. lines.append(f"ALTER TABLE `{table_name}` MODIFY COLUMN `{col_name}` {full_def} COMMENT '{escape_sql(comment)}';")
  299. if tc or s.get('columns'):
  300. lines.append('')
  301. with open(output_path, 'w', encoding='utf-8') as f:
  302. f.write('\n'.join(lines))
  303. print(f'Generated {output_path}')
  304. def main():
  305. dbs = ['kucoin', 'copytrade']
  306. arg = (sys.argv[1] if len(sys.argv) > 1 else 'all').lower()
  307. if arg == 'all':
  308. for db in dbs:
  309. out = os.path.join(DB_DIR, f'alter_table_comments_{db}.sql')
  310. generate_sql(out, db)
  311. elif arg in dbs:
  312. out = os.path.join(DB_DIR, f'alter_table_comments_{arg}.sql')
  313. generate_sql(out, arg)
  314. # 同时更新 alter_table_comments.sql 为主输出
  315. default_out = os.path.join(DB_DIR, 'alter_table_comments.sql')
  316. generate_sql(default_out, arg)
  317. else:
  318. print(f'用法: python gen_table_comments.py [{"|".join(dbs)}|all]')
  319. sys.exit(1)
  320. if __name__ == '__main__':
  321. main()