Files
NASOpenClawRunTime/tools/gen-wechat-md.py

101 lines
2.7 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
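Generate a Markdown document ready for a WeChat Official Account.
Extracts the article body from a master draft and strips internal metadata
(compliance notes, fill-in status, Key Takeaways, etc.),
then writes a clean Markdown file for publishing.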
"""
import sys
import re
from pathlib import Path
# Ordered (pattern, replacement) rules that remove the internal-only parts of
# a master draft. Section headings are anchored at a line start (or at the
# very start of the text) rather than at a preceding "\n": once the header
# block has been removed, the next section can sit at offset 0, where a
# "\n"-prefixed pattern would never match.
_INTERNAL_SECTION_RULES = (
    # Header block: the "# 【母版】" title line, the blockquote that follows it,
    # and everything up to and including the "---" rule.
    (re.compile(r'^# 【母版】.*?\n\n>(?:.*\n)*?---\n\n', re.MULTILINE), ''),
    # "基本信息" section: the heading plus every consecutive table row
    # (a lazy ".*?\n" would stop after the first row and leak the rest).
    (re.compile(r'^## 基本信息\n\n(?:\|[^\n]*(?:\n|\Z))+', re.MULTILINE), ''),
    # "标题候选" section: from the "**A" candidate up to the first blank line.
    (re.compile(r'^## 标题候选\n\n\*\*A.*?\n\n', re.MULTILINE | re.DOTALL), ''),
    # "SEO 关键词" section: the heading and its fenced code block.
    (re.compile(r'^## SEO 关键词\n+\n```[\s\S]*?```\n', re.MULTILINE), ''),
    # "配图需求" section: up to the next H2, the Key Takeaways heading, or EOF.
    (
        re.compile(
            r'(?:\A|\n)## 配图需求\n+> \*\*描述\*\*.*?'
            r'(?=\n## |\n### Key Takeaways|$)',
            re.DOTALL,
        ),
        '',
    ),
    # "Key Takeaways" section, written as either H2 or H3 (the rule above
    # expects the H3 form), up to the compliance section or EOF.
    (
        re.compile(
            r'(?:\A|\n)#{2,3} Key Takeaways.*?(?=\n#{2,3} 合规|$)',
            re.DOTALL,
        ),
        '',
    ),
    # "合规自检记录" section: the heading and everything after it.
    (re.compile(r'(?:\A|\n)## 合规自检记录\n\n.*', re.DOTALL), ''),
)


def _strip_internal_sections(content: str) -> str:
    """Return *content* with internal sections removed and headings normalized."""
    for pattern, replacement in _INTERNAL_SECTION_RULES:
        content = pattern.sub(replacement, content)

    # Promote "###" headings to "##".
    content = re.sub(r'^### (.+)$', r'## \1', content, flags=re.MULTILINE)
    # Collapse runs of three or more newlines into a single blank line.
    content = re.sub(r'\n{3,}', '\n\n', content)
    # Removing a leading section can leave blank lines at the top; drop them,
    # trim trailing whitespace, and end the text with exactly one newline.
    return content.lstrip('\n').rstrip() + '\n'


def extract_wechat_md(input_path: str, output_path: str):
    """Write a publish-ready Markdown file derived from a master draft.

    Reads ``input_path`` as UTF-8, removes the header metadata block and the
    "基本信息", "标题候选", "SEO 关键词", "配图需求", "Key Takeaways" and
    "合规自检记录" sections, rewrites "###" headings as "##", squeezes
    excess blank lines, and writes the result to ``output_path`` as UTF-8.

    Args:
        input_path: Path of the master draft to read.
        output_path: Path of the cleaned Markdown file to write.

    Returns:
        ``output_path``, unchanged.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    with open(input_path, 'r', encoding='utf-8') as f:
        content = f.read()

    content = _strip_internal_sections(content)

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(content)
    print(f"✅ 生成完成:{output_path}")
    return output_path
if __name__ == '__main__':
    # Usage: gen-wechat-md.py [INPUT [OUTPUT]]
    # With no arguments, the last "*master*_v2.md" file (in sorted name
    # order) from the drafts directory is converted.
    output_suffix = '_公众号版.md'

    if len(sys.argv) < 2:
        drafts_dir = Path('/home/node/.openclaw/workspace/drafts')
        files = sorted(drafts_dir.glob('*master*_v2.md'))
        if not files:
            print("未找到母版文件")
            sys.exit(1)
        latest = files[-1]
        input_file = str(latest)
        # Swap only the trailing "_v2.md" of the file name for the suffix.
        output_file = str(
            latest.with_name(latest.name[:-len('_v2.md')] + output_suffix)
        )
    else:
        input_file = sys.argv[1]
        if len(sys.argv) > 2:
            output_file = sys.argv[2]
        else:
            # Build the name from the final path component only. A plain
            # str.replace('.md', ...) returns the input path unchanged when
            # it has no ".md" (so the source file would be overwritten) and
            # also rewrites ".md" inside directory names.
            source = Path(input_file)
            output_file = str(source.with_name(source.stem + output_suffix))

    extract_wechat_md(input_file, output_file)