novel-doomsday-resurgence/tools/clean_duplicate_titles.py

69 lines
2.1 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
清理重复的章节标题
"""
import os
import re
# Absolute path of the directory that main() scans for ``.md`` chapter files.
CHAPTERS_DIR = "/root/.openclaw/workspace/projects/末日重生_囤货/chapters"
def clean_duplicate_titles(content, filename):
    """Remove a second line that merely repeats the chapter title.

    The title is extracted from ``filename``, which must look like
    ``ch<digits>-第<digits>章 <title>.md``.  The text is only touched when its
    first line is the heading ``# 第<digits>章 <title>`` and its second line
    consists of nothing but that title again, optionally preceded by ``#``
    markers and/or a ``第<digits>章`` prefix.

    Args:
        content: Full text of the chapter file.
        filename: Base name of the chapter file.

    Returns:
        ``content`` with the duplicate title line dropped, or ``content``
        unchanged when the file name does not match, the first line is not
        the expected heading, or the second line is not a repeat of the title.
    """
    match = re.search(r'ch\d+-第\d+章\s+(.+)\.md', filename)
    if not match:
        return content

    chapter_title = match.group(1).strip()
    if not chapter_title:
        # The capture can be whitespace only; an empty title would "match"
        # every line, so there is nothing meaningful to de-duplicate.
        return content

    lines = content.split('\n')
    if len(lines) < 2:
        return content

    # The first line must already be the correct heading for this chapter.
    heading = re.compile(rf"# 第\d+章 {re.escape(chapter_title)}")
    if not heading.match(lines[0]):
        return content

    # Only a line that *is* the title counts as a duplicate.  A substring
    # test would also hit ordinary prose that happens to mention the title
    # and silently delete words from the chapter text.
    candidate = lines[1].strip()
    undecorated = re.sub(r'^#*\s*(?:第\d+章)?\s*', '', candidate)
    if chapter_title not in (candidate, undecorated):
        return content

    del lines[1]
    return '\n'.join(lines)
def main():
    """Clean every ``.md`` file in ``CHAPTERS_DIR`` in place.

    Files are visited in sorted name order.  Each one is read as UTF-8,
    passed through ``clean_duplicate_titles`` and written back only when the
    returned text differs from what was read.  Progress is printed per file.
    """
    print("清理重复的章节标题...")

    markdown_names = sorted(
        name for name in os.listdir(CHAPTERS_DIR) if name.endswith('.md')
    )
    for filename in markdown_names:
        filepath = os.path.join(CHAPTERS_DIR, filename)
        print(f"处理: {filename}")

        with open(filepath, 'r', encoding='utf-8') as source:
            original_text = source.read()

        cleaned_text = clean_duplicate_titles(original_text, filename)
        if cleaned_text == original_text:
            print(" ✓ 无需清理")
            continue

        # Rewrite the file only when the cleaner actually changed something.
        with open(filepath, 'w', encoding='utf-8') as target:
            target.write(cleaned_text)
        print(" ✓ 已清理重复标题")

    print("\n清理完成!")
# Run the cleanup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()