137 lines
3.8 KiB
Python
137 lines
3.8 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
"""
|
||
|
|
最终格式修复脚本
|
||
|
|
确保所有章节都有正确的Markdown格式
|
||
|
|
"""
|
||
|
|
|
||
|
|
import os
|
||
|
|
import re
|
||
|
|
|
||
|
|
# Directory that main() lists for chapter files ending in ".md".
CHAPTERS_DIR = "/root/.openclaw/workspace/projects/末日重生_囤货/chapters"
|
||
|
|
|
||
|
|
def final_fix_format(content, filename):
    """Rebuild a chapter's text in the standard Markdown layout.

    The output starts with a ``# 第N章 标题`` heading derived from ``filename``,
    followed by the body paragraphs and finally every paragraph containing
    ``【爽点``; all parts are separated by exactly one blank line.

    Args:
        content: Raw chapter text.
        filename: Chapter file name. When it matches
            ``ch<num>-第<num>章 <title>.md`` the number after ``ch`` and the
            title are used for the heading; otherwise the heading falls back
            to chapter ``1`` titled ``未命名``.

    Returns:
        The reformatted text, terminated by a single newline.
    """
    # 1. Chapter number and title: defaults, overridden from the file name.
    chapter_num = "1"
    chapter_title = "未命名"
    match = re.search(r'ch(\d+)-第\d+章\s+(.+)\.md', filename)
    if match:
        chapter_num = match.group(1)
        chapter_title = match.group(2).strip()

    # 2. The standard heading that always opens the result.
    title_line = f"# 第{chapter_num}章 {chapter_title}"

    # 3. Clean every line. Blank lines are dropped here because paragraph
    #    spacing is regenerated when the pieces are joined below.
    whole_line_bold = re.compile(r'^\*\*(.*?)\*\*$')
    # Both the ASCII and the full-width forms are listed so that stray
    # whitespace is removed around either kind of punctuation mark.
    spaced_punct = re.compile(r'\s*([,。!?;:,!?;:])\s*')
    cleaned_lines = []
    for raw_line in content.split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        # Unwrap bold markers only when they enclose the entire line.
        line = whole_line_bold.sub(r'\1', line)
        line = spaced_punct.sub(r'\1', line)
        cleaned_lines.append(line)

    # 4. Regroup lines into paragraphs. Headings and lines starting with
    #    the highlight marker stand alone; consecutive ordinary lines are
    #    merged into one paragraph joined by single spaces.
    paragraphs = []
    pending = []
    for line in cleaned_lines:
        if line.startswith(('#', '【爽点')):
            if pending:
                paragraphs.append(' '.join(pending))
                pending = []
            paragraphs.append(line)
        else:
            pending.append(line)
    if pending:
        paragraphs.append(' '.join(pending))

    # 5. Highlight paragraphs go last. A heading identical to the generated
    #    title is dropped so the title is not emitted twice.
    highlight_paras = [p for p in paragraphs if '【爽点' in p]
    body_paras = [
        p for p in paragraphs
        if '【爽点' not in p and p != title_line
    ]

    # 6. Join with one blank line between parts. No part contains a newline,
    #    so there are never runs of blank lines to squeeze afterwards.
    result = '\n\n'.join([title_line, *body_paras, *highlight_paras])

    # 7. Collapse runs of spaces/tabs left inside lines.
    result = re.sub(r'[ \t]{2,}', ' ', result)

    return result.strip() + '\n'
|
||
|
|
|
||
|
|
def main():
    """Apply final_fix_format to every ``.md`` file in CHAPTERS_DIR.

    Files are visited in ascending order of the number following ``ch`` in
    their name (names without such a number use key 0). A file whose content
    already starts with ``# 第`` is reported as correct and left untouched.
    Every other file is first copied to ``<path>.final.bak`` and then
    overwritten with the reformatted text. Progress is printed to stdout.
    """
    print("执行最终格式修复...")

    def chapter_order(name):
        # Numeric sort key taken from the "ch<digits>" part of the name.
        found = re.search(r'ch(\d+)', name)
        return int(found.group(1)) if found else 0

    markdown_names = [
        entry for entry in os.listdir(CHAPTERS_DIR) if entry.endswith('.md')
    ]
    markdown_names.sort(key=chapter_order)

    for filename in markdown_names:
        filepath = os.path.join(CHAPTERS_DIR, filename)
        print(f"修复: {filename}")

        with open(filepath, 'r', encoding='utf-8') as source:
            content = source.read()

        # Already begins with a chapter heading: nothing to do.
        if content.startswith('# 第'):
            print(" ✓ 格式已正确")
            continue

        # Keep a copy of the untouched text next to the chapter file.
        with open(filepath + '.final.bak', 'w', encoding='utf-8') as backup:
            backup.write(content)

        # Compute the new text before reopening the file for writing.
        fixed_content = final_fix_format(content, filename)
        with open(filepath, 'w', encoding='utf-8') as target:
            target.write(fixed_content)

        print(" ✓ 已完成最终修复")

    print("\n所有章节格式修复完成!")
|
||
|
|
|
||
|
|
# Script entry point: run the repair only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
|