novel-doomsday-resurgence/tools/fix_chapter_format.py

126 lines
4.0 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
Python script that fixes formatting problems in novel chapters.
Specifically handles formatting problems in Markdown files generated by inkos.
"""
import os
import re
import sys
# Directory whose *.md files are processed by main(). Absolute, machine-specific path.
CHAPTERS_DIR = "/root/.openclaw/workspace/projects/末日重生_囤货/chapters"
# Full-width CJK punctuation: comma, full stop, exclamation mark, question
# mark, semicolon, colon. Spelled as escapes so the class cannot be silently
# folded into its ASCII look-alikes by an editor or Unicode normalisation.
_CJK_PUNCT = "\uff0c\u3002\uff01\uff1f\uff1b\uff1a"
_CJK_PUNCT_SPACING_RE = re.compile(rf"\s*([{_CJK_PUNCT}])\s*")
_HEADING_PREFIX_RE = re.compile(r'^(#+)\s*')
_WHITESPACE_RUN_RE = re.compile(r'\s+')
_BOLD_LINE_RE = re.compile(r'^\*\*(.*?)\*\*$')
_EDGE_STARS_RE = re.compile(r'^\*+|\*+$')
# "第N章" immediately followed by one or more copies of the same marker.
_REPEATED_CHAPTER_RE = re.compile(r'^([#\s]*)(第\d+章)(?:\s+\2)+(?=\s|$)')
_ASCII_PUNCT_RE = re.compile(r'([a-zA-Z])\s*([,.!?;:]+)(\s*)')
# The separator must stay on the title line, hence "whitespace except newline".
_TITLE_RE = re.compile(r'第(\d+)章[^\S\n]+(.+)')
_EXCESS_NEWLINES_RE = re.compile(r'\n{3,}')
_HIGHLIGHT_TAG = '【爽点'
# The chapter title is only looked for near the top of the file.
_TITLE_SEARCH_WINDOW = 100


def _respace_ascii_punct(match):
    """Rebuild "letter + punctuation run" with at most one following space."""
    letter, marks, gap = match.groups()
    text = match.string
    end = match.end()
    if end == len(text):
        # Never leave trailing whitespace at the end of a line.
        return letter + marks
    if gap or text[end].isalnum():
        # Keep an existing gap (collapsed to one space), or open one before
        # the next word. Nothing is inserted before quotes, brackets or "/".
        return letter + marks + ' '
    return letter + marks


def fix_chapter_format(content):
    """
    Normalise the formatting of one chapter's Markdown text.

    Per line, in order:
      1. Headings get exactly one space after the leading ``#`` run (the
         heading level is preserved) and inner whitespace is collapsed.
      2. A line wrapped entirely in ``**...**`` loses the bold markers.
      3. Leading and trailing runs of ``*`` are stripped.
      4. An immediately repeated chapter marker ("第N章 第N章 ...") is reduced
         to a single one.
      5. A line containing "【爽点" is dropped when the next raw line also
         contains it, so only the last line of such a run survives.
      6. Leading blank lines and consecutive blank lines are dropped.
      7. Whitespace around full-width CJK punctuation is removed.
      8. ASCII punctuation after a Latin letter is pulled tight against the
         letter and followed by a single space where a word follows.

    Afterwards, the first "第N章 <title>" found near the top of the text has
    its whole line rewritten as "# 第N章 <title>", and runs of three or more
    newlines are collapsed to two.

    Args:
        content: Full text of a chapter file, with "\\n" line endings.

    Returns:
        The reformatted text. A single trailing newline is preserved.
    """
    source_lines = content.split('\n')
    cleaned = []

    for index, line in enumerate(source_lines):
        # 1. Headings: keep the level, enforce "#... " and collapse whitespace.
        if line.startswith('#'):
            line = _HEADING_PREFIX_RE.sub(r'\1 ', line)
            line = _WHITESPACE_RUN_RE.sub(' ', line).strip()

        # 2-3. Bold wrappers and stray asterisks at either edge.
        line = _BOLD_LINE_RE.sub(r'\1', line)
        line = _EDGE_STARS_RE.sub('', line)

        # 4. Only a *repeated* marker is removed; a single one is the title.
        line = _REPEATED_CHAPTER_RE.sub(r'\1\2', line)

        # 5. Compared against the raw (unprocessed) next line.
        if _HIGHLIGHT_TAG in line:
            has_next = index + 1 < len(source_lines)
            if has_next and _HIGHLIGHT_TAG in source_lines[index + 1]:
                continue

        # 6. Skip blank lines at the start or directly after another blank.
        if not line.strip() and (not cleaned or not cleaned[-1].strip()):
            continue

        # 7-8. Punctuation spacing.
        line = _CJK_PUNCT_SPACING_RE.sub(r'\1', line)
        line = _ASCII_PUNCT_RE.sub(_respace_ascii_punct, line)

        cleaned.append(line)

    result = '\n'.join(cleaned)

    # 9. Rewrite only the line holding the title; later mentions of a chapter
    #    number in the body must not be touched.
    title = _TITLE_RE.search(result)
    if title and title.start() < _TITLE_SEARCH_WINDOW:
        line_start = result.rfind('\n', 0, title.start()) + 1
        line_end = result.find('\n', title.end())
        if line_end == -1:
            line_end = len(result)
        heading = f"# 第{title.group(1)}章 {title.group(2).strip()}".rstrip()
        # Spliced by slicing so the title is never read as a regex template.
        result = result[:line_start] + heading + result[line_end:]

    # 10. At most one empty line between paragraphs.
    return _EXCESS_NEWLINES_RE.sub('\n\n', result)
def main():
    """
    Reformat every ``.md`` file found directly inside CHAPTERS_DIR.

    Each file is read as UTF-8 and passed through fix_chapter_format(). When
    the result differs, the original text is saved next to the file as
    ``<name>.bak`` and the file is rewritten in place. Progress and a final
    summary are printed to stdout.

    An existing ``.bak`` is never overwritten: on a repeated run it still
    holds the text from before the first fix, which is the copy worth keeping.
    """
    print("开始修复章节格式问题...")

    # Sorted so files are processed (and reported) in a stable order.
    chapter_files = sorted(
        name for name in os.listdir(CHAPTERS_DIR) if name.endswith('.md')
    )
    fixed_count = 0

    for filename in chapter_files:
        filepath = os.path.join(CHAPTERS_DIR, filename)
        print(f"处理文件: {filename}")

        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()

        fixed_content = fix_chapter_format(content)

        if content == fixed_content:
            print(" ✓ 格式正常")
            continue

        # Exclusive-create mode ('x') fails if the backup already exists,
        # which protects the earliest backup from being clobbered.
        backup_path = filepath + '.bak'
        try:
            with open(backup_path, 'x', encoding='utf-8') as backup:
                backup.write(content)
        except FileExistsError:
            pass

        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(fixed_content)

        print(f" ✓ 已修复格式问题 (备份: {filename}.bak)")
        fixed_count += 1

    print(f"\n修复完成!共处理了 {len(chapter_files)} 个文件,修复了 {fixed_count} 个文件的格式问题。")
# Run the batch fix only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()