#!/usr/bin/env python3
"""
Remove duplicated 爽点 ("highlight") paragraphs and repair chapter heading format.
"""
|
||
|
|
|
||
|
|
import os
|
||
|
|
import re
|
||
|
|
|
||
|
|
# Hard-coded absolute path of the directory whose ``.md`` chapter files are
# listed and rewritten in place by ``main()``.
CHAPTERS_DIR = "/root/.openclaw/workspace/projects/末日重生_囤货/chapters"
|
||
|
|
|
||
|
|
# Literal prefix that identifies a "爽点" (highlight) marker line.
_SHUANGDIAN_MARK = '【爽点'

# Captures the label between "【爽点" and "】"; the label may be empty.
_SHUANGDIAN_LABEL_RE = re.compile(r'【爽点([^】]*)】')

# A chapter heading: "第N章" at the START of a line, optionally preceded by
# Markdown '#' markers and/or '*' emphasis.  Nothing in the pattern can match
# a newline, so a heading never swallows the line that follows it.
_CHAPTER_HEADING_RE = re.compile(
    r'^[ \t]*(?:#+[ \t]*)?(?:(\*+)[ \t]*)?第(\d+)章([^\n]*)$',
    re.MULTILINE,
)

# The leading heading is only normalised when it starts this close to the top.
_TITLE_WINDOW = 200


def _format_heading(match):
    """Render a ``_CHAPTER_HEADING_RE`` match as ``# 第N章 <title>``."""
    emphasis, number, title = match.groups()
    title = title.strip()
    if emphasis:
        # The heading was wrapped like "**第1章 标题**": drop the closing stars.
        title = title.rstrip('*').rstrip()
    # rstrip() avoids a dangling space when the heading has no title text.
    return f"# 第{number}章 {title}".rstrip()


def _shuangdian_key(line):
    """Return the de-duplication key of a marker line, or None for other lines.

    Labelled markers ("【爽点1】...") are keyed by their label.  Markers with an
    empty or unterminated label are keyed by their full stripped text, so only
    exact repeats of such lines count as duplicates.
    """
    if _SHUANGDIAN_MARK not in line:
        return None
    found = _SHUANGDIAN_LABEL_RE.search(line)
    if found and found.group(1):
        return ('label', found.group(1))
    return ('text', line.strip())


def _move_shuangdian_to_end(text):
    """Move paragraphs containing a marker to the end, dropping repeated keys."""
    story = []
    highlights = []
    seen = set()
    for paragraph in text.split('\n\n'):
        if _SHUANGDIAN_MARK not in paragraph:
            story.append(paragraph)
            continue
        rows = []
        for row in paragraph.split('\n'):
            key = _shuangdian_key(row)
            if key is None:
                # Ordinary text inside a highlight paragraph is always kept.
                if row.strip():
                    rows.append(row)
            elif key not in seen:
                seen.add(key)
                rows.append(row)
        if rows:
            highlights.append('\n'.join(rows))
    return '\n\n'.join(story + highlights)


def clean_duplicate_sections(content):
    """Remove duplicated 爽点 entries and normalise chapter headings.

    Processing steps:

    1. A line that *starts* with ``第N章`` (optionally behind whitespace, ``#``
       or ``*`` markers) and does not already begin with ``#`` becomes
       ``# 第N章 <title>``.  Lines that only mention a chapter number
       mid-sentence are left untouched.
    2. Within a run of adjacent marker lines sharing the same key, only the
       last one is kept; adjacent markers with different keys are all kept.
    3. Runs of blank (or whitespace-only) lines collapse to one empty line,
       and leading blank lines are dropped.
    4. Paragraphs containing a marker are moved after the remaining text; a
       marker key is emitted only the first time it is seen.
    5. The first heading, if it starts within the first ``_TITLE_WINDOW``
       characters, is rewritten in the standard single-``#`` form.  Only that
       one line is replaced.
    6. Three or more consecutive newlines collapse to two.

    Args:
        content: Full text of one chapter file.

    Returns:
        The cleaned text, stripped of surrounding whitespace and terminated
        by exactly one newline.
    """
    source_lines = content.split('\n')
    kept = []

    for index, line in enumerate(source_lines):
        # 1. Promote plain chapter lines to Markdown headings.
        if not line.startswith('#'):
            heading = _CHAPTER_HEADING_RE.match(line)
            if heading:
                line = _format_heading(heading)

        # 2. Skip this marker when the next line repeats the same key, so the
        #    last entry of an adjacent duplicate run is the one that survives.
        key = _shuangdian_key(line)
        if key is not None and index + 1 < len(source_lines):
            if _shuangdian_key(source_lines[index + 1]) == key:
                continue

        # 3. Collapse blank runs; store blanks as '' so that the paragraph
        #    split on '\n\n' below sees whitespace-only lines as separators.
        if not line.strip():
            if kept and kept[-1]:
                kept.append('')
            continue

        kept.append(line)

    result = '\n'.join(kept)

    # 4. Relocate highlight paragraphs to the end of the chapter.
    if _SHUANGDIAN_MARK in result:
        result = _move_shuangdian_to_end(result)

    # 5. Normalise only the leading heading.  The text is spliced by position
    #    instead of using re.sub, so the title is never interpreted as a
    #    replacement template and no other line is affected.
    first_heading = _CHAPTER_HEADING_RE.search(result)
    if first_heading and first_heading.start() < _TITLE_WINDOW:
        result = (
            result[:first_heading.start()]
            + _format_heading(first_heading)
            + result[first_heading.end():]
        )

    # 6. Collapse any remaining runs of empty lines.
    result = re.sub(r'\n{3,}', '\n\n', result)

    return result.strip() + '\n'
|
||
|
|
|
||
|
|
def main():
    """Clean every ``.md`` file in ``CHAPTERS_DIR`` in place.

    Files are handled in sorted name order.  Each one is read as UTF-8 and
    passed through ``clean_duplicate_sections``.  When the result differs from
    what was read, the text that was read is first written to
    ``<file>.clean.bak`` and the file is then overwritten with the cleaned
    text.  Progress is reported on stdout.
    """
    print("清理重复的爽点段落和修复格式...")

    markdown_names = sorted(
        name for name in os.listdir(CHAPTERS_DIR) if name.endswith('.md')
    )

    for name in markdown_names:
        target = os.path.join(CHAPTERS_DIR, name)

        print(f"处理: {name}")

        with open(target, 'r', encoding='utf-8') as source:
            original = source.read()

        cleaned = clean_duplicate_sections(original)

        if original == cleaned:
            print(" ✓ 无需清理")
            continue

        # Write the backup before touching the chapter file itself.
        with open(target + '.clean.bak', 'w', encoding='utf-8') as backup:
            backup.write(original)

        with open(target, 'w', encoding='utf-8') as sink:
            sink.write(cleaned)

        print(" ✓ 已清理重复内容")

    print("\n清理完成!")


if __name__ == '__main__':
    main()
|