novel-doomsday-resurgence/tools/clean_ai_markers.py

117 lines
3.5 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
清理AI生成的各种内部标记和注释
"""
import os
import re
# Absolute path of the directory that main() lists for *.md chapter files;
# backups (*.ai.bak) are written into this same directory.
CHAPTERS_DIR = "/root/.openclaw/workspace/projects/末日重生_囤货/chapters"
def clean_ai_markers(content):
    """Strip AI-generated analysis markers from a chapter's text.

    The text is processed line by line. A line is dropped when it

    * contains the opener of the known duplicated "negotiating table"
      paragraph,
    * is a ``# 【爽点N:`` style heading,
    * starts (ignoring leading whitespace) with ``<!--`` and mentions ``爽点``,
    * is a bare ``-->`` line (the closing line of a multi-line HTML comment),
    * or contains any of the AI analysis phrases in ``ai_markers``.

    Every other line, including ordinary inner-monologue narration, is kept
    unchanged. Afterwards, runs of three or more newlines are collapsed to a
    single blank line and surrounding whitespace is removed.

    Args:
        content: Full text of one chapter.

    Returns:
        The cleaned text with no leading or trailing whitespace.
    """
    # Substrings that identify a line as AI commentary rather than story text.
    # Built once here instead of on every loop iteration.
    ai_markers = (
        '展现重生者的先知优势',
        '利用未来信息获取利益',
        '展现主角的',
        '体现人物',
        '突出情节',
        '【爽点分析】',
        '[分析]',
        '<!-- 爽点分析:',
    )
    duplicated_paragraph = '谈判桌上,陈末掌握着对手的所有底牌'
    heading_pattern = re.compile(r'^#\s*【爽点[一二三四五六七八九十]?[:]')

    cleaned_lines = []
    for line in content.split('\n'):
        stripped = line.strip()

        # NOTE: a previous guard at this point tested `'' in line`, which is
        # always true, so it kept every marker-free line before the filters
        # below could run (headings and stray `-->` lines were never removed).
        # No special case is needed for inner-monologue lines: they carry no
        # marker and match none of the filters, so they fall through and are
        # kept by the final append.

        # Known duplicated paragraph.
        if duplicated_paragraph in line:
            continue
        # "# 【爽点一:..." style heading lines.
        if heading_pattern.search(line):
            continue
        # Opening line of an HTML comment that talks about 爽点.
        if stripped.startswith('<!--') and '爽点' in line:
            continue
        # Closing line of a multi-line HTML comment.
        if stripped == '-->':
            continue
        # Any line carrying an explicit AI analysis phrase.
        if any(marker in line for marker in ai_markers):
            continue

        cleaned_lines.append(line)

    # Removing lines can leave long runs of blank lines; keep at most one.
    result = re.sub(r'\n{3,}', '\n\n', '\n'.join(cleaned_lines))

    # strip() also discards any trailing blank lines, so no separate pass
    # over the tail of the text is required.
    return result.strip()
def main():
    """Clean every Markdown chapter in ``CHAPTERS_DIR`` that needs it.

    Files ending in ``.md`` are visited in sorted name order. A chapter is
    rewritten only when its text contains one of the trigger substrings
    below; in that case the unmodified text is first saved next to it as
    ``<name>.ai.bak`` and the chapter is then overwritten with the output of
    ``clean_ai_markers``. Progress is reported on stdout.
    """
    print("清理AI生成的各种标记...")

    # Presence of any one of these substrings triggers a cleaning pass.
    triggers = (
        '展现重生者的先知优势',
        '谈判桌上,陈末掌握着对手的所有底牌',
        '<!-- 爽点分析:',
    )

    markdown_names = sorted(
        name for name in os.listdir(CHAPTERS_DIR) if name.endswith('.md')
    )

    for name in markdown_names:
        chapter_path = os.path.join(CHAPTERS_DIR, name)
        print(f"检查: {name}")

        with open(chapter_path, 'r', encoding='utf-8') as source:
            original_text = source.read()

        if not any(trigger in original_text for trigger in triggers):
            print(" ✓ 无需清理")
            continue

        print(" ⚠ 需要清理")

        # Save the untouched text before the chapter file is overwritten.
        with open(chapter_path + '.ai.bak', 'w', encoding='utf-8') as backup:
            backup.write(original_text)

        # Compute the cleaned text before reopening the chapter for writing,
        # so the file is not truncated until the new content is ready.
        cleaned_text = clean_ai_markers(original_text)
        with open(chapter_path, 'w', encoding='utf-8') as target:
            target.write(cleaned_text)

        print(" ✓ 已清理AI标记")

    print("\nAI标记清理完成")
# Run the cleaner only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()