novel-doomsday-resurgence/tools/remove_shuangdian_from_content.py

159 lines
5.2 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
从正文中移除爽点分析内容
将爽点分析提取到单独的注释部分
"""
import os
import re
# Directory that main() lists for chapter files ending in ".md".
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
CHAPTERS_DIR = "/root/.openclaw/workspace/projects/末日重生_囤货/chapters"
def remove_shuangdian_from_content(content):
    """Move "爽点" (payoff-point) analysis out of a chapter's prose.

    The chapter text is processed in this order:

    1. A trailing ``<!-- 爽点分析: ... -->`` block left by an earlier run is
       set aside, so re-running the tool neither nests nor duplicates it.
    2. Every block that starts on a line *beginning* with ``【爽点X:`` is cut
       out of the body, together with the non-blank, non-heading lines that
       follow it. Headings are never treated as the start of such a block,
       so chapter titles that carry a ``【爽点…】`` tag stay in place.
    3. Stock analysis phrases are deleted from the body, then any remaining
       ``【…】`` / ``[…]`` / ``「…」`` markers are deleted from every line that
       is not a chapter title (a line starting with ``# 第``).
    4. One known duplicated passage is dropped, runs of blank lines are
       collapsed, and the extracted blocks are appended as an HTML comment.

    Both ASCII and full-width commas/colons are accepted wherever the
    patterns look for punctuation.

    Args:
        content: Full Markdown text of one chapter.

    Returns:
        The cleaned chapter text with leading/trailing whitespace stripped.
        Calling the function again on its own output returns it unchanged.
    """
    title_prefix = '# 第'

    # A section starts only when the marker opens the line (optionally after
    # whitespace or Markdown decoration); inline markers inside prose are
    # left for the marker-stripping pass so the prose itself is not moved.
    section_start = re.compile(r'[\s>*_\-]*【爽点[一二三四五六七八九十]*[::]')

    # The negated classes exclude '\n' so that a phrase without closing
    # punctuation cannot swallow the lines that follow it.
    phrase_patterns = [
        re.compile(r'展现重生者的先知优势[,,。]?'),
        re.compile(r'利用未来信息获取利益[,,。]?'),
        re.compile(r'展现主角的[^,,。\n]+[,,。]?'),
        re.compile(r'体现[^,,。\n]+的[^,,。\n]+[,,。]?'),
        re.compile(r'突出[^,,。\n]+的[^,,。\n]+[,,。]?'),
    ]

    # NOTE(review): the last pattern also removes any corner-bracket quoted
    # text (e.g. dialogue written as 「…」) - confirm this is intended.
    marker_patterns = [
        re.compile(r'【[^】]+】'),
        re.compile(r'\[[^\]]+\]'),
        re.compile(r'「[^」]+」'),
    ]

    duplicate_passage = re.compile(r'谈判桌上[,,]陈末掌握着对手的所有底牌')

    # 1. Set aside the comment block appended by a previous run, if any.
    sections = []
    previous = re.search(r'\n*<!-- 爽点分析:\n(.*?)-->\s*\Z', content, re.DOTALL)
    if previous:
        archived = previous.group(1).strip('\n')
        if archived:
            sections.append(archived)
        content = content[:previous.start()]

    # 2. Cut analysis sections out of the body BEFORE any marker stripping;
    #    otherwise the 【爽点…】 markers that identify them are already gone.
    body_lines = []
    current = []  # lines of the section being collected; empty = not in one
    for line in content.split('\n'):
        is_heading = line.startswith('#')
        if not is_heading and section_start.match(line):
            # A new marker closes the section in progress and opens another.
            if current:
                sections.append('\n'.join(current))
            current = [line]
        elif current:
            if line.strip() and not is_heading:
                current.append(line)
            else:
                # A blank line or a heading ends the section. The blank
                # terminator is dropped; a heading is kept in the body.
                sections.append('\n'.join(current))
                current = []
                if line.strip():
                    body_lines.append(line)
        else:
            body_lines.append(line)
    if current:
        sections.append('\n'.join(current))

    # 3a. Delete stock analysis phrases from the remaining body.
    text = '\n'.join(body_lines)
    for phrase in phrase_patterns:
        text = phrase.sub('', text)

    # 3b. Strip leftover markers (chapter titles keep theirs) and
    # 4a. drop the known duplicated passage plus the two lines after it.
    kept_lines = []
    lines_to_skip = 0
    for line in text.split('\n'):
        if lines_to_skip:
            lines_to_skip -= 1
            continue
        if not line.startswith(title_prefix):
            for marker in marker_patterns:
                line = marker.sub('', line)
        if duplicate_passage.search(line):
            lines_to_skip = 2
            continue
        kept_lines.append(line)

    # 4b. Collapse runs of blank lines and trim the body so the appended
    #     comment is always separated from it by exactly one blank line.
    text = re.sub(r'\n{3,}', '\n\n', '\n'.join(kept_lines)).strip()

    # 4c. Append the extracted analysis as an HTML comment.
    if sections:
        text += '\n\n<!-- 爽点分析:\n'
        text += ''.join(section + '\n\n' for section in sections)
        text += '-->'

    return text.strip()
def main():
    """Clean every ``.md`` chapter under ``CHAPTERS_DIR`` in place.

    Each file is read as UTF-8 and checked for traces of analysis text. If
    any are found it is passed through ``remove_shuangdian_from_content``.
    A file is rewritten only when the cleaned text differs from what was
    read; in that case the text that was read is first saved next to it as
    ``<name>.shuangdian.bak``. Progress and a final summary are printed.
    """
    print("从正文中移除爽点分析内容...")

    chapter_files = sorted(f for f in os.listdir(CHAPTERS_DIR) if f.endswith('.md'))
    fixed_count = 0

    for filename in chapter_files:
        filepath = os.path.join(CHAPTERS_DIR, filename)
        print(f"处理: {filename}")

        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()

        # Cheap pre-check: skip files that show no trace of analysis text.
        # Commas/colons are accepted in both ASCII and full-width form.
        needs_fix = (
            '展现重生者的先知优势' in content
            or re.search(r'【爽点[一二三四五六七八九十]?[::]', content) is not None
            or re.search(r'谈判桌上[,,]陈末掌握着对手的所有底牌', content) is not None
        )
        if not needs_fix:
            print(" ✓ 无需修复")
            continue

        fixed_content = remove_shuangdian_from_content(content)
        if content == fixed_content:
            print(" ✓ 无需修复")
            continue

        # Write the backup only when the file is really about to change, so
        # a re-run that changes nothing cannot overwrite an earlier backup
        # with already-processed text.
        backup_path = filepath + '.shuangdian.bak'
        with open(backup_path, 'w', encoding='utf-8') as f:
            f.write(content)

        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(fixed_content)
        print(" ✓ 已移除爽点分析内容")
        fixed_count += 1

    print(f"\n修复完成!共处理了 {len(chapter_files)} 个文件,修复了 {fixed_count} 个文件的爽点分析问题。")


if __name__ == '__main__':
    main()