novel-doomsday-resurgence/tools/remove_shuangdian_from_content.py
唐天洛 2003fa15ef 章节标题质量改进系统完成
 修复关键标题问题:
1. 筹码_手动修复 → 致命筹码
2. 修复 → 心灵修复
3. 对峙(2) → 生死对峙

 创建完整质量检查与修复工具集:
1. chapter_title_qc.py - 标题质量分析系统
2. apply_title_fixes.py - 自动修复工具
3. clean_ai_markers.py - AI标记清理工具
4. final_format_fix.py - 最终格式修复工具
5. improve_all_titles.py - 全面标题改进工具

 所有29个章节标题质量均已优化,评分A级以上
 移除爽点分析内容,确保正文纯净
 提升标题吸引力和阅读体验
2026-03-30 14:53:52 +08:00

159 lines
5.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Remove "爽点" analysis text from chapter bodies.

Analysis sections that are extracted are moved into a separate comment block.
"""
import os
import re
# Directory whose ``.md`` chapter files main() reads and rewrites in place.
CHAPTERS_DIR = "/root/.openclaw/workspace/projects/末日重生_囤货/chapters"
# Chapter headings look like "# 第N章 ..."; they are never stripped or extracted.
_HEADING_PREFIX = '# 第'

# Canned analysis phrases that leak into the prose.  Clause punctuation is
# accepted in both ASCII and full-width form, and the "anything up to the next
# clause end" parts exclude "\n" so a match can never swallow following lines.
_ANALYSIS_PHRASES = tuple(re.compile(p) for p in (
    r'展现重生者的先知优势[,,。]?',
    r'利用未来信息获取利益[,,。]?',
    r'展现主角的[^,,。\n]+[,,。]?',
    r'体现[^,,。\n]+的[^,,。\n]+[,,。]?',
    r'突出[^,,。\n]+的[^,,。\n]+[,,。]?',
))

# Bracketed annotations removed from body lines, applied in this order.
# NOTE(review): the last pattern also deletes any text quoted with corner
# brackets, and the second one deletes Markdown link text -- confirm the
# chapters never use these for dialogue or links.
_MARKER_PATTERNS = tuple(re.compile(p) for p in (
    r'【[^】]+】',
    r'\[[^\]]+\]',
    r'「[^」]+」',
))

# First line of an analysis section, e.g. "【爽点一:" (either colon form).
_SECTION_START = re.compile(r'【爽点[一二三四五六七八九十]?[::]')

# Known duplicated passage: the matching line and the two lines after it are dropped.
_DUPLICATE_BLOCK = re.compile(r'谈判桌上[,,]陈末掌握着对手的所有底牌')


def _extract_analysis_sections(lines):
    """Split *lines* into ``(body_lines, sections)``.

    A section starts on a non-chapter-heading line matching ``_SECTION_START``
    and runs until a blank line, any line starting with ``#``, or the next
    section start.  A blank terminator line is dropped; a heading terminator is
    kept in the body.
    """
    body = []
    sections = []
    current = []
    for line in lines:
        starts_section = (
            not line.startswith(_HEADING_PREFIX)
            and _SECTION_START.search(line) is not None
        )
        if starts_section:
            if current:
                sections.append('\n'.join(current))
            current = [line]
        elif current:
            if line.strip() and not line.startswith('#'):
                current.append(line)
            else:
                sections.append('\n'.join(current))
                current = []
                if line.strip():
                    body.append(line)
        else:
            body.append(line)
    if current:
        sections.append('\n'.join(current))
    return body, sections


def _strip_markers(line):
    """Remove bracketed annotations from *line*; chapter headings are left untouched."""
    for marker in _MARKER_PATTERNS:
        # Re-checked before every pattern: a removal may expose a heading.
        if line.startswith(_HEADING_PREFIX):
            break
        line = marker.sub('', line)
    return line


def _drop_duplicate_block(lines):
    """Drop each line matching ``_DUPLICATE_BLOCK`` together with the next two lines."""
    kept = []
    to_skip = 0
    for line in lines:
        if to_skip:
            to_skip -= 1
            continue
        if _DUPLICATE_BLOCK.search(line):
            to_skip = 2
            continue
        kept.append(line)
    return kept


def remove_shuangdian_from_content(content):
    """
    Remove "爽点" analysis text from a chapter and return the cleaned text.

    Steps, in order:
      1. delete the canned analysis phrases;
      2. move analysis sections (lines introduced by "【爽点X:") out of the body;
      3. strip bracketed annotations from the remaining non-heading lines;
      4. drop the known duplicated passage;
      5. collapse runs of three or more newlines into one blank line;
      6. append the extracted sections, if any, as a trailing HTML comment.

    Extraction runs before annotation stripping: stripping first would erase
    the very markers extraction looks for and leave only chapter headings to
    match, which would move the chapter title into the trailing comment.

    Args:
        content: full text of one chapter file.

    Returns:
        The cleaned text with leading and trailing whitespace stripped.
    """
    for phrase in _ANALYSIS_PHRASES:
        content = phrase.sub('', content)

    body_lines, sections = _extract_analysis_sections(content.split('\n'))
    body_lines = [_strip_markers(line) for line in body_lines]
    body_lines = _drop_duplicate_block(body_lines)

    result = re.sub(r'\n{3,}', '\n\n', '\n'.join(body_lines))
    if sections:
        result += '\n\n<!-- 爽点分析:\n'
        result += ''.join(section + '\n\n' for section in sections)
        result += '-->'
    return result.strip()
def main():
    """Clean every ``.md`` chapter in CHAPTERS_DIR in place.

    A file is processed only when it contains one of the known triggers: the
    canned "先知优势" phrase, a "【爽点X:" marker, or the known duplicated
    passage (punctuation accepted in ASCII or full-width form).  A file is
    rewritten only when cleaning actually changes it.  The pre-change text is
    saved next to it as ``<name>.shuangdian.bak``; an existing backup is never
    overwritten, so re-running the script cannot replace the pristine copy with
    already-processed text.
    """
    print("从正文中移除爽点分析内容...")
    chapter_files = sorted(f for f in os.listdir(CHAPTERS_DIR) if f.endswith('.md'))
    fixed_count = 0
    for filename in chapter_files:
        filepath = os.path.join(CHAPTERS_DIR, filename)
        print(f"处理: {filename}")
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()

        needs_fix = (
            '展现重生者的先知优势' in content
            or re.search(r'【爽点[一二三四五六七八九十]?[::]', content) is not None
            or re.search(r'谈判桌上[,,]陈末掌握着对手的所有底牌', content) is not None
        )
        fixed_content = remove_shuangdian_from_content(content) if needs_fix else content
        if fixed_content == content:
            print(" ✓ 无需修复")
            continue

        # Back up only when the file is about to change; mode 'x' refuses to
        # clobber a backup left by an earlier run.
        backup_path = filepath + '.shuangdian.bak'
        try:
            with open(backup_path, 'x', encoding='utf-8') as f:
                f.write(content)
        except FileExistsError:
            pass  # deliberate: the earliest backup is the one worth keeping

        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(fixed_content)
        print(" ✓ 已移除爽点分析内容")
        fixed_count += 1
    print(f"\n修复完成!共处理了 {len(chapter_files)} 个文件,修复了 {fixed_count} 个文件的爽点分析问题。")
# Run the clean-up only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()