novel-doomsday-resurgence/scripts/simple_quality_check.py

215 lines
6.3 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
简化版质量检查脚本
"""
import os
import re
import sys
from datetime import datetime
def count_chinese_chars(text):
    """Count the characters of *text* in the range U+4E00..U+9FFF.

    Only code points inside that range (the basic CJK ideograph block) are
    counted; Latin letters, digits, whitespace and punctuation such as
    "," or "。" fall outside it and are ignored.

    Args:
        text: The string to scan.

    Returns:
        The number of characters whose code point lies in U+4E00..U+9FFF.
    """
    # Sum over a generator so no intermediate list is built just to take len().
    return sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
# Quoted speech: an opening mark, the shortest run of text on one line, and a
# closing mark. Recognised marks are the ASCII double quote, the curly quotes
# U+201C / U+201D and the corner brackets. The curly quotes are written as
# \u escapes so they survive copy/paste and editor normalisation.
_DIALOGUE_RE = re.compile(r'["\u201c「](.+?)["\u201d」]')


def _extract_paragraphs(content):
    """Split *content* into blank-line-separated paragraphs, dropping headings.

    Consecutive non-blank lines are concatenated (no separator) into one
    paragraph. Paragraphs that are empty after stripping, or that start with
    '#', are discarded.
    """
    paragraphs = []
    pending = []
    for line in content.split('\n'):
        if line.strip():
            pending.append(line)
        elif pending:
            # A blank line closes the paragraph collected so far.
            paragraphs.append(''.join(pending))
            pending = []
    if pending:
        paragraphs.append(''.join(pending))

    stripped = (para.strip() for para in paragraphs)
    return [para for para in stripped if para and not para.startswith('#')]


def _short_paragraph_stats(lengths, threshold=35):
    """Return (short_count, longest_run) for lengths strictly below *threshold*."""
    short_count = 0
    longest_run = 0
    run = 0
    for length in lengths:
        if length < threshold:
            short_count += 1
            run += 1
            longest_run = max(longest_run, run)
        else:
            run = 0
    return short_count, longest_run


def analyze_chapter(filepath):
    """Analyze one chapter file, print a quality summary and save a JSON report.

    The analysis covers paragraph lengths (measured with
    ``count_chinese_chars``), presence of a fixed keyword list, and the share
    of the text that sits inside quotation marks. Findings are printed to
    stdout and written to ``quality_report_ch<NNNN>.json``, a relative path
    that resolves against the current working directory.

    Args:
        filepath: Path to a UTF-8 encoded chapter file. The chapter number is
            taken from four digits followed by "_" in the file name and
            defaults to 0 when the name has no such pattern.

    Returns:
        The report dict that was written to disk (keys: chapter, file,
        timestamp, metrics, problems, recommendations).

    Raises:
        OSError: If the chapter file cannot be opened or the report cannot
            be written.
        UnicodeDecodeError: If the chapter file is not valid UTF-8.
    """
    import json  # local import: json is not among this file's top-level imports

    section_rule = "\n" + "="*60

    print(f"分析文件:{filepath}")
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    # Chapter number from the file name.
    filename = os.path.basename(filepath)
    match = re.search(r'(\d{4})_', filename)
    chapter_num = int(match.group(1)) if match else 0
    print(f"章节号:{chapter_num}")
    print(f"文件大小:{len(content)} 字符")

    # 1. Paragraph analysis
    print(section_rule)
    print("段落分析:")
    filtered_paras = _extract_paragraphs(content)
    lengths = [count_chinese_chars(para) for para in filtered_paras]
    short_count, max_consecutive = _short_paragraph_stats(lengths)
    total_paras = len(filtered_paras)
    # Guard the divisions so an empty chapter reports zeros instead of crashing.
    short_ratio = short_count / total_paras if total_paras > 0 else 0
    avg_length = sum(lengths) / len(lengths) if lengths else 0
    print(f"总段落数:{total_paras}")
    print(f"短段落数(<35字){short_count}")
    print(f"短段落比例:{short_ratio*100:.1f}%")
    print(f"最长连续短段落:{max_consecutive}")
    print(f"平均段落长度:{avg_length:.1f}")

    # 2. Keyword analysis: which of the fixed keywords occur anywhere in the text.
    print(section_rule)
    print("爽点分析:")
    golden_keywords = ["打脸", "升级", "收获", "碾压", "反转", "爽点", "优势", "先知", "重生"]
    found_keywords = [keyword for keyword in golden_keywords if keyword in content]
    print(f"找到爽点关键词:{len(found_keywords)}/{len(golden_keywords)}")
    print(f"关键词:{', '.join(found_keywords)}")

    # 3. Dialogue analysis: quoted characters relative to the whole file length.
    print(section_rule)
    print("对话分析:")
    dialogues = _DIALOGUE_RE.findall(content)
    total_chars = len(content)
    dialogue_chars = sum(len(d) for d in dialogues)
    dialogue_ratio = dialogue_chars / total_chars if total_chars > 0 else 0
    print(f"对话数量:{len(dialogues)}")
    print(f"对话比例:{dialogue_ratio*100:.1f}%")

    # 4. Problem detection against fixed thresholds.
    print(section_rule)
    print("问题识别:")
    problems = []
    if short_ratio > 0.3:
        problems.append(f"短段落比例过高 ({short_ratio*100:.1f}%)")
    if max_consecutive > 3:
        problems.append(f"连续短段落过多 ({max_consecutive}个)")
    if len(found_keywords) < 3:
        problems.append(f"爽点不足 (找到{len(found_keywords)}需要至少3个)")
    if dialogue_ratio < 0.2:
        problems.append(f"对话比例偏低 ({dialogue_ratio*100:.1f}%)")
    if problems:
        print("⚠️ 发现问题:")
        for i, problem in enumerate(problems, 1):
            print(f" {i}. {problem}")
    else:
        print("✅ 未发现严重问题")

    # 5. Recommendations; the keyword advice depends on the chapter number.
    print(section_rule)
    print("修复建议:")
    recommendations = []
    if short_ratio > 0.3:
        recommendations.append("合并短段落,提高段落平均长度")
    if len(found_keywords) < 3:
        if chapter_num == 1:
            recommendations.append("第1章需要1)明确重生优势 2)建立时间紧迫感 3)设置第一个目标")
        elif chapter_num <= 3:
            recommendations.append("黄金三章需要1)兑现第一个爽点 2)打脸小反派 3)建立升级体系")
        else:
            recommendations.append("增加爽点密度每章至少3个爽点")
    if dialogue_ratio < 0.2:
        recommendations.append("增加对话比例目标30-40%")
    if recommendations:
        for i, rec in enumerate(recommendations, 1):
            print(f" {i}. {rec}")
    else:
        print(" ✅ 无需修复")

    # 6. Build the report.
    report = {
        "chapter": chapter_num,
        "file": filename,
        "timestamp": datetime.now().isoformat(),
        "metrics": {
            "paragraphs": {
                "total": total_paras,
                "short": short_count,
                "short_ratio": short_ratio,
                "max_consecutive_short": max_consecutive,
                "avg_length": avg_length
            },
            "golden_points": {
                "found": len(found_keywords),
                "keywords": found_keywords
            },
            "dialogue": {
                "count": len(dialogues),
                "ratio": dialogue_ratio
            }
        },
        "problems": problems,
        "recommendations": recommendations
    }

    # Save the report; ensure_ascii=False keeps the Chinese text readable.
    report_file = f"quality_report_ch{chapter_num:04d}.json"
    with open(report_file, 'w', encoding='utf-8') as f:
        json.dump(report, f, ensure_ascii=False, indent=2)
    print(f"\n📄 报告已保存到:{report_file}")
    return report
def main():
    """Command-line entry point: analyze the chapter file named in argv[1].

    Prints a usage line and exits with status 1 when no path is given, and
    prints an error and exits with status 1 when the path is not an existing
    regular file. Otherwise hands the path to ``analyze_chapter``.
    """
    if len(sys.argv) < 2:
        print("用法python simple_quality_check.py <章节文件路径>")
        sys.exit(1)

    filepath = sys.argv[1]
    # isfile rather than exists: a directory path would pass an existence
    # check and then fail inside open() with a traceback.
    if not os.path.isfile(filepath):
        print(f"错误:文件不存在 - {filepath}")
        sys.exit(1)

    analyze_chapter(filepath)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()