#!/usr/bin/env python3
"""Simplified quality check script."""

import json
import os
import re
import sys
from datetime import datetime


def count_chinese_chars(text):
    """Return how many characters of ``text`` lie in U+4E00..U+9FFF.

    Only that single code-point range is counted; Latin letters, digits,
    whitespace and punctuation outside the range contribute nothing.
    """
    total = 0
    for ch in text:
        if '\u4e00' <= ch <= '\u9fff':
            total += 1
    return total


# Paragraphs with fewer Chinese characters than this are counted as short.
_SHORT_PARA_CHARS = 35
# Limits beyond which a metric is reported as a problem.
_MAX_SHORT_RATIO = 0.3
_MAX_SHORT_STREAK = 3
_MIN_KEYWORDS = 3
_MIN_DIALOGUE_RATIO = 0.2

# Keywords whose presence anywhere in the chapter text is counted.
_GOLDEN_KEYWORDS = ["打脸", "升级", "收获", "碾压", "反转", "爽点", "优势", "先知", "重生"]

# One quoted span on a single line. Accepts straight quotes, curly double
# quotes and corner brackets; the curly pair is what typeset Chinese prose
# normally uses for dialogue, so leaving it out would count no dialogue.
_DIALOGUE_RE = re.compile(r'["“「](.+?)["”」]')


def _print_section(title):
    """Print a separator rule followed by a section title."""
    print("\n" + "=" * 60)
    print(title)


def _split_paragraphs(content):
    """Return the body paragraphs of ``content``.

    Paragraphs are runs of non-blank lines separated by blank lines; the
    lines of a run are concatenated without a separator. Paragraphs whose
    stripped text starts with '#' are dropped.

    NOTE(review): a heading line directly followed by body text (no blank
    line between them) forms one paragraph starting with '#', so that body
    text is dropped along with the heading.
    """
    paragraphs = []
    current = []
    for line in content.split('\n'):
        if line.strip():
            current.append(line)
        elif current:
            paragraphs.append(''.join(current))
            current = []
    if current:
        paragraphs.append(''.join(current))

    stripped = (para.strip() for para in paragraphs)
    return [para for para in stripped if para and not para.startswith('#')]


def _paragraph_stats(paragraphs):
    """Compute the length statistics stored under ``metrics.paragraphs``."""
    lengths = [count_chinese_chars(para) for para in paragraphs]

    short_count = 0
    longest_streak = 0
    streak = 0
    for length in lengths:
        if length < _SHORT_PARA_CHARS:
            short_count += 1
            streak += 1
            longest_streak = max(longest_streak, streak)
        else:
            streak = 0

    total = len(paragraphs)
    return {
        "total": total,
        "short": short_count,
        "short_ratio": short_count / total if total > 0 else 0,
        "max_consecutive_short": longest_streak,
        "avg_length": sum(lengths) / total if total > 0 else 0,
    }


def _find_problems(para_stats, keyword_count, dialogue_ratio):
    """Return the problem messages for metrics that cross their limits."""
    problems = []
    short_ratio = para_stats["short_ratio"]
    max_consecutive = para_stats["max_consecutive_short"]

    if short_ratio > _MAX_SHORT_RATIO:
        problems.append(f"短段落比例过高 ({short_ratio*100:.1f}%)")
    if max_consecutive > _MAX_SHORT_STREAK:
        problems.append(f"连续短段落过多 ({max_consecutive}个)")
    if keyword_count < _MIN_KEYWORDS:
        problems.append(f"爽点不足 (找到{keyword_count}个,需要至少3个)")
    if dialogue_ratio < _MIN_DIALOGUE_RATIO:
        problems.append(f"对话比例偏低 ({dialogue_ratio*100:.1f}%)")
    return problems


def _build_recommendations(chapter_num, short_ratio, keyword_count, dialogue_ratio):
    """Return the fix suggestions matching the detected problems."""
    recommendations = []

    if short_ratio > _MAX_SHORT_RATIO:
        recommendations.append("合并短段落,提高段落平均长度")

    if keyword_count < _MIN_KEYWORDS:
        if chapter_num == 1:
            recommendations.append("第1章需要:1)明确重生优势 2)建立时间紧迫感 3)设置第一个目标")
        elif chapter_num <= 3:
            # NOTE(review): chapter_num is 0 when the file name carries no
            # chapter number, so unnumbered files also land in this branch.
            recommendations.append("黄金三章需要:1)兑现第一个爽点 2)打脸小反派 3)建立升级体系")
        else:
            recommendations.append("增加爽点密度:每章至少3个爽点")

    if dialogue_ratio < _MIN_DIALOGUE_RATIO:
        recommendations.append("增加对话比例,目标30-40%")

    return recommendations


def analyze_chapter(filepath):
    """Analyze one chapter file, print the findings and save a JSON report.

    The chapter number is taken from the first ``NNNN_`` (four digits and an
    underscore) found in the file name, or 0 when there is none. The report
    is written to ``quality_report_chNNNN.json`` in the current working
    directory, overwriting any existing file of that name.

    Args:
        filepath: Path of a UTF-8 encoded chapter text file.

    Returns:
        The report dict with keys ``chapter``, ``file``, ``timestamp``,
        ``metrics`` (``paragraphs``, ``golden_points``, ``dialogue``),
        ``problems`` and ``recommendations``.

    Raises:
        OSError: If the chapter cannot be read or the report cannot be written.
        UnicodeDecodeError: If the chapter file is not valid UTF-8.
    """
    print(f"分析文件:{filepath}")

    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    # Extract the chapter number from the file name.
    filename = os.path.basename(filepath)
    match = re.search(r'(\d{4})_', filename)
    chapter_num = int(match.group(1)) if match else 0

    print(f"章节号:{chapter_num}")
    print(f"文件大小:{len(content)} 字符")

    # 1. Paragraph analysis
    _print_section("段落分析:")
    para_stats = _paragraph_stats(_split_paragraphs(content))
    short_ratio = para_stats["short_ratio"]

    print(f"总段落数:{para_stats['total']}")
    print(f"短段落数(<35字):{para_stats['short']}")
    print(f"短段落比例:{short_ratio*100:.1f}%")
    print(f"最长连续短段落:{para_stats['max_consecutive_short']}")
    print(f"平均段落长度:{para_stats['avg_length']:.1f}字")

    # 2. Keyword analysis (searches the whole text, headings included)
    _print_section("爽点分析:")
    found_keywords = [kw for kw in _GOLDEN_KEYWORDS if kw in content]

    print(f"找到爽点关键词:{len(found_keywords)}/{len(_GOLDEN_KEYWORDS)}")
    print(f"关键词:{', '.join(found_keywords)}")

    # 3. Dialogue analysis
    _print_section("对话分析:")
    dialogues = _DIALOGUE_RE.findall(content)

    # The ratio is quoted characters over every character in the file,
    # whitespace and heading lines included.
    total_chars = len(content)
    dialogue_chars = sum(len(d) for d in dialogues)
    dialogue_ratio = dialogue_chars / total_chars if total_chars > 0 else 0

    print(f"对话数量:{len(dialogues)}")
    print(f"对话比例:{dialogue_ratio*100:.1f}%")

    # 4. Problem detection
    _print_section("问题识别:")
    problems = _find_problems(para_stats, len(found_keywords), dialogue_ratio)

    if problems:
        print("⚠️ 发现问题:")
        for i, problem in enumerate(problems, 1):
            print(f" {i}. {problem}")
    else:
        print("✅ 未发现严重问题")

    # 5. Fix suggestions
    _print_section("修复建议:")
    recommendations = _build_recommendations(
        chapter_num, short_ratio, len(found_keywords), dialogue_ratio
    )

    if recommendations:
        for i, rec in enumerate(recommendations, 1):
            print(f" {i}. {rec}")
    else:
        print(" ✅ 无需修复")

    # 6. Build the report
    report = {
        "chapter": chapter_num,
        "file": filename,
        "timestamp": datetime.now().isoformat(),
        "metrics": {
            "paragraphs": para_stats,
            "golden_points": {
                "found": len(found_keywords),
                "keywords": found_keywords,
            },
            "dialogue": {
                "count": len(dialogues),
                "ratio": dialogue_ratio,
            },
        },
        "problems": problems,
        "recommendations": recommendations,
    }

    # Save the report next to wherever the script is run from.
    report_file = f"quality_report_ch{chapter_num:04d}.json"
    with open(report_file, 'w', encoding='utf-8') as f:
        json.dump(report, f, ensure_ascii=False, indent=2)

    print(f"\n📄 报告已保存到:{report_file}")

    return report


def main():
    """Command-line entry point: analyze the chapter file given as argv[1].

    Prints a usage line and exits with status 1 when no path argument is
    given. Prints an error and exits with status 1 when the path is not an
    existing regular file. Otherwise hands the path to ``analyze_chapter``.
    """
    if len(sys.argv) < 2:
        print("用法:python simple_quality_check.py <章节文件路径>")
        sys.exit(1)

    filepath = sys.argv[1]

    # isfile rather than exists: a directory passes an existence check and
    # would then fail inside open() with an unhandled IsADirectoryError.
    if not os.path.isfile(filepath):
        print(f"错误:文件不存在 - {filepath}")
        sys.exit(1)

    analyze_chapter(filepath)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()