novel-doomsday-resurgence/.novel-sync-temp/tomato-novel/scripts/simple_quality_check.py
唐天洛 5dc8c00de0 feat(sync): 固化小说内容到Git仓库
📚 小说内容:
- 《末日重生-开局囤货十亿物资》33章
- 完整的状态文件、记忆索引、钩子系统

🛠️ 系统配置:
- 版本控制管理系统
- 自动化脚本系统
- 质量监控系统

🧠 固化记忆:
- 长期记忆文件
- 系统配置文档
- 恢复流程指南

💾 数据安全:
- 本地备份系统
- Git版本控制
- 远程同步机制

同步时间: 2026-03-30 16:25:35
系统状态: inkos正常运行中 (PID: 1433309)
创作进度: 第33章《油粮》创作中
2026-03-30 16:25:35 +08:00

215 lines
6.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
简化版质量检查脚本
"""
import os
import re
import sys
from datetime import datetime
def count_chinese_chars(text: str) -> int:
    """Return the number of characters in *text* within U+4E00..U+9FFF.

    Only code points in that inclusive range are counted; ASCII, digits,
    punctuation (including full-width punctuation) and any ideographs
    outside the range are ignored.

    Args:
        text: The string to scan.

    Returns:
        The count of matching characters (0 for an empty string).
    """
    # Count lazily instead of materializing a list just to take its length.
    return sum(1 for ch in text if '\u4e00' <= ch <= '\u9fff')
# Thresholds used by the checks below.
# NOTE: some printed messages spell out the same values literally
# ("<35字", "至少3个"); keep them in sync if a threshold changes.
_SHORT_PARA_CHARS = 35       # fewer Chinese characters than this => "short"
_MAX_SHORT_RATIO = 0.3       # share of short paragraphs that triggers a problem
_MAX_SHORT_STREAK = 3        # consecutive short paragraphs tolerated
_MIN_GOLDEN_KEYWORDS = 3     # distinct keywords expected per chapter
_MIN_DIALOGUE_RATIO = 0.2    # dialogue characters / total characters

_GOLDEN_KEYWORDS = ["打脸", "升级", "收获", "碾压", "反转", "爽点", "优势", "先知", "重生"]

# Four-digit chapter number followed by an underscore in the file name.
_CHAPTER_NUM_RE = re.compile(r'(\d{4})_')

# Quoted speech on a single line. Besides ASCII quotes and corner brackets,
# accept curly quotes and white corner brackets, which Chinese prose
# commonly uses; without them such dialogue is never counted.
_DIALOGUE_RE = re.compile(r'["“「『](.+?)["”」』]')


def _split_paragraphs(content):
    """Return blank-line separated paragraphs, stripped, without headings.

    Consecutive non-blank lines are concatenated (no separator) into one
    paragraph. Paragraphs that are empty after stripping or that start with
    '#' are dropped.
    """
    paragraphs = []
    pending = []
    for line in content.split('\n'):
        if line.strip():
            pending.append(line)
        elif pending:
            paragraphs.append(''.join(pending))
            pending = []
    if pending:
        paragraphs.append(''.join(pending))

    stripped = (para.strip() for para in paragraphs)
    return [para for para in stripped if para and not para.startswith('#')]


def _short_paragraph_stats(lengths):
    """Return (short_count, longest_short_streak) for paragraph lengths."""
    short_count = 0
    longest_streak = 0
    streak = 0
    for length in lengths:
        if length < _SHORT_PARA_CHARS:
            short_count += 1
            streak += 1
            longest_streak = max(longest_streak, streak)
        else:
            streak = 0
    return short_count, longest_streak


def _print_section(title):
    """Print a separator rule followed by a section title."""
    print("\n" + "="*60)
    print(title)


def _save_report(report, chapter_num):
    """Write *report* as JSON into the current directory; return the file name."""
    import json

    report_file = f"quality_report_ch{chapter_num:04d}.json"
    with open(report_file, 'w', encoding='utf-8') as f:
        json.dump(report, f, ensure_ascii=False, indent=2)
    return report_file


def analyze_chapter(filepath):
    """Analyze one chapter file, print the findings and save a JSON report.

    The chapter is read as UTF-8. Paragraph lengths, keyword hits and the
    share of quoted dialogue are measured, compared against fixed
    thresholds, and turned into a list of problems and recommendations.

    Side effects:
        Prints the analysis to stdout and writes
        ``quality_report_chNNNN.json`` (NNNN = chapter number, 0000 when the
        file name has no four-digit number followed by ``_``) into the
        current working directory.

    Args:
        filepath: Path of the chapter text file.

    Returns:
        The report dict with keys ``chapter``, ``file``, ``timestamp``,
        ``metrics``, ``problems`` and ``recommendations``.

    Raises:
        OSError: If the chapter cannot be read or the report cannot be
            written.
        UnicodeDecodeError: If the chapter is not valid UTF-8.
    """
    print(f"分析文件:{filepath}")
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    # Chapter number comes from the file name, e.g. "0033_xxx.md" -> 33.
    filename = os.path.basename(filepath)
    match = _CHAPTER_NUM_RE.search(filename)
    chapter_num = int(match.group(1)) if match else 0
    print(f"章节号:{chapter_num}")
    print(f"文件大小:{len(content)} 字符")

    # 1. Paragraph analysis
    _print_section("段落分析:")
    filtered_paras = _split_paragraphs(content)
    lengths = [count_chinese_chars(para) for para in filtered_paras]
    short_count, max_consecutive = _short_paragraph_stats(lengths)
    total_paras = len(filtered_paras)
    short_ratio = short_count / total_paras if total_paras > 0 else 0
    avg_length = sum(lengths) / len(lengths) if lengths else 0
    print(f"总段落数:{total_paras}")
    print(f"短段落数(<35字){short_count}")
    print(f"短段落比例:{short_ratio*100:.1f}%")
    print(f"最长连续短段落:{max_consecutive}")
    print(f"平均段落长度:{avg_length:.1f}")

    # 2. Keyword ("golden point") analysis: plain substring presence.
    _print_section("爽点分析:")
    golden_keywords = _GOLDEN_KEYWORDS
    found_keywords = [kw for kw in golden_keywords if kw in content]
    print(f"找到爽点关键词:{len(found_keywords)}/{len(golden_keywords)}")
    print(f"关键词:{', '.join(found_keywords)}")

    # 3. Dialogue analysis: quoted characters relative to the whole file.
    _print_section("对话分析:")
    dialogues = _DIALOGUE_RE.findall(content)
    total_chars = len(content)
    dialogue_chars = sum(len(d) for d in dialogues)
    dialogue_ratio = dialogue_chars / total_chars if total_chars > 0 else 0
    print(f"对话数量:{len(dialogues)}")
    print(f"对话比例:{dialogue_ratio*100:.1f}%")

    # Evaluate each threshold once; problems and recommendations share them.
    too_many_short = short_ratio > _MAX_SHORT_RATIO
    too_few_keywords = len(found_keywords) < _MIN_GOLDEN_KEYWORDS
    too_little_dialogue = dialogue_ratio < _MIN_DIALOGUE_RATIO

    # 4. Problem detection
    _print_section("问题识别:")
    problems = []
    if too_many_short:
        problems.append(f"短段落比例过高 ({short_ratio*100:.1f}%)")
    if max_consecutive > _MAX_SHORT_STREAK:
        problems.append(f"连续短段落过多 ({max_consecutive}个)")
    if too_few_keywords:
        problems.append(f"爽点不足 (找到{len(found_keywords)}需要至少3个)")
    if too_little_dialogue:
        problems.append(f"对话比例偏低 ({dialogue_ratio*100:.1f}%)")
    if problems:
        print("⚠️ 发现问题:")
        for i, problem in enumerate(problems, 1):
            print(f" {i}. {problem}")
    else:
        print("✅ 未发现严重问题")

    # 5. Recommendations (the keyword advice depends on the chapter number).
    _print_section("修复建议:")
    recommendations = []
    if too_many_short:
        recommendations.append("合并短段落,提高段落平均长度")
    if too_few_keywords:
        if chapter_num == 1:
            recommendations.append("第1章需要1)明确重生优势 2)建立时间紧迫感 3)设置第一个目标")
        elif chapter_num <= 3:
            recommendations.append("黄金三章需要1)兑现第一个爽点 2)打脸小反派 3)建立升级体系")
        else:
            recommendations.append("增加爽点密度每章至少3个爽点")
    if too_little_dialogue:
        recommendations.append("增加对话比例目标30-40%")
    if recommendations:
        for i, rec in enumerate(recommendations, 1):
            print(f" {i}. {rec}")
    else:
        print(" ✅ 无需修复")

    # 6. Build and persist the report
    report = {
        "chapter": chapter_num,
        "file": filename,
        "timestamp": datetime.now().isoformat(),
        "metrics": {
            "paragraphs": {
                "total": total_paras,
                "short": short_count,
                "short_ratio": short_ratio,
                "max_consecutive_short": max_consecutive,
                "avg_length": avg_length,
            },
            "golden_points": {
                "found": len(found_keywords),
                "keywords": found_keywords,
            },
            "dialogue": {
                "count": len(dialogues),
                "ratio": dialogue_ratio,
            },
        },
        "problems": problems,
        "recommendations": recommendations,
    }
    report_file = _save_report(report, chapter_num)
    print(f"\n📄 报告已保存到:{report_file}")
    return report
def main():
    """Command-line entry point: analyze the chapter named by ``sys.argv[1]``.

    Prints a usage line and exits with status 1 when no path argument is
    given, and prints an error and exits with status 1 when the path is not
    an existing regular file. Otherwise the path is handed to
    ``analyze_chapter``.
    """
    if len(sys.argv) < 2:
        print("用法python simple_quality_check.py <章节文件路径>")
        sys.exit(1)

    filepath = sys.argv[1]
    # isfile() rather than exists(): a directory path exists but would make
    # the later open() call fail instead of producing this error message.
    if not os.path.isfile(filepath):
        print(f"错误:文件不存在 - {filepath}")
        sys.exit(1)

    analyze_chapter(filepath)
# Run the checker only when this file is executed as a script, so that
# importing the module does not trigger argument parsing or sys.exit().
if __name__ == "__main__":
    main()