novel-tools/analyzers/word_count.py

200 lines
6.5 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
小说字数统计工具
统计章节字数、总字数、进度等
"""
import os
import re
from datetime import datetime
import json
class WordCounter:
    """Collect word-count statistics for a novel project and render a report.

    A project is a directory that contains a ``chapters`` sub-directory; every
    ``.md`` file found beneath it (recursively) is treated as one chapter.

    Attributes (reset on every ``count_project`` call):
        total_words: Sum of the per-chapter ``words`` values.
        total_chars: Sum of the per-chapter ``chars`` values.
        chapter_stats: List of the per-chapter dicts returned by ``count_file``.
    """

    # Thresholds used by generate_report (labelled "Tomato platform standard"
    # in the original comments).
    MIN_CHAPTER_WORDS = 2000
    STANDARD_CHAPTER_WORDS = 2500
    MAX_CHAPTER_WORDS = 3500
    DAILY_TARGET_WORDS = 4000
    MIN_DIALOGUE_RATIO = 0.3
    RICH_DIALOGUE_RATIO = 0.5

    # Compiled once; count_file runs them against every chapter.
    _CJK_CHAR_RE = re.compile(r'[\u4e00-\u9fff]')
    _LATIN_WORD_RE = re.compile(r'\b[a-zA-Z]+\b')
    # Quoted spans: ASCII "...", curly \u201c...\u201d and corner-bracket 「...」.
    _DIALOGUE_RE = re.compile(r'["\u201c「][^"\u201d」]+["\u201d」]')
    _DIGIT_RUN_RE = re.compile(r'(\d+)')

    def __init__(self):
        """Start with empty totals; they are filled in by ``count_project``."""
        self.total_words = 0
        self.total_chars = 0
        self.chapter_stats = []

    @classmethod
    def _natural_sort_key(cls, path):
        """Return a sort key that orders digit runs numerically (ch2 < ch10)."""
        # re.split with one capture group alternates text / digits / text ...,
        # so odd indexes are always digit runs and types never mix per index.
        pieces = cls._DIGIT_RUN_RE.split(path)
        numeric_aware = [
            int(piece) if index % 2 else piece
            for index, piece in enumerate(pieces)
        ]
        # The raw path breaks ties such as "01" vs "1" deterministically.
        return (numeric_aware, path)

    def count_file(self, file_path):
        """Compute statistics for a single UTF-8 text file.

        Args:
            file_path: Path of the file to read.

        Returns:
            A dict with keys ``file``, ``words`` (CJK characters plus Latin
            words), ``chars`` (length of the whole text), ``paragraphs``
            (non-blank chunks separated by a blank line), ``dialogue_lines``
            (number of quoted spans) and ``dialogue_ratio`` (quoted spans per
            paragraph, rounded to 2 decimals; it can exceed 1).
            ``None`` if the file cannot be read or decoded; the error is
            printed instead of raised.
        """
        # Only the read can fail in an expected way, so only it is guarded.
        # ValueError covers UnicodeDecodeError and malformed paths.
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, ValueError) as e:
            print(f"统计文件失败 {file_path}: {e}")
            return None

        # Word count = CJK characters + Latin-letter words.
        chinese_chars = len(self._CJK_CHAR_RE.findall(content))
        english_words = len(self._LATIN_WORD_RE.findall(content))
        paragraphs = sum(1 for chunk in content.split('\n\n') if chunk.strip())
        dialogue_lines = len(self._DIALOGUE_RE.findall(content))

        return {
            "file": file_path,
            "words": chinese_chars + english_words,
            "chars": len(content),
            "paragraphs": paragraphs,
            "dialogue_lines": dialogue_lines,
            # max(..., 1) avoids dividing by zero for an empty file.
            "dialogue_ratio": round(dialogue_lines / max(paragraphs, 1), 2),
        }

    def count_project(self, project_path):
        """Compute statistics for every ``.md`` chapter of a project.

        Args:
            project_path: Directory that contains the ``chapters`` folder.

        Returns:
            A dict with keys ``project``, ``total_chapters``, ``total_words``,
            ``total_chars``, ``avg_words_per_chapter`` and ``chapters`` (the
            per-file dicts, in natural path order). ``None`` if the chapters
            directory is missing or holds no ``.md`` file; a message is
            printed in both cases. Files that fail to read are skipped.
        """
        chapters_dir = os.path.join(project_path, "chapters")
        if not os.path.exists(chapters_dir):
            print(f"章节目录不存在: {chapters_dir}")
            return None

        chapter_files = [
            os.path.join(root, name)
            for root, _dirs, files in os.walk(chapters_dir)
            for name in files
            if name.endswith('.md')
        ]
        if not chapter_files:
            print("没有找到章节文件")
            return None

        # Natural order, so chapter 10 follows chapter 9 rather than chapter 1;
        # the report numbers chapters by their position in this list.
        chapter_files.sort(key=self._natural_sort_key)

        self.chapter_stats = []
        self.total_words = 0
        self.total_chars = 0
        for chapter_file in chapter_files:
            stats = self.count_file(chapter_file)
            if stats:
                self.chapter_stats.append(stats)
                self.total_words += stats['words']
                self.total_chars += stats['chars']

        chapter_count = len(self.chapter_stats)
        return {
            "project": project_path,
            "total_chapters": chapter_count,
            "total_words": self.total_words,
            "total_chars": self.total_chars,
            # max(..., 1): every file may have failed to read.
            "avg_words_per_chapter": round(self.total_words / max(chapter_count, 1)),
            "chapters": self.chapter_stats,
        }

    def generate_report(self, project_path, output_path=None):
        """Build a Markdown statistics report and optionally save it.

        Args:
            project_path: Directory that contains the ``chapters`` folder.
            output_path: If given, the report is written there as UTF-8; a
                missing parent directory is created first.

        Returns:
            The report text, or ``None`` when ``count_project`` returns
            nothing.
        """
        stats = self.count_project(project_path)
        if not stats:
            return None

        chapters = stats['chapters']
        completed_chapters = stats['total_chapters']
        total_words = stats['total_words']
        avg_words = stats['avg_words_per_chapter']

        # Length rating. The upper bound is inclusive so that it agrees with
        # the "符合" status line below, which also accepts exactly 3500.
        if avg_words < self.MIN_CHAPTER_WORDS:
            word_rating = "⚠️ 字数偏少"
        elif avg_words < self.STANDARD_CHAPTER_WORDS:
            word_rating = "✅ 符合标准"
        elif avg_words <= self.MAX_CHAPTER_WORDS:
            word_rating = "✅ 字数充足"
        else:
            word_rating = "⚠️ 字数偏多"

        # Dialogue rating. The list is empty when every chapter file failed
        # to read, so guard the division.
        avg_dialogue_ratio = (
            sum(chapter['dialogue_ratio'] for chapter in chapters)
            / max(len(chapters), 1)
        )
        if avg_dialogue_ratio < self.MIN_DIALOGUE_RATIO:
            dialogue_rating = "⚠️ 对话偏少(影响听书分成)"
        elif avg_dialogue_ratio < self.RICH_DIALOGUE_RATIO:
            dialogue_rating = "✅ 对话适中"
        else:
            dialogue_rating = "✅ 对话丰富(适合听书)"

        words_in_range = self.MIN_CHAPTER_WORDS <= avg_words <= self.MAX_CHAPTER_WORDS
        length_status = "符合" if words_in_range else "需要调整"
        enough_dialogue = avg_dialogue_ratio >= self.MIN_DIALOGUE_RATIO
        audio_status = "适合听书" if enough_dialogue else "需要增加对话"

        # The leading and trailing empty items reproduce the newline that
        # opens and closes the summary section.
        summary_lines = [
            "",
            "# 小说字数统计报告",
            "## 项目信息",
            f"- 项目路径: {project_path}",
            f"- 统计时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
            f"- 章节数量: {completed_chapters}",
            f"- 总字数: {total_words}",
            f"- 总字符数: {stats['total_chars']} 字符",
            "## 章节统计",
            f"- 平均每章字数: {avg_words}",
            f"- 字数评估: {word_rating}",
            f"- 平均对话占比: {avg_dialogue_ratio:.1%}",
            f"- 对话评估: {dialogue_rating}",
            "## 番茄平台适配",
            "### 字数要求",
            f"- 标准章节字数: {self.STANDARD_CHAPTER_WORDS}-{self.MAX_CHAPTER_WORDS}",
            f"- 当前平均: {avg_words}",
            f"- 状态: {length_status}",
            "### 更新要求",
            f"- 日更全勤要求: {self.DAILY_TARGET_WORDS}/",
            f"- 当前总字数: {total_words}",
            f"- 相当于: {total_words // self.DAILY_TARGET_WORDS} 天的全勤更新量",
            "### 听书优化",
            f"- 推荐对话占比: >{self.MIN_DIALOGUE_RATIO:.0%}",
            f"- 当前对话占比: {avg_dialogue_ratio:.1%}",
            f"- 状态: {audio_status}",
            "## 详细章节数据",
            "",
        ]
        sections = ["\n".join(summary_lines)]

        # One detail section per chapter, numbered by position.
        for i, chapter in enumerate(chapters, 1):
            # splitext drops only the extension, not ".md" inside the name.
            chapter_name = os.path.splitext(os.path.basename(chapter['file']))[0]
            sections.append(
                f"\n### 第{i}章: {chapter_name}\n"
                f"- 字数: {chapter['words']}\n"
                f"- 字符: {chapter['chars']} 字符\n"
                f"- 段落: {chapter['paragraphs']}\n"
                f"- 对话行: {chapter['dialogue_lines']}\n"
                f"- 对话占比: {chapter['dialogue_ratio']:.1%}\n"
            )
        report = "".join(sections)

        if output_path:
            # A bare filename has an empty dirname, which os.makedirs rejects.
            output_dir = os.path.dirname(output_path)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(report)
            print(f"报告已保存: {output_path}")

        return report
def main():
    """Command-line entry point.

    Parses ``--project`` (defaults to the current directory) and the optional
    ``--output`` path, generates the report through ``WordCounter`` and prints
    it when one was produced.
    """
    import argparse

    cli = argparse.ArgumentParser(description="小说字数统计工具")
    cli.add_argument("--project", default=".", help="项目路径")
    cli.add_argument("--output", help="输出报告路径")
    options = cli.parse_args()

    rendered = WordCounter().generate_report(options.project, options.output)
    if not rendered:
        # Nothing to show: generate_report returned no report.
        return
    print(rendered)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()