200 lines
6.5 KiB
Python
Executable File
200 lines
6.5 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
小说字数统计工具
|
|
统计章节字数、总字数、进度等
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
from datetime import datetime
|
|
import json
|
|
|
|
class WordCounter:
    """Collect word-count statistics for a novel stored as Markdown chapters.

    The instance attributes hold the result of the most recent
    ``count_project`` call:

    * ``total_words``   -- sum of the per-chapter ``words`` values
    * ``total_chars``   -- sum of the per-chapter ``chars`` values
    * ``chapter_stats`` -- list of the per-chapter dicts built by ``count_file``
    """

    # Patterns are compiled once; the pattern text itself defines what is counted.
    _CHINESE_CHAR_RE = re.compile(r'[\u4e00-\u9fff]')
    _ENGLISH_WORD_RE = re.compile(r'\b[a-zA-Z]+\b')
    _DIALOGUE_RE = re.compile(r'["「][^"」]+["」]')
    _DIGIT_RUN_RE = re.compile(r'(\d+)')

    def __init__(self):
        self.total_words = 0
        self.total_chars = 0
        self.chapter_stats = []

    @classmethod
    def _natural_key(cls, path):
        """Return a sort key that orders digit runs numerically ("ch2" < "ch10")."""
        pieces = cls._DIGIT_RUN_RE.split(path)
        # Splitting on a capturing group puts the digit runs at the odd indexes,
        # so every key has the same str/int layout and keys stay comparable.
        return [int(piece) if index % 2 else piece
                for index, piece in enumerate(pieces)]

    def count_file(self, file_path):
        """Compute the statistics of a single UTF-8 text file.

        Args:
            file_path: Path of the file to read.

        Returns:
            A dict with the keys ``file``, ``words`` (CJK characters plus
            ASCII-letter words), ``chars`` (length of the whole text),
            ``paragraphs`` (non-blank chunks separated by an empty line),
            ``dialogue_lines`` (quoted spans) and ``dialogue_ratio``
            (quoted spans per paragraph, rounded to 2 decimals; it can
            exceed 1). Returns ``None`` after printing a message when the
            file cannot be read or decoded.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, ValueError) as e:
            # ValueError covers UnicodeDecodeError; stay best-effort so one
            # unreadable chapter does not abort the whole project scan.
            print(f"统计文件失败 {file_path}: {e}")
            return None

        # Word count = Chinese characters + English words.
        chinese_chars = len(self._CHINESE_CHAR_RE.findall(content))
        english_words = len(self._ENGLISH_WORD_RE.findall(content))

        paragraphs = sum(1 for chunk in content.split('\n\n') if chunk.strip())
        dialogue_lines = len(self._DIALOGUE_RE.findall(content))

        return {
            "file": file_path,
            "words": chinese_chars + english_words,
            "chars": len(content),
            "paragraphs": paragraphs,
            "dialogue_lines": dialogue_lines,
            # max(..., 1) avoids dividing by zero for an empty file.
            "dialogue_ratio": round(dialogue_lines / max(paragraphs, 1), 2),
        }

    def count_project(self, project_path):
        """Compute the statistics of every ``.md`` file under ``<project>/chapters``.

        The instance totals and ``chapter_stats`` are reset and refilled.
        Files that ``count_file`` could not read are left out.

        Args:
            project_path: Directory that contains the ``chapters`` folder.

        Returns:
            A dict with ``project``, ``total_chapters``, ``total_words``,
            ``total_chars``, ``avg_words_per_chapter`` and ``chapters``, or
            ``None`` (after printing a message) when the chapters directory
            is missing or holds no ``.md`` file.
        """
        chapters_dir = os.path.join(project_path, "chapters")

        if not os.path.exists(chapters_dir):
            print(f"章节目录不存在: {chapters_dir}")
            return None

        # Collect every Markdown file, including those in sub-directories.
        chapter_files = [
            os.path.join(root, name)
            for root, _dirs, names in os.walk(chapters_dir)
            for name in names
            if name.endswith('.md')
        ]

        if not chapter_files:
            print("没有找到章节文件")
            return None

        # Natural ordering keeps chapter 2 ahead of chapter 10; a plain
        # string sort would list "ch10.md" before "ch2.md".
        chapter_files.sort(key=self._natural_key)

        self.chapter_stats = []
        self.total_words = 0
        self.total_chars = 0

        for chapter_file in chapter_files:
            stats = self.count_file(chapter_file)
            if stats is not None:
                self.chapter_stats.append(stats)
                self.total_words += stats['words']
                self.total_chars += stats['chars']

        return {
            "project": project_path,
            "total_chapters": len(self.chapter_stats),
            "total_words": self.total_words,
            "total_chars": self.total_chars,
            "avg_words_per_chapter": round(self.total_words / max(len(self.chapter_stats), 1)),
            "chapters": self.chapter_stats,
        }

    def generate_report(self, project_path, output_path=None):
        """Build the Markdown statistics report for a project.

        Args:
            project_path: Directory that contains the ``chapters`` folder.
            output_path: Optional file to write the report to. Missing parent
                directories are created; a bare file name is written to the
                current working directory.

        Returns:
            The report text, or ``None`` when ``count_project`` found nothing
            to count.
        """
        stats = self.count_project(project_path)
        if not stats:
            return None

        # Thresholds used by the ratings below.
        tomato_standard = 2500  # standard words per chapter
        daily_target = 4000     # words per day for the daily-update target

        completed_chapters = stats['total_chapters']
        total_words = stats['total_words']
        avg_words = stats['avg_words_per_chapter']

        if avg_words < 2000:
            word_rating = "⚠️ 字数偏少"
        elif avg_words < tomato_standard:
            word_rating = "✅ 符合标准"
        elif avg_words < 3500:
            word_rating = "✅ 字数充足"
        else:
            word_rating = "⚠️ 字数偏多"

        # chapter_stats is empty when every chapter file failed to read;
        # max(..., 1) keeps that case from raising ZeroDivisionError.
        avg_dialogue_ratio = (
            sum(s['dialogue_ratio'] for s in self.chapter_stats)
            / max(len(self.chapter_stats), 1)
        )
        if avg_dialogue_ratio < 0.3:
            dialogue_rating = "⚠️ 对话偏少(影响听书分成)"
        elif avg_dialogue_ratio < 0.5:
            dialogue_rating = "✅ 对话适中"
        else:
            dialogue_rating = "✅ 对话丰富(适合听书)"

        word_status = "符合" if 2000 <= avg_words <= 3500 else "需要调整"
        dialogue_status = "适合听书" if avg_dialogue_ratio >= 0.3 else "需要增加对话"

        # The empty first and last entries give the summary its leading and
        # trailing newline once the lines are joined.
        summary_lines = [
            "",
            "# 小说字数统计报告",
            "",
            "## 项目信息",
            f"- 项目路径: {project_path}",
            f"- 统计时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
            f"- 章节数量: {completed_chapters} 章",
            f"- 总字数: {total_words} 字",
            f"- 总字符数: {stats['total_chars']} 字符",
            "",
            "## 章节统计",
            f"- 平均每章字数: {avg_words} 字",
            f"- 字数评估: {word_rating}",
            f"- 平均对话占比: {avg_dialogue_ratio:.1%}",
            f"- 对话评估: {dialogue_rating}",
            "",
            "## 番茄平台适配",
            "### 字数要求",
            "- 标准章节字数: 2500-3500字",
            f"- 当前平均: {avg_words} 字",
            f"- 状态: {word_status}",
            "",
            "### 更新要求",
            f"- 日更全勤要求: {daily_target}字/天",
            f"- 当前总字数: {total_words} 字",
            f"- 相当于: {total_words // daily_target} 天的全勤更新量",
            "",
            "### 听书优化",
            "- 推荐对话占比: >30%",
            f"- 当前对话占比: {avg_dialogue_ratio:.1%}",
            f"- 状态: {dialogue_status}",
            "",
            "## 详细章节数据",
            "",
        ]
        sections = ["\n".join(summary_lines)]

        # One detail section per chapter, numbered in listing order.
        for i, chapter in enumerate(self.chapter_stats, 1):
            # splitext drops only the extension; str.replace would also eat
            # any ".md" that appears inside the name itself.
            chapter_name = os.path.splitext(os.path.basename(chapter['file']))[0]
            sections.append(
                f"\n### 第{i}章: {chapter_name}\n"
                f"- 字数: {chapter['words']} 字\n"
                f"- 字符: {chapter['chars']} 字符\n"
                f"- 段落: {chapter['paragraphs']} 段\n"
                f"- 对话行: {chapter['dialogue_lines']} 行\n"
                f"- 对话占比: {chapter['dialogue_ratio']:.1%}\n"
            )

        report = "".join(sections)

        if output_path:
            # dirname is "" for a bare file name and os.makedirs("") raises
            # FileNotFoundError, so only create a directory when one is named.
            output_dir = os.path.dirname(output_path)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(report)

            print(f"报告已保存: {output_path}")

        return report
|
|
|
|
def main():
    """Command-line entry point: parse the options, build the report, print it."""
    import argparse

    cli = argparse.ArgumentParser(description="小说字数统计工具")
    cli.add_argument("--project", default=".", help="项目路径")
    cli.add_argument("--output", help="输出报告路径")
    options = cli.parse_args()

    # generate_report also writes the file when --output was given.
    text = WordCounter().generate_report(options.project, options.output)
    if not text:
        return

    print(text)
|
|
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|