#!/usr/bin/env python3
"""
Novel word-count tool.

Counts words per chapter and for the whole project, and renders a Markdown
report with chapter-length and dialogue-ratio assessments.
"""

import argparse
import json
import os
import re
from datetime import datetime

# Patterns are compiled once at import time instead of once per chapter.
_CJK_CHAR_RE = re.compile(r'[\u4e00-\u9fff]')
# re.ASCII makes \b an ASCII word boundary. With the default Unicode
# semantics CJK characters count as word characters, so a Latin word glued to
# Chinese text (e.g. "他说ok") has no boundary and would never be counted.
_ENGLISH_WORD_RE = re.compile(r'\b[a-zA-Z]+\b', re.ASCII)
# Quoted spans: ASCII double quotes, curly double quotes (U+201C/U+201D) and
# corner brackets (U+300C/U+300D).
_DIALOGUE_RE = re.compile(r'["\u201c\u300c][^"\u201d\u300d]+["\u201d\u300d]')
_DIGIT_RUN_RE = re.compile(r'(\d+)')

# Daily word count used by the report's "full attendance" section.
DAILY_TARGET_WORDS = 4000


def _natural_sort_key(path):
    """Return a sort key that orders digit runs numerically.

    ``re.split`` with a capturing group always alternates text and digit
    pieces, so pieces at the same index have the same type and the resulting
    lists are always comparable. The raw path is appended as a tie-breaker
    (e.g. ``ch01`` vs ``ch1``) to keep the order deterministic.
    """
    pieces = _DIGIT_RUN_RE.split(path)
    key = [int(piece) if index % 2 else piece
           for index, piece in enumerate(pieces)]
    return key, path


class WordCounter:
    """Collects word statistics for chapter files and builds a report."""

    def __init__(self):
        # Running totals and per-chapter results of the last count_project().
        self.total_words = 0
        self.total_chars = 0
        self.chapter_stats = []

    def count_file(self, file_path):
        """Compute statistics for a single UTF-8 text file.

        Args:
            file_path: Path of the file to read.

        Returns:
            A dict with keys ``file``, ``words`` (CJK characters plus English
            words), ``chars`` (length of the content), ``paragraphs``
            (non-blank chunks separated by a blank line), ``dialogue_lines``
            (quoted spans) and ``dialogue_ratio`` (quoted spans per
            paragraph, rounded to 2 places). Returns ``None`` and prints a
            message if the file cannot be read or decoded.
        """
        # Only the read can fail; keep the try body limited to it.
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, ValueError) as e:
            # ValueError covers UnicodeDecodeError and malformed paths.
            print(f"统计文件失败 {file_path}: {e}")
            return None

        chinese_chars = len(_CJK_CHAR_RE.findall(content))
        english_words = len(_ENGLISH_WORD_RE.findall(content))
        paragraphs = sum(1 for p in content.split('\n\n') if p.strip())
        dialogue_lines = len(_DIALOGUE_RE.findall(content))

        return {
            "file": file_path,
            "words": chinese_chars + english_words,
            "chars": len(content),
            "paragraphs": paragraphs,
            "dialogue_lines": dialogue_lines,
            # max(..., 1) avoids dividing by zero for an empty file.
            "dialogue_ratio": round(dialogue_lines / max(paragraphs, 1), 2),
        }

    def count_project(self, project_path):
        """Compute statistics for every ``.md`` file under ``<project>/chapters``.

        Resets and refills ``chapter_stats``, ``total_words`` and
        ``total_chars``. Files that fail to read are skipped.

        Args:
            project_path: Project root containing a ``chapters`` directory.

        Returns:
            A dict with ``project``, ``total_chapters``, ``total_words``,
            ``total_chars``, ``avg_words_per_chapter`` and ``chapters``, or
            ``None`` (after printing a message) when the chapters directory
            is missing or holds no ``.md`` files.
        """
        chapters_dir = os.path.join(project_path, "chapters")
        if not os.path.exists(chapters_dir):
            print(f"章节目录不存在: {chapters_dir}")
            return None

        chapter_files = [
            os.path.join(root, name)
            for root, _dirs, names in os.walk(chapters_dir)
            for name in names
            if name.endswith('.md')
        ]
        if not chapter_files:
            print("没有找到章节文件")
            return None

        # Natural order, so that chapter2 comes before chapter10; the report
        # numbers chapters by their position in this list.
        chapter_files.sort(key=_natural_sort_key)

        self.chapter_stats = []
        self.total_words = 0
        self.total_chars = 0
        for chapter_file in chapter_files:
            stats = self.count_file(chapter_file)
            if stats:
                self.chapter_stats.append(stats)
                self.total_words += stats['words']
                self.total_chars += stats['chars']

        chapter_count = len(self.chapter_stats)
        return {
            "project": project_path,
            "total_chapters": chapter_count,
            "total_words": self.total_words,
            "total_chars": self.total_chars,
            "avg_words_per_chapter": round(
                self.total_words / max(chapter_count, 1)
            ),
            "chapters": self.chapter_stats,
        }

    def generate_report(self, project_path, output_path=None):
        """Build the Markdown statistics report for a project.

        Args:
            project_path: Project root passed on to ``count_project``.
            output_path: Optional file to write the report to. Missing parent
                directories are created; a bare filename is written to the
                current working directory.

        Returns:
            The report text, or ``None`` when the project could not be
            counted or no chapter could be read.
        """
        stats = self.count_project(project_path)
        if not stats:
            return None
        chapters = stats['chapters']
        if not chapters:
            # Every chapter failed to read: averaging would divide by zero.
            print("没有可统计的章节")
            return None

        total_words = stats['total_words']
        avg_words = stats['avg_words_per_chapter']

        # Chapter-length assessment.
        if avg_words < 2000:
            word_rating = "⚠️ 字数偏少"
        elif avg_words < 2500:
            word_rating = "✅ 符合标准"
        elif avg_words < 3500:
            word_rating = "✅ 字数充足"
        else:
            word_rating = "⚠️ 字数偏多"

        # Dialogue assessment, based on the mean per-chapter ratio.
        avg_dialogue_ratio = (
            sum(c['dialogue_ratio'] for c in chapters) / len(chapters)
        )
        if avg_dialogue_ratio < 0.3:
            dialogue_rating = "⚠️ 对话偏少(影响听书分成)"
        elif avg_dialogue_ratio < 0.5:
            dialogue_rating = "✅ 对话适中"
        else:
            dialogue_rating = "✅ 对话丰富(适合听书)"

        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        word_status = "符合" if 2000 <= avg_words <= 3500 else "需要调整"
        listen_status = (
            "适合听书" if avg_dialogue_ratio >= 0.3 else "需要增加对话"
        )

        # Collect lines and join once instead of repeated string +=.
        lines = [
            "",
            "# 小说字数统计报告",
            "",
            "## 项目信息",
            f"- 项目路径: {project_path}",
            f"- 统计时间: {timestamp}",
            f"- 章节数量: {stats['total_chapters']} 章",
            f"- 总字数: {total_words} 字",
            f"- 总字符数: {stats['total_chars']} 字符",
            "",
            "## 章节统计",
            f"- 平均每章字数: {avg_words} 字",
            f"- 字数评估: {word_rating}",
            f"- 平均对话占比: {avg_dialogue_ratio:.1%}",
            f"- 对话评估: {dialogue_rating}",
            "",
            "## 番茄平台适配",
            "",
            "### 字数要求",
            "- 标准章节字数: 2500-3500字",
            f"- 当前平均: {avg_words} 字",
            f"- 状态: {word_status}",
            "",
            "### 更新要求",
            f"- 日更全勤要求: {DAILY_TARGET_WORDS}字/天",
            f"- 当前总字数: {total_words} 字",
            f"- 相当于: {total_words // DAILY_TARGET_WORDS} 天的全勤更新量",
            "",
            "### 听书优化",
            "- 推荐对话占比: >30%",
            f"- 当前对话占比: {avg_dialogue_ratio:.1%}",
            f"- 状态: {listen_status}",
            "",
            "## 详细章节数据",
        ]

        # Per-chapter details.
        for i, chapter in enumerate(chapters, 1):
            # splitext drops only the extension; str.replace would also strip
            # any ".md" occurring inside the name.
            chapter_name = os.path.splitext(
                os.path.basename(chapter['file'])
            )[0]
            lines.extend([
                "",
                f"### 第{i}章: {chapter_name}",
                f"- 字数: {chapter['words']} 字",
                f"- 字符: {chapter['chars']} 字符",
                f"- 段落: {chapter['paragraphs']} 段",
                f"- 对话行: {chapter['dialogue_lines']} 行",
                f"- 对话占比: {chapter['dialogue_ratio']:.1%}",
            ])
        report = "\n".join(lines) + "\n"

        if output_path:
            # dirname is "" for a bare filename and os.makedirs("") raises,
            # so only create the directory when there is one.
            output_dir = os.path.dirname(output_path)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(report)
            print(f"报告已保存: {output_path}")

        return report


def main():
    """Parse command-line arguments, then generate and print the report."""
    parser = argparse.ArgumentParser(description="小说字数统计工具")
    parser.add_argument("--project", help="项目路径", default=".")
    parser.add_argument("--output", help="输出报告路径")
    args = parser.parse_args()

    counter = WordCounter()
    report = counter.generate_report(args.project, args.output)
    if report:
        print(report)


if __name__ == "__main__":
    main()