novel-tools/analyzers/word_count.py

#!/usr/bin/env python3
"""
小说字数统计工具
统计章节字数、总字数、进度等
"""

import os
import re
from datetime import datetime
import json

class WordCounter:
    """Collect word, character, paragraph and dialogue statistics for a novel.

    A project is a directory that contains a ``chapters`` sub-directory holding
    Markdown (``.md``) chapter files. The results of the most recent
    :meth:`count_project` run are kept on the instance in ``total_words``,
    ``total_chars`` and ``chapter_stats``.
    """

    # Thresholds used when rating the figures in the generated report.
    MIN_CHAPTER_WORDS = 2000        # below this the average chapter is too short
    STANDARD_CHAPTER_WORDS = 2500   # lower bound of the standard chapter length
    MAX_CHAPTER_WORDS = 3500        # upper bound (inclusive) of the standard length
    DAILY_TARGET_WORDS = 4000       # words per day used for the update estimate
    MIN_DIALOGUE_RATIO = 0.3        # recommended minimum dialogue ratio
    RICH_DIALOGUE_RATIO = 0.5       # from here on dialogue is rated as rich

    # One "word" per character in the CJK range U+4E00-U+9FFF.
    _CHINESE_CHAR_RE = re.compile(r'[\u4e00-\u9fff]')
    # One "word" per run of ASCII letters.
    _ENGLISH_WORD_RE = re.compile(r'\b[a-zA-Z]+\b')
    # Quoted spans: ASCII double quotes, Chinese curly quotes (U+201C/U+201D)
    # and corner brackets (U+300C/U+300D).
    _DIALOGUE_RE = re.compile(r'["\u201c\u300c][^"\u201d\u300d]+["\u201d\u300d]')
    # Capturing group so that re.split keeps the digit runs in the result.
    _DIGIT_RUN_RE = re.compile(r'(\d+)')

    def __init__(self):
        """Start with empty totals; they are filled by :meth:`count_project`."""
        self.total_words = 0
        self.total_chars = 0
        self.chapter_stats = []

    @classmethod
    def _natural_sort_key(cls, path):
        """Return a sort key that orders digit runs in *path* by numeric value."""
        tokens = cls._DIGIT_RUN_RE.split(path)
        # re.split with one capturing group alternates text / digits, so every
        # odd index holds a digit run. Keying on the index (rather than on
        # str.isdigit) keeps str and int from ever being compared.
        numeric = [int(tok) if idx % 2 else tok for idx, tok in enumerate(tokens)]
        # The raw path breaks ties such as "ch01.md" versus "ch1.md".
        return numeric, path

    def count_file(self, file_path):
        """Compute the statistics of a single chapter file.

        Args:
            file_path: Path of a UTF-8 encoded text file.

        Returns:
            A dict with the keys ``file``, ``words``, ``chars``,
            ``paragraphs``, ``dialogue_lines`` and ``dialogue_ratio``, or
            ``None`` when the file cannot be read or decoded (a message is
            printed in that case).
        """
        # Only the I/O is guarded; UnicodeDecodeError is a ValueError subclass.
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, ValueError) as e:
            print(f"统计文件失败 {file_path}: {e}")
            return None

        chinese_chars = len(self._CHINESE_CHAR_RE.findall(content))
        english_words = len(self._ENGLISH_WORD_RE.findall(content))

        # Paragraphs are the non-blank chunks separated by an empty line.
        paragraphs = sum(1 for chunk in content.split('\n\n') if chunk.strip())
        dialogue_lines = len(self._DIALOGUE_RE.findall(content))

        return {
            "file": file_path,
            "words": chinese_chars + english_words,
            "chars": len(content),
            "paragraphs": paragraphs,
            "dialogue_lines": dialogue_lines,
            # Quoted spans per paragraph; max() guards against empty files.
            "dialogue_ratio": round(dialogue_lines / max(paragraphs, 1), 2),
        }

    def count_project(self, project_path):
        """Compute the statistics of every chapter below ``<project>/chapters``.

        Chapters are processed in natural order, so ``第2章.md`` comes before
        ``第10章.md``. Files that cannot be read are skipped. The instance
        totals and ``chapter_stats`` are reset and refilled on every call.

        Args:
            project_path: Directory that contains the ``chapters`` folder.

        Returns:
            A dict with the keys ``project``, ``total_chapters``,
            ``total_words``, ``total_chars``, ``avg_words_per_chapter`` and
            ``chapters``, or ``None`` when the chapters directory is missing
            or holds no ``.md`` file.
        """
        chapters_dir = os.path.join(project_path, "chapters")

        if not os.path.isdir(chapters_dir):
            print(f"章节目录不存在: {chapters_dir}")
            return None

        chapter_files = [
            os.path.join(root, name)
            for root, _dirs, names in os.walk(chapters_dir)
            for name in names
            if name.endswith('.md')
        ]

        if not chapter_files:
            print("没有找到章节文件")
            return None

        # A plain lexicographic sort would put chapter 10 before chapter 2.
        chapter_files.sort(key=self._natural_sort_key)

        self.chapter_stats = []
        self.total_words = 0
        self.total_chars = 0

        for chapter_file in chapter_files:
            stats = self.count_file(chapter_file)
            if stats:
                self.chapter_stats.append(stats)
                self.total_words += stats['words']
                self.total_chars += stats['chars']

        chapter_count = len(self.chapter_stats)
        return {
            "project": project_path,
            "total_chapters": chapter_count,
            "total_words": self.total_words,
            "total_chars": self.total_chars,
            # max() keeps the average defined when every file was unreadable.
            "avg_words_per_chapter": round(self.total_words / max(chapter_count, 1)),
            "chapters": self.chapter_stats,
        }

    def _rate_words(self, avg_words):
        """Return the rating label for the average chapter length."""
        if avg_words < self.MIN_CHAPTER_WORDS:
            return "⚠️ 字数偏少"
        if avg_words < self.STANDARD_CHAPTER_WORDS:
            return "✅ 符合标准"
        # The upper bound is inclusive so the label agrees with the status
        # line of the report, which accepts exactly MAX_CHAPTER_WORDS.
        if avg_words <= self.MAX_CHAPTER_WORDS:
            return "✅ 字数充足"
        return "⚠️ 字数偏多"

    def _rate_dialogue(self, avg_dialogue_ratio):
        """Return the rating label for the average dialogue ratio."""
        if avg_dialogue_ratio < self.MIN_DIALOGUE_RATIO:
            return "⚠️ 对话偏少（影响听书分成）"
        if avg_dialogue_ratio < self.RICH_DIALOGUE_RATIO:
            return "✅ 对话适中"
        return "✅ 对话丰富（适合听书）"

    def generate_report(self, project_path, output_path=None):
        """Build a Markdown report for the project and optionally save it.

        Args:
            project_path: Directory that contains the ``chapters`` folder.
            output_path: Optional file to write the report to. Missing parent
                directories are created; a bare file name is written to the
                current working directory.

        Returns:
            The report text, or ``None`` when :meth:`count_project` found
            nothing to count.
        """
        stats = self.count_project(project_path)
        if not stats:
            return None

        chapters = stats['chapters']
        total_words = stats['total_words']
        avg_words = stats['avg_words_per_chapter']

        # count_project can return zero chapters when every file failed to
        # read, so the divisor is guarded.
        avg_dialogue_ratio = (
            sum(chapter['dialogue_ratio'] for chapter in chapters)
            / max(len(chapters), 1)
        )

        word_rating = self._rate_words(avg_words)
        dialogue_rating = self._rate_dialogue(avg_dialogue_ratio)
        word_status = (
            "符合"
            if self.MIN_CHAPTER_WORDS <= avg_words <= self.MAX_CHAPTER_WORDS
            else "需要调整"
        )
        dialogue_status = (
            "适合听书"
            if avg_dialogue_ratio >= self.MIN_DIALOGUE_RATIO
            else "需要增加对话"
        )
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        summary = f"""
# 小说字数统计报告

## 项目信息
- 项目路径: {project_path}
- 统计时间: {timestamp}
- 章节数量: {stats['total_chapters']} 章
- 总字数: {total_words} 字
- 总字符数: {stats['total_chars']} 字符

## 章节统计
- 平均每章字数: {avg_words} 字
- 字数评估: {word_rating}
- 平均对话占比: {avg_dialogue_ratio:.1%}
- 对话评估: {dialogue_rating}

## 番茄平台适配
### 字数要求
- 标准章节字数: {self.STANDARD_CHAPTER_WORDS}-{self.MAX_CHAPTER_WORDS}字
- 当前平均: {avg_words} 字
- 状态: {word_status}

### 更新要求
- 日更全勤要求: {self.DAILY_TARGET_WORDS}字/天
- 当前总字数: {total_words} 字
- 相当于: {total_words // self.DAILY_TARGET_WORDS} 天的全勤更新量

### 听书优化
- 推荐对话占比: >{self.MIN_DIALOGUE_RATIO:.0%}
- 当前对话占比: {avg_dialogue_ratio:.1%}
- 状态: {dialogue_status}

## 详细章节数据
"""
        sections = [summary]
        for i, chapter in enumerate(chapters, 1):
            # splitext drops only the extension, never a ".md" inside the name.
            chapter_name = os.path.splitext(os.path.basename(chapter['file']))[0]
            sections.append(
                f"\n### 第{i}章: {chapter_name}\n"
                f"- 字数: {chapter['words']} 字\n"
                f"- 字符: {chapter['chars']} 字符\n"
                f"- 段落: {chapter['paragraphs']} 段\n"
                f"- 对话行: {chapter['dialogue_lines']} 行\n"
                f"- 对话占比: {chapter['dialogue_ratio']:.1%}\n"
            )
        report = "".join(sections)

        if output_path:
            # dirname is "" for a bare file name and os.makedirs("") raises.
            output_dir = os.path.dirname(output_path)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(report)

            print(f"报告已保存: {output_path}")

        return report

def main():
    """Command-line entry point: parse the options and print the report."""
    import argparse

    cli = argparse.ArgumentParser(description="小说字数统计工具")
    cli.add_argument("--project", default=".", help="项目路径")
    cli.add_argument("--output", help="输出报告路径")
    options = cli.parse_args()

    # generate_report returns None when there is nothing to report.
    text = WordCounter().generate_report(options.project, options.output)
    if not text:
        return
    print(text)

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
项目初始化：小说创作工具集
- 创建飞书同步工具 (Python版)
- 创建字数统计工具
- 创建章节生成器
- 创建番茄黄金三章模板
- 完善项目文档和结构
- 配置完整的工具链

2026-03-30 12:32:57 +08:00

			`#!/usr/bin/env python3`
			`"""`
			`小说字数统计工具`
			`统计章节字数、总字数、进度等`
			`"""`

			`import os`
			`import re`
			`from datetime import datetime`
			`import json`

			`class WordCounter:`
			`def __init__(self):`
			`self.total_words = 0`
			`self.total_chars = 0`
			`self.chapter_stats = []`

			`def count_file(self, file_path):`
			`"""统计单个文件"""`
			`try:`
			`with open(file_path, 'r', encoding='utf-8') as f:`
			`content = f.read()`

			`# 计算字数（中文字符+英文单词）`
			`chinese_chars = len(re.findall(r'[\u4e00-\u9fff]', content))`
			`english_words = len(re.findall(r'\b[a-zA-Z]+\b', content))`
			`total_words = chinese_chars + english_words`

			`# 计算总字符数`
			`total_chars = len(content)`

			`# 计算段落数`
			`paragraphs = len([p for p in content.split('\n\n') if p.strip()])`

			`# 计算对话行数`
			`dialogue_lines = len(re.findall(r'["「][^"」]+["」]', content))`

			`return {`
			`"file": file_path,`
			`"words": total_words,`
			`"chars": total_chars,`
			`"paragraphs": paragraphs,`
			`"dialogue_lines": dialogue_lines,`
			`"dialogue_ratio": round(dialogue_lines / max(paragraphs, 1), 2)`
			`}`
			`except Exception as e:`
			`print(f"统计文件失败 {file_path}: {e}")`
			`return None`

			`def count_project(self, project_path):`
			`"""统计整个项目"""`
			`chapters_dir = os.path.join(project_path, "chapters")`

			`if not os.path.exists(chapters_dir):`
			`print(f"章节目录不存在: {chapters_dir}")`
			`return None`

			`# 获取所有章节文件`
			`chapter_files = []`
			`for root, dirs, files in os.walk(chapters_dir):`
			`for file in files:`
			`if file.endswith('.md'):`
			`chapter_files.append(os.path.join(root, file))`

			`if not chapter_files:`
			`print("没有找到章节文件")`
			`return None`

			`# 按文件名排序`
			`chapter_files.sort()`

			`# 统计每个章节`
			`self.chapter_stats = []`
			`self.total_words = 0`
			`self.total_chars = 0`

			`for chapter_file in chapter_files:`
			`stats = self.count_file(chapter_file)`
			`if stats:`
			`self.chapter_stats.append(stats)`
			`self.total_words += stats['words']`
			`self.total_chars += stats['chars']`

			`return {`
			`"project": project_path,`
			`"total_chapters": len(self.chapter_stats),`
			`"total_words": self.total_words,`
			`"total_chars": self.total_chars,`
			`"avg_words_per_chapter": round(self.total_words / max(len(self.chapter_stats), 1)),`
			`"chapters": self.chapter_stats`
			`}`

			`def generate_report(self, project_path, output_path=None):`
			`"""生成统计报告"""`
			`stats = self.count_project(project_path)`
			`if not stats:`
			`return None`

			`# 番茄平台标准`
			`tomato_standard = 2500 # 每章标准字数`
			`daily_target = 4000 # 日更目标`

			`# 计算进度`
			`completed_chapters = stats['total_chapters']`
			`total_words = stats['total_words']`
			`avg_words = stats['avg_words_per_chapter']`

			`# 评估`
			`if avg_words < 2000:`
			`word_rating = "⚠️ 字数偏少"`
			`elif avg_words < 2500:`
			`word_rating = "✅ 符合标准"`
			`elif avg_words < 3500:`
			`word_rating = "✅ 字数充足"`
			`else:`
			`word_rating = "⚠️ 字数偏多"`

			`# 对话占比评估`
			`avg_dialogue_ratio = sum(s['dialogue_ratio'] for s in self.chapter_stats) / len(self.chapter_stats)`
			`if avg_dialogue_ratio < 0.3:`
			`dialogue_rating = "⚠️ 对话偏少（影响听书分成）"`
			`elif avg_dialogue_ratio < 0.5:`
			`dialogue_rating = "✅ 对话适中"`
			`else:`
			`dialogue_rating = "✅ 对话丰富（适合听书）"`

			`# 生成报告`
			`report = f"""`
			`# 小说字数统计报告`

			`## 项目信息`
			`- 项目路径: {project_path}`
			`- 统计时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}`
			`- 章节数量: {completed_chapters} 章`
			`- 总字数: {total_words} 字`
			`- 总字符数: {stats['total_chars']} 字符`

			`## 章节统计`
			`- 平均每章字数: {avg_words} 字`
			`- 字数评估: {word_rating}`
			`- 平均对话占比: {avg_dialogue_ratio:.1%}`
			`- 对话评估: {dialogue_rating}`

			`## 番茄平台适配`
			`### 字数要求`
			`- 标准章节字数: 2500-3500字`
			`- 当前平均: {avg_words} 字`
			`- 状态: {"符合" if 2000 <= avg_words <= 3500 else "需要调整"}`

			`### 更新要求`
			`- 日更全勤要求: 4000字/天`
			`- 当前总字数: {total_words} 字`
			`- 相当于: {total_words // 4000} 天的全勤更新量`

			`### 听书优化`
			`- 推荐对话占比: >30%`
			`- 当前对话占比: {avg_dialogue_ratio:.1%}`
			`- 状态: {"适合听书" if avg_dialogue_ratio >= 0.3 else "需要增加对话"}`

			`## 详细章节数据`
			`"""`
			`# 添加每个章节的详细数据`
			`for i, chapter in enumerate(self.chapter_stats, 1):`
			`chapter_name = os.path.basename(chapter['file']).replace('.md', '')`
			`report += f"\n### 第{i}章: {chapter_name}\n"`
			`report += f"- 字数: {chapter['words']} 字\n"`
			`report += f"- 字符: {chapter['chars']} 字符\n"`
			`report += f"- 段落: {chapter['paragraphs']} 段\n"`
			`report += f"- 对话行: {chapter['dialogue_lines']} 行\n"`
			`report += f"- 对话占比: {chapter['dialogue_ratio']:.1%}\n"`

			`# 保存报告`
			`if output_path:`
			`os.makedirs(os.path.dirname(output_path), exist_ok=True)`
			`with open(output_path, 'w', encoding='utf-8') as f:`
			`f.write(report)`

			`print(f"报告已保存: {output_path}")`

			`return report`

			`def main():`
			`"""主函数"""`
			`import argparse`

			`parser = argparse.ArgumentParser(description="小说字数统计工具")`
			`parser.add_argument("--project", help="项目路径", default=".")`
			`parser.add_argument("--output", help="输出报告路径")`

			`args = parser.parse_args()`

			`counter = WordCounter()`
			`report = counter.generate_report(args.project, args.output)`

			`if report:`
			`print(report)`

			`if __name__ == "__main__":`
			`main()`