522 lines
20 KiB
Python
522 lines
20 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
人物档案验证脚本
|
||
验证markdown格式的人物档案结构完整性
|
||
"""
|
||
|
||
import os
|
||
import re
|
||
import sys
|
||
from pathlib import Path
|
||
|
||
class ProfileValidator:
|
||
"""人物档案验证器"""
|
||
|
||
# 各类型角色的必需章节
|
||
REQUIRED_SECTIONS = {
|
||
'protagonist': [
|
||
'基本信息', '外貌特征', '性格特点', '背景故事',
|
||
'动机层次', '人物关系', '故事发展'
|
||
],
|
||
'antagonist': [
|
||
'基本信息', '外貌特征', '性格特点', '核心理念',
|
||
'动机发展', '镜像对比', '资源能力', '故事发展'
|
||
],
|
||
'supporting': [
|
||
'基本定位', '独立身份', '功能性设计', '关系发展',
|
||
'发展可能性'
|
||
],
|
||
'standard': [
|
||
'基本信息', '外貌特征', '性格特点', '背景故事',
|
||
'人物关系', '故事发展'
|
||
]
|
||
}
|
||
|
||
# 章节内的必需字段(根据模板)
|
||
REQUIRED_FIELDS = {
|
||
'基本信息': ['姓名', '年龄', '性别', '职业/身份', '故事中的角色'],
|
||
'外貌特征': ['整体印象', '面部特征', '身材体型', '着装风格'],
|
||
'性格特点': ['核心性格', '优点', '缺点', '价值观'],
|
||
'背景故事': ['出身背景', '关键经历', '转折点'],
|
||
'人物关系': ['与主角关系', '重要关系人'],
|
||
'故事发展': ['角色目标', '内在冲突', '外在冲突', '发展弧线']
|
||
}
|
||
|
||
def __init__(self, profile_type='auto'):
|
||
"""初始化验证器
|
||
|
||
Args:
|
||
profile_type: 档案类型,可选值: 'protagonist', 'antagonist', 'supporting', 'standard', 'auto'
|
||
"""
|
||
self.profile_type = profile_type
|
||
|
||
# 检测平台,Windows上使用简单符号
|
||
self.is_windows = sys.platform.startswith('win')
|
||
|
||
# 符号定义
|
||
if self.is_windows:
|
||
self.symbols = {
|
||
'building': '[结构]',
|
||
'cross_mark': '[缺失]',
|
||
'warning': '[注意]',
|
||
'check': '[通过]',
|
||
'wrench': '[修复]',
|
||
'check_mark': '[OK]',
|
||
'arrow': '->',
|
||
'green_circle': '[良好]',
|
||
'yellow_circle': '[一般]',
|
||
'red_circle': '[需改进]',
|
||
'file': '[文件]',
|
||
'chart': '[统计]',
|
||
'chart2': '[分布]',
|
||
'bulb': '[建议]',
|
||
'bullet': '-',
|
||
'dash': '-'
|
||
}
|
||
else:
|
||
self.symbols = {
|
||
'building': '🏗️',
|
||
'cross_mark': '❌',
|
||
'warning': '⚠️',
|
||
'check': '✅',
|
||
'wrench': '🔧',
|
||
'check_mark': '✓',
|
||
'arrow': '→',
|
||
'green_circle': '🟢',
|
||
'yellow_circle': '🟡',
|
||
'red_circle': '🔴',
|
||
'file': '📋',
|
||
'chart': '📊',
|
||
'chart2': '📈',
|
||
'bulb': '💡',
|
||
'bullet': '•',
|
||
'dash': '-'
|
||
}
|
||
|
||
def detect_profile_type(self, content):
|
||
"""检测档案类型"""
|
||
# 通过内容特征检测类型
|
||
lines = content.split('\n')
|
||
|
||
# 检查是否有特定章节
|
||
has_mirror = any('镜像对比' in line for line in lines)
|
||
has_core_belief = any('核心理念' in line for line in lines)
|
||
has_resources = any('资源能力' in line for line in lines)
|
||
|
||
has_function = any('功能性设计' in line for line in lines)
|
||
has_identity = any('独立身份' in line for line in lines)
|
||
|
||
has_motivation = any('动机层次' in line for line in lines)
|
||
has_core_identity = any('核心身份' in line for line in lines)
|
||
|
||
if has_mirror or has_core_belief or has_resources:
|
||
return 'antagonist'
|
||
elif has_function or has_identity:
|
||
return 'supporting'
|
||
elif has_motivation or has_core_identity:
|
||
return 'protagonist'
|
||
else:
|
||
return 'standard'
|
||
|
||
def validate_structure(self, filepath):
|
||
"""验证档案结构
|
||
|
||
Args:
|
||
filepath: markdown文件路径
|
||
|
||
Returns:
|
||
验证结果字典
|
||
"""
|
||
with open(filepath, 'r', encoding='utf-8') as f:
|
||
content = f.read()
|
||
|
||
# 检测类型
|
||
if self.profile_type == 'auto':
|
||
detected_type = self.detect_profile_type(content)
|
||
else:
|
||
detected_type = self.profile_type
|
||
|
||
# 提取所有章节标题
|
||
sections = self._extract_sections(content)
|
||
|
||
# 检查必需章节
|
||
required_sections = self.REQUIRED_SECTIONS.get(detected_type, [])
|
||
missing_sections = []
|
||
present_sections = []
|
||
|
||
for required_section in required_sections:
|
||
if required_section not in sections:
|
||
missing_sections.append(required_section)
|
||
else:
|
||
present_sections.append(required_section)
|
||
|
||
# 检查章节内的必需字段
|
||
section_field_violations = {}
|
||
for section_title in present_sections:
|
||
section_content = self._get_section_content(content, section_title)
|
||
missing_fields = self._check_required_fields(section_title, section_content)
|
||
|
||
if missing_fields:
|
||
section_field_violations[section_title] = missing_fields
|
||
|
||
# 计算结构完整性评分
|
||
structure_score = self._calculate_structure_score(
|
||
len(required_sections), len(missing_sections), section_field_violations
|
||
)
|
||
|
||
return {
|
||
'filepath': filepath,
|
||
'detected_type': detected_type,
|
||
'total_sections_found': len(sections),
|
||
'required_sections': required_sections,
|
||
'present_sections': present_sections,
|
||
'missing_sections': missing_sections,
|
||
'section_field_violations': section_field_violations,
|
||
'structure_score': structure_score,
|
||
'structure_level': self._get_structure_level(structure_score)
|
||
}
|
||
|
||
def _extract_sections(self, content):
|
||
"""提取所有章节标题"""
|
||
sections = []
|
||
|
||
# 匹配二级和三级标题(## 和 ###)
|
||
header_pattern = r'^#{2,3}\s+(.+?)$'
|
||
|
||
lines = content.split('\n')
|
||
for line in lines:
|
||
match = re.match(header_pattern, line.strip())
|
||
if match:
|
||
title = match.group(1).strip()
|
||
# 去掉可能的内部链接
|
||
title = re.sub(r'\[.*?\]\(.*?\)', '', title)
|
||
sections.append(title)
|
||
|
||
return sections
|
||
|
||
def _get_section_content(self, content, section_title):
|
||
"""获取指定章节的内容"""
|
||
lines = content.split('\n')
|
||
in_target_section = False
|
||
section_content = []
|
||
|
||
for line in lines:
|
||
# 检查是否是章节标题
|
||
if re.match(rf'^#{{2,3}}\s+{re.escape(section_title)}\s*$', line.strip()):
|
||
in_target_section = True
|
||
continue
|
||
|
||
# 如果进入下一个章节,停止收集
|
||
if in_target_section and re.match(r'^#{2,3}\s+', line.strip()):
|
||
break
|
||
|
||
# 收集内容行
|
||
if in_target_section:
|
||
section_content.append(line)
|
||
|
||
return '\n'.join(section_content)
|
||
|
||
def _check_required_fields(self, section_title, section_content):
|
||
"""检查章节内的必需字段"""
|
||
required_fields = self.REQUIRED_FIELDS.get(section_title, [])
|
||
if not required_fields:
|
||
return []
|
||
|
||
missing_fields = []
|
||
|
||
for field in required_fields:
|
||
# 检查字段是否出现(作为粗体文本)
|
||
pattern = rf'\*\*{re.escape(field)}\*\*'
|
||
if not re.search(pattern, section_content):
|
||
missing_fields.append(field)
|
||
|
||
return missing_fields
|
||
|
||
def _calculate_structure_score(self, total_required, missing_sections_count, field_violations):
|
||
"""计算结构完整性评分(0-100)"""
|
||
if total_required == 0:
|
||
return 100
|
||
|
||
# 章节完整性(70分)
|
||
section_score = ((total_required - missing_sections_count) / total_required) * 70
|
||
|
||
# 字段完整性(30分)
|
||
field_score = 30
|
||
if field_violations:
|
||
total_violations = sum(len(fields) for fields in field_violations.values())
|
||
# 每个缺失字段扣3分
|
||
field_penalty = min(30, total_violations * 3)
|
||
field_score -= field_penalty
|
||
|
||
total_score = section_score + field_score
|
||
return max(0, min(100, total_score))
|
||
|
||
def _get_structure_level(self, score):
|
||
"""获取结构完整性等级"""
|
||
if score >= 90:
|
||
return "优秀"
|
||
elif score >= 75:
|
||
return "良好"
|
||
elif score >= 60:
|
||
return "一般"
|
||
elif score >= 40:
|
||
return "不完整"
|
||
else:
|
||
return "结构缺失"
|
||
|
||
def generate_validation_report(self, validation_result, output_format='text'):
|
||
"""生成验证报告"""
|
||
result = validation_result
|
||
|
||
if output_format == 'text':
|
||
report_lines = []
|
||
report_lines.append("=" * 60)
|
||
report_lines.append(f"人物档案结构验证报告")
|
||
report_lines.append(f"文件: {result['filepath']}")
|
||
report_lines.append(f"检测类型: {result['detected_type']}")
|
||
report_lines.append("=" * 60)
|
||
report_lines.append("")
|
||
|
||
# 结构完整性
|
||
report_lines.append(f"{self.symbols['building']} 结构完整性")
|
||
report_lines.append(f" 评分: {result['structure_score']:.1f}/100")
|
||
report_lines.append(f" 等级: {result['structure_level']}")
|
||
report_lines.append(f" 发现章节: {result['total_sections_found']}")
|
||
report_lines.append("")
|
||
|
||
# 章节检查
|
||
if result['missing_sections']:
|
||
report_lines.append(f"{self.symbols['cross_mark']} 缺失的必需章节")
|
||
for section in result['missing_sections']:
|
||
report_lines.append(f" {self.symbols['bullet']} {section}")
|
||
report_lines.append("")
|
||
|
||
# 字段检查
|
||
if result['section_field_violations']:
|
||
report_lines.append(f"{self.symbols['warning']} 章节内缺失字段")
|
||
for section, fields in result['section_field_violations'].items():
|
||
report_lines.append(f" {self.symbols['bullet']} {section}:")
|
||
for field in fields:
|
||
report_lines.append(f" {self.symbols['dash']} {field}")
|
||
report_lines.append("")
|
||
|
||
# 通过检查的项目
|
||
report_lines.append(f"{self.symbols['check']} 通过的检查")
|
||
report_lines.append(f" {self.symbols['bullet']} 必需章节: {len(result['present_sections'])}/{len(result['required_sections'])}")
|
||
|
||
present_field_count = 0
|
||
total_field_count = 0
|
||
for section in result['present_sections']:
|
||
required_fields = self.REQUIRED_FIELDS.get(section, [])
|
||
total_field_count += len(required_fields)
|
||
if section not in result['section_field_violations']:
|
||
present_field_count += len(required_fields)
|
||
else:
|
||
missing_count = len(result['section_field_violations'][section])
|
||
present_field_count += (len(required_fields) - missing_count)
|
||
|
||
if total_field_count > 0:
|
||
report_lines.append(f" {self.symbols['bullet']} 必需字段: {present_field_count}/{total_field_count}")
|
||
|
||
report_lines.append("")
|
||
|
||
# 修复建议
|
||
report_lines.append(f"{self.symbols['wrench']} 修复建议")
|
||
if result['structure_score'] >= 80:
|
||
report_lines.append(f" {self.symbols['check_mark']} 结构完整,可以继续完善内容细节")
|
||
elif result['structure_score'] >= 60:
|
||
if result['missing_sections']:
|
||
report_lines.append(f" {self.symbols['arrow']} 添加缺失的章节: {', '.join(result['missing_sections'][:3])}")
|
||
if result['section_field_violations']:
|
||
first_section = list(result['section_field_violations'].keys())[0]
|
||
first_field = result['section_field_violations'][first_section][0]
|
||
report_lines.append(f" {self.symbols['arrow']} 补充字段: {first_section} → **{first_field}**")
|
||
else:
|
||
report_lines.append(f" {self.symbols['arrow']} 需要补充基本的结构框架")
|
||
report_lines.append(f" {self.symbols['arrow']} 建议使用'{result['detected_type']}'模板重新整理")
|
||
|
||
report_lines.append("")
|
||
report_lines.append("=" * 60)
|
||
|
||
return "\n".join(report_lines)
|
||
|
||
elif output_format == 'json':
|
||
import json
|
||
return json.dumps(result, ensure_ascii=False, indent=2)
|
||
|
||
else:
|
||
raise ValueError(f"不支持的输出格式: {output_format}")
|
||
|
||
def validate_directory(self, directory_path, recursive=True):
|
||
"""验证目录下的所有markdown档案"""
|
||
directory = Path(directory_path)
|
||
|
||
if not directory.exists():
|
||
raise FileNotFoundError(f"目录不存在: {directory_path}")
|
||
|
||
# 查找markdown文件
|
||
md_files = []
|
||
if recursive:
|
||
md_files = list(directory.rglob("*.md"))
|
||
else:
|
||
md_files = list(directory.glob("*.md"))
|
||
|
||
if not md_files:
|
||
return {"message": "未找到markdown文件", "files": []}
|
||
|
||
# 验证每个文件
|
||
results = []
|
||
for md_file in md_files:
|
||
try:
|
||
validation = self.validate_structure(str(md_file))
|
||
results.append(validation)
|
||
except Exception as e:
|
||
results.append({
|
||
'filepath': str(md_file),
|
||
'error': str(e)
|
||
})
|
||
|
||
# 按结构评分排序
|
||
valid_results = [r for r in results if 'structure_score' in r]
|
||
sorted_results = sorted(valid_results, key=lambda x: x['structure_score'], reverse=True)
|
||
|
||
return {
|
||
'total_files': len(md_files),
|
||
'successful_validation': len(valid_results),
|
||
'failed_validation': len(results) - len(valid_results),
|
||
'results': sorted_results
|
||
}
|
||
|
||
def generate_directory_validation_report(self, validation_results, output_format='text'):
|
||
"""生成目录验证报告"""
|
||
if output_format == 'text':
|
||
report_lines = []
|
||
report_lines.append("=" * 60)
|
||
report_lines.append(f"人物档案结构验证报告(目录)")
|
||
report_lines.append(f"分析文件数: {validation_results['total_files']}")
|
||
report_lines.append(f"成功验证: {validation_results['successful_validation']}")
|
||
if validation_results['failed_validation'] > 0:
|
||
report_lines.append(f"验证失败: {validation_results['failed_validation']}")
|
||
report_lines.append("=" * 60)
|
||
report_lines.append("")
|
||
|
||
# 文件列表(按评分排序)
|
||
if validation_results['results']:
|
||
report_lines.append(f"{self.symbols['file']} 文件结构完整性排名")
|
||
for i, result in enumerate(validation_results['results'], 1):
|
||
score = result['structure_score']
|
||
level = result['structure_level']
|
||
filename = os.path.basename(result['filepath'])
|
||
profile_type = result.get('detected_type', '未知')
|
||
|
||
# 使用符号表示等级
|
||
if score >= 80:
|
||
icon = self.symbols['green_circle']
|
||
elif score >= 60:
|
||
icon = self.symbols['yellow_circle']
|
||
else:
|
||
icon = self.symbols['red_circle']
|
||
|
||
report_lines.append(f"{icon} {i:2d}. {filename:<35} {score:5.1f}分 ({level}, {profile_type})")
|
||
|
||
report_lines.append("")
|
||
|
||
# 统计信息
|
||
avg_score = sum(r['structure_score'] for r in validation_results['results']) / len(validation_results['results'])
|
||
max_score = max(r['structure_score'] for r in validation_results['results'])
|
||
min_score = min(r['structure_score'] for r in validation_results['results'])
|
||
|
||
# 类型分布
|
||
type_distribution = {}
|
||
for result in validation_results['results']:
|
||
profile_type = result.get('detected_type', '未知')
|
||
type_distribution[profile_type] = type_distribution.get(profile_type, 0) + 1
|
||
|
||
report_lines.append(f"{self.symbols['chart']} 统计信息")
|
||
report_lines.append(f" 平均结构分: {avg_score:.1f}")
|
||
report_lines.append(f" 最高分: {max_score:.1f}")
|
||
report_lines.append(f" 最低分: {min_score:.1f}")
|
||
report_lines.append("")
|
||
|
||
report_lines.append(f"{self.symbols['chart2']} 类型分布")
|
||
for profile_type, count in type_distribution.items():
|
||
percentage = (count / len(validation_results['results'])) * 100
|
||
report_lines.append(f" {profile_type}: {count}个 ({percentage:.1f}%)")
|
||
|
||
report_lines.append("")
|
||
|
||
# 整体建议
|
||
report_lines.append(f"{self.symbols['bulb']} 整体建议")
|
||
if avg_score >= 75:
|
||
report_lines.append(f" {self.symbols['check_mark']} 整体结构良好")
|
||
report_lines.append(f" {self.symbols['arrow']} 可以开始关注内容深度和细节")
|
||
elif avg_score >= 50:
|
||
report_lines.append(f" {self.symbols['warning']} 结构基本完整但有缺失")
|
||
report_lines.append(f" {self.symbols['arrow']} 建议补充缺失章节和字段")
|
||
else:
|
||
report_lines.append(f" {self.symbols['cross_mark']} 结构完整性不足")
|
||
report_lines.append(f" {self.symbols['arrow']} 需要重新整理档案结构框架")
|
||
|
||
report_lines.append("")
|
||
report_lines.append("=" * 60)
|
||
|
||
return "\n".join(report_lines)
|
||
|
||
else:
|
||
import json
|
||
return json.dumps(validation_results, ensure_ascii=False, indent=2)
|
||
|
||
|
||
def main():
|
||
"""主函数"""
|
||
import argparse
|
||
|
||
parser = argparse.ArgumentParser(description='验证人物档案markdown文件结构')
|
||
parser.add_argument('path', help='要验证的markdown文件或目录路径')
|
||
parser.add_argument('--type', '-t', choices=['protagonist', 'antagonist', 'supporting', 'standard', 'auto'],
|
||
default='auto', help='档案类型(默认为自动检测)')
|
||
parser.add_argument('--recursive', '-r', action='store_true', help='递归验证目录')
|
||
parser.add_argument('--format', '-f', choices=['text', 'json'], default='text', help='输出格式')
|
||
parser.add_argument('--output', '-o', help='输出文件路径')
|
||
|
||
args = parser.parse_args()
|
||
|
||
validator = ProfileValidator(args.type)
|
||
path = Path(args.path)
|
||
|
||
try:
|
||
if path.is_file():
|
||
# 验证单个文件
|
||
if path.suffix.lower() != '.md':
|
||
print("错误: 文件必须是.md格式")
|
||
return 1
|
||
|
||
validation = validator.validate_structure(str(path))
|
||
report = validator.generate_validation_report(validation, args.format)
|
||
|
||
elif path.is_dir():
|
||
# 验证目录
|
||
validation_results = validator.validate_directory(str(path), args.recursive)
|
||
report = validator.generate_directory_validation_report(validation_results, args.format)
|
||
|
||
else:
|
||
print(f"错误: 路径不存在: {args.path}")
|
||
return 1
|
||
|
||
# 输出结果
|
||
if args.output:
|
||
with open(args.output, 'w', encoding='utf-8') as f:
|
||
f.write(report)
|
||
print(f"报告已保存到: {args.output}")
|
||
else:
|
||
print(report)
|
||
|
||
return 0
|
||
|
||
except Exception as e:
|
||
print(f"验证失败: {e}")
|
||
return 1
|
||
|
||
|
||
if __name__ == "__main__":
|
||
sys.exit(main()) |