#!/usr/bin/env python3
"""
Novel Character Relationship Graph Generator (Corrected Names)

Extracts entities and relationships from novel character notes
and generates a visualizable relationship graph.
"""

import argparse
import json
import os
import re
from pathlib import Path
from typing import Dict, List, Any


class NovelCharacterGraph:
    """Character relationship graph built from a directory of markdown notes.

    Every ``*.md`` file below ``characters_path`` is parsed as one character:
    the first H1 heading is the name, bold-labelled fields (年龄 / 性别 /
    职业 / 状态) become properties, every ``[[wiki link]]`` becomes a
    ``mentioned`` edge, and the "家庭关系" section yields typed family edges.

    Family edges read naturally as ``from --[type]--> to``: a line
    ``- **母亲**: [[Y]]`` inside X's note is stored as ``Y --[mother_of]--> X``.
    """

    # (label used in the family section, relationship type recorded for it)
    _FAMILY_ROLES = (
        ('母亲', 'mother_of'),
        ('父亲', 'father_of'),
        ('儿子', 'son_of'),
        ('妻子', 'wife_of'),
        ('丈夫', 'husband_of'),
    )

    # (property key, bold label in the note, regex capturing the value)
    _FIELDS = (
        ('age', '年龄', r'(\d+)'),
        ('gender', '性别', r'([男女])'),
        ('occupation', '职业', r'([^\n]+)'),
        ('status', '状态', r'([^\n]+)'),
    )

    # Static diagram printed by print_ascii_graph(); it is hard-coded and is
    # NOT derived from the characters/relationships that were loaded.
    _ASCII_GRAPH = '\n'.join((
        '',
        '              ┌─────────────┐',
        '              │   顾国强    │',
        '              │   (已故)    │',
        '              └──────┬──────┘',
        '                     │',
        '                     │ 夫妻',
        '                     │',
        '              ┌──────▼──────┐',
        '              │   刘婉清    │',
        '              │   (68岁)    │',
        '              │    婆婆     │',
        '              └──────┬──────┘',
        '                     │',
        '         ┌───────────┼───────────┐',
        '         │           │           │',
        '        母子     控制压迫       婆媳',
        '         │           │           │',
        '    ┌────▼────┐      │      ┌────▼────┐',
        '    │ 顾长风  │      │      │ 叶知秋  │',
        '    │ (35岁)  │      │      │ (28岁)  │',
        '    │  丈夫   │      │      │  女主   │',
        '    └────┬────┘      │      └────┬────┘',
        '         │           │           │',
        '        夫妻        想要        记忆',
        '         │          遗产        混乱',
        '         │           │           │',
        '         └───────────┼───────────┘',
        '                     │',
        '                 调查案件',
        '                     │',
        '              ┌──────▼──────┐',
        '              │   张明远    │',
        '              │   (所长)    │',
        '              └─────────────┘',
        '',
    ))

    def __init__(self, characters_path: str):
        """Remember the notes directory and start with an empty graph.

        Args:
            characters_path: Directory that contains the character notes.
        """
        self.characters_path = Path(characters_path)
        # char_id -> node record {'id', 'type', 'properties'}
        self.characters: Dict[str, Dict[str, Any]] = {}
        # edge records {'from': char_id, 'to': char_id, 'type': str}
        self.relationships: List[Dict[str, str]] = []

    def load_characters(self):
        """Load all character markdown files found under ``characters_path``."""
        # rglob order depends on the filesystem; sorting keeps the exported
        # graph reproducible between runs and machines.
        for md_file in sorted(self.characters_path.rglob('*.md')):
            self.load_character_file(md_file)

    def load_character_file(self, file_path: Path):
        """Load and parse a single character file.

        Adds one entry to ``self.characters`` and appends the ``mentioned``
        and family edges found in the file to ``self.relationships``. Files
        whose title contains "关系图谱" are skipped entirely.

        Args:
            file_path: Path of the markdown note to parse.
        """
        file_path = Path(file_path)
        content = file_path.read_text(encoding='utf-8')

        # Name is the first H1 heading; fall back to the file name when the
        # note has no (non-empty) H1.
        title_match = re.search(r'^#[^\S\n]+(.+)$', content, re.MULTILINE)
        name = (title_match.group(1).strip() if title_match else '') or file_path.stem

        # The relationship overview note is not a character.
        if '关系图谱' in name:
            return

        char_id = self.generate_id(name)

        # Store the path relative to the parent of the notes directory; a file
        # that lives outside that tree keeps the path it was given.
        try:
            shown_path = file_path.relative_to(self.characters_path.parent)
        except ValueError:
            shown_path = file_path
        properties = {'name': name, 'file': str(shown_path)}

        for key, label, value_pattern in self._FIELDS:
            value = self._extract_field(content, label, value_pattern)
            if value:
                properties[key] = value

        # One 'mentioned' edge per distinct link target, in first-seen order.
        seen_ids = set()
        for raw_link in re.findall(r'\[\[([^\]]+)\]\]', content):
            target = self._link_target(raw_link)
            if not target:
                continue
            link_id = self.generate_id(target)
            if link_id in seen_ids:
                continue
            seen_ids.add(link_id)
            self.relationships.append({
                'from': char_id,
                'to': link_id,
                'type': 'mentioned',
            })

        self.extract_relationships_from_content(content, char_id)

        self.characters[char_id] = {
            'id': char_id,
            'type': 'Character',
            'properties': properties,
        }

    @staticmethod
    def _extract_field(content: str, label: str, value_pattern: str) -> str:
        """Return the value after ``**label**:`` or ``''`` when absent/empty.

        The value must be on the same line as the label, and both the ASCII
        colon and the full-width colon are accepted.
        """
        pattern = (
            r'\*\*' + re.escape(label) + r'\*\*[^\S\n]*[::][^\S\n]*' + value_pattern
        )
        match = re.search(pattern, content)
        return match.group(1).strip() if match else ''

    @staticmethod
    def _link_target(raw_link: str) -> str:
        """Return the page name of a wiki link, dropping ``|alias`` and ``#anchor``."""
        return re.split(r'[|#]', raw_link, maxsplit=1)[0].strip()

    def extract_relationships_from_content(self, content: str, char_id: str):
        """Extract family relationships from the "家庭关系" section.

        For each known role label (母亲, 父亲, 儿子, 妻子, 丈夫) the first
        ``- **label**: [[Name]]`` line in the section adds one edge from the
        linked character to ``char_id``, e.g. ``Name --[mother_of]--> char_id``.

        Args:
            content: Full markdown text of the character note.
            char_id: ID of the character the note belongs to.
        """
        heading = re.search(
            r'^(#{2,6})[^\S\n]*家庭关系[^\S\n]*$', content, re.MULTILINE
        )
        if heading is None:
            return

        # The section runs until the next heading of the same or a higher
        # level, so deeper sub-headings stay part of it.
        level = len(heading.group(1))
        remainder = content[heading.end():]
        next_heading = re.search(
            '^#{1,' + str(level) + r'}\s', remainder, re.MULTILINE
        )
        section = remainder[:next_heading.start()] if next_heading else remainder

        for label, rel_type in self._FAMILY_ROLES:
            role_match = re.search(
                r'-[^\S\n]*\*\*' + label
                + r'\*\*[^\S\n]*[::][^\S\n]*\[\[([^\]]+)\]\]',
                section,
            )
            if role_match is None:
                continue
            relative = self._link_target(role_match.group(1))
            if not relative:
                continue
            # The linked person holds the role towards the note's owner.
            self.relationships.append({
                'from': self.generate_id(relative),
                'to': char_id,
                'type': rel_type,
            })

    def generate_id(self, name: str) -> str:
        """Generate consistent ID from name.

        The name is lower-cased, every run of characters other than ``a-z``,
        ``0-9`` and CJK ideographs (U+4E00-U+9FFF) becomes one underscore,
        and leading/trailing underscores are removed.

        Returns:
            The normalized name prefixed with ``char_``.
        """
        normalized = re.sub(r'[^a-z0-9\u4e00-\u9fff]+', '_', name.lower()).strip('_')
        return f"char_{normalized}"

    def visualize_text(self):
        """Print the character list, the relationships and the ASCII diagram."""
        print("=" * 60)
        print("《杀了婆婆的我却无人追责?》人物关系图谱")
        print("=" * 60)

        print("\n【人物列表】")
        shown_fields = (
            ('age', '年龄'),
            ('gender', '性别'),
            ('occupation', '职业'),
            ('status', '状态'),
        )
        for char in self.characters.values():
            props = char['properties']
            print(f"\n {props.get('name', 'Unknown')}")
            for key, label in shown_fields:
                if key in props:
                    print(f" {label}: {props[key]}")

        print("\n【人物关系】")
        for rel in self.relationships:
            source = self.characters.get(rel['from'])
            target = self.characters.get(rel['to'])
            # Edges pointing at characters without a loaded note are not shown.
            if not (source and target):
                continue
            from_name = source['properties']['name']
            to_name = target['properties']['name']
            print(f"\n {from_name} --[{rel['type']}]--> {to_name}")

        print("\n【ASCII 关系图】")
        self.print_ascii_graph()

    def print_ascii_graph(self):
        """Print simple ASCII relationship graph (a fixed, hard-coded diagram)."""
        print(self._ASCII_GRAPH)

    def export_json(self, output_path: str):
        """Export characters and relationships to a UTF-8 JSON file.

        Args:
            output_path: Destination file; missing parent directories are
                created.
        """
        output = {
            'characters': list(self.characters.values()),
            'relationships': self.relationships,
        }

        destination = Path(output_path)
        destination.parent.mkdir(parents=True, exist_ok=True)
        with open(destination, 'w', encoding='utf-8') as f:
            json.dump(output, f, ensure_ascii=False, indent=2)

        print(f"\n✅ 导出到: {output_path}")


def main(argv=None):
    """Command-line entry point: load the notes, print the graph, optionally export.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Exits through ``parser.error`` (status 2) when ``--path`` is not an
    existing directory, instead of silently printing an empty graph.
    """
    parser = argparse.ArgumentParser(description='Novel Character Relationship Graph (Corrected Names)')
    parser.add_argument('--path', type=str,
                        default='/root/.openclaw/workspace/novel/characters',
                        help='Path to character notes')
    parser.add_argument('--export', type=str,
                        help='Export to JSON file')

    args = parser.parse_args(argv)

    if not Path(args.path).is_dir():
        parser.error(f"character notes directory not found: {args.path}")

    # Build the graph from the notes, show it, then export if requested.
    graph = NovelCharacterGraph(args.path)
    graph.load_characters()
    graph.visualize_text()

    if args.export:
        graph.export_json(args.export)


if __name__ == '__main__':
    main()