novel-doomsday-resurgence/skills/fanfic-writer/scripts/tomato_fetch.py
唐天洛 cb9b16e5a8 初始提交:番茄小说创作工作区
包含:
- 核心配置文件(AGENTS.md, SOUL.md, USER.md等)
- 记忆系统(memory/文件夹)
- 技能库(skills/文件夹)
- 小说内容(novel/文件夹)
- .gitignore配置
2026-03-30 15:46:26 +08:00

131 lines
4.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Tomato Novel (番茄小说) Genre Fetcher
Provides trending genre data for 番茄小说 (currently built-in sample data; live fetching is not implemented yet)
【说明】
- 此脚本为可选功能,用于主动爬取番茄热门题材做推荐
- 主要流程:用户直接指定题材,无需爬取
- 如需使用python tomato_fetch.py <output_dir>
- 如果网站屏蔽爬取,使用示例数据或用户手动提供题材
【推荐用法】
用户直接说:"我想写一本都市异能的小说"
→ 跳过此脚本,直接进入大纲生成
"""
import urllib.request
import json
import re
from pathlib import Path
def fetch_tomato_rankings():
    """
    Return popular genre/theme data for 番茄小说 (Tomato Novel).

    This is a placeholder implementation: no network request is made and
    the genre list is hard-coded sample data. A real implementation would
    have to parse the site's HTML and may need to deal with anti-scraping
    measures.

    Returns:
        dict: genre data for analysis, with the keys ``"source"``,
        ``"fetch_time"`` and ``"genres"``. Each entry of ``"genres"`` is a
        dict with the keys ``"name"``, ``"trend_score"``, ``"description"``,
        ``"avg_words"`` and ``"hot_books"``. A fresh structure is built on
        every call, so callers may mutate the result freely.
    """
    # Pages a real scraper would read (listed for reference, not requested):
    #   https://fanqienovel.com/library/
    #   https://fanqienovel.com/rank/

    # Sample popular genres (should eventually be scraped from the site).
    popular_genres = [
        {
            "name": "都市异能",
            "trend_score": 95,
            "description": "现代都市背景下的超能力/系统故事",
            "avg_words": 800000,
            "hot_books": ["书名A", "书名B"],
        },
        {
            "name": "玄幻修仙",
            "trend_score": 90,
            "description": "传统修仙、升级打怪",
            "avg_words": 1200000,
            "hot_books": ["书名C", "书名D"],
        },
        {
            "name": "历史架空",
            "trend_score": 85,
            "description": "穿越历史、权谋争霸",
            "avg_words": 1000000,
            "hot_books": ["书名E"],
        },
        {
            "name": "游戏竞技",
            "trend_score": 80,
            "description": "电竞、游戏世界",
            "avg_words": 600000,
            "hot_books": ["书名F"],
        },
        {
            "name": "科幻末世",
            "trend_score": 75,
            "description": "末日生存、星际文明",
            "avg_words": 900000,
            "hot_books": ["书名G"],
        },
    ]

    return {
        "source": "番茄小说",
        # Marks the data as hand-written sample data; should become a real
        # timestamp once actual fetching is implemented.
        "fetch_time": "manual_sample",
        "genres": popular_genres,
    }
def save_genre_data(data, book_dir):
    """Save fetched genre data as ``genre_data.json`` inside *book_dir*.

    Args:
        data: JSON-serializable genre data (e.g. the dict returned by
            ``fetch_tomato_rankings``).
        book_dir: Target directory (string or path-like). It is created,
            including missing parents, if it does not exist yet.
    """
    target_dir = Path(book_dir)
    # Create the directory up front so writing into a brand-new book folder
    # does not fail with FileNotFoundError.
    target_dir.mkdir(parents=True, exist_ok=True)

    output_path = target_dir / "genre_data.json"
    with open(output_path, 'w', encoding='utf-8') as f:
        # ensure_ascii=False keeps the Chinese text human-readable on disk.
        json.dump(data, f, indent=2, ensure_ascii=False)
    print(f"Genre data saved to: {output_path}")
def format_genre_for_prompt(genres_data):
    """Render genre data as a plain-text section for the analysis prompt.

    Args:
        genres_data: Mapping with an optional ``"genres"`` list. Every entry
            must provide ``name``, ``trend_score``, ``description``,
            ``avg_words`` and ``hot_books``; a missing key raises ``KeyError``.

    Returns:
        str: A header line and a blank line, then five lines per genre, each
        genre followed by a blank line, all joined with newlines.
    """
    report = ["=== 番茄小说热门题材数据 ===", ""]
    for genre in genres_data.get("genres", []):
        report.extend([
            f"题材: {genre['name']}",
            f"热度: {genre['trend_score']}/100",
            f"描述: {genre['description']}",
            # Thousands separators make large word counts easier to read.
            f"常见字数: {genre['avg_words']:,}",
            f"代表作: {', '.join(genre['hot_books'])}",
            "",
        ])
    return "\n".join(report)
if __name__ == "__main__":
    import sys

    # Command-line entry point: exactly one argument is needed, the output
    # directory. Extra arguments are ignored.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: tomato_fetch.py <output_dir>")
        print("Fetches genre data from 番茄小说 for novel planning")
        sys.exit(1)

    out_dir_arg = cli_args[0]
    Path(out_dir_arg).mkdir(parents=True, exist_ok=True)

    print("Fetching tomato novel genre data...")
    # A full implementation would scrape the website here; for now this
    # returns the built-in sample data.
    genre_data = fetch_tomato_rankings()
    save_genre_data(genre_data, out_dir_arg)

    # Also write the prompt-ready text rendering next to the JSON file.
    prompt_file = Path(out_dir_arg) / "genre_data_formatted.txt"
    prompt_file.write_text(format_genre_for_prompt(genre_data), encoding='utf-8')
    print(f"Formatted data saved to: {prompt_file}")