Files
drama-gen/QualityAssessment.py
2026-02-25 03:02:52 +00:00

386 lines
15 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
小说质量评估模块
提供多维度的质量评估和改进建议
"""
import json
import logging
import re
from collections import Counter
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
@dataclass
class QualityMetrics:
    """Result record holding every figure of one quality assessment.

    The fields fall into five groups: raw counts, proportion estimates,
    language-complexity measures, structural measures, and final scores.
    All fields are required and have no defaults.
    """

    # --- Raw counts ---
    total_chars: int        # length of the assessed text
    effective_chars: int    # characters counted as meaningful content
    paragraph_count: int
    sentence_count: int
    dialogue_count: int

    # --- Proportion estimates ---
    dialogue_ratio: float
    description_ratio: float
    action_ratio: float

    # --- Language complexity ---
    avg_sentence_length: float
    vocabulary_richness: float
    repetition_rate: float

    # --- Structure ---
    scene_transitions: int
    character_mentions: Dict[str, int]  # candidate name -> occurrence count
    emotion_words: int

    # --- Scores ---
    overall_score: float
    dimension_scores: Dict[str, float]  # dimension name -> score
class NovelQualityAssessor:
    """Heuristic quality assessor for Chinese novel text.

    Every metric is derived from keyword counts and regular expressions; no
    tokenizer or NLP model is involved, so the numbers are rough estimates.
    All scores are clamped to the 0-100 range.
    """

    # Sentence terminators, accepted in both ASCII and full-width forms.
    _SENTENCE_END_RE = re.compile(r'[。!?\uff01\uff1f]')
    # One CJK ideograph / one run of consecutive CJK ideographs.
    _CJK_CHAR_RE = re.compile(r'[\u4e00-\u9fa5]')
    _CJK_RUN_RE = re.compile(r'[\u4e00-\u9fa5]+')
    # Punctuation counted as "effective" characters (ASCII + full-width).
    _PUNCTUATION_RE = re.compile(
        r'[,。!?;:、"()【】《》'
        r'\uff0c\uff01\uff1f\uff1b\uff1a\u201c\u201d\u2018\u2019\uff08\uff09]'
    )
    _DIGIT_RE = re.compile(r'\d')
    # 2-3 ideographs directly followed by punctuation or a speech/perception
    # verb; used as a crude stand-in for named-entity recognition.
    _NAME_RE = re.compile(
        r'[\u4e00-\u9fa5]{2,3}'
        r'(?=[,。!?:;\uff0c\uff01\uff1f\uff1a\uff1b]|说|想|看|听)'
    )
    # Characters allowed inside a window considered for repetition checks.
    _PHRASE_RE = re.compile(
        r'[\u4e00-\u9fa5"()【】《》\u201c\u201d\u2018\u2019\uff08\uff09]+'
    )
    _PHRASE_WINDOW = 10
    _SCENE_MARKERS = (
        "突然", "接着", "然后", "后来", "此时", "这时",
        "同时", "与此同时", "第二天", "几天后",
    )
    # Longest marker first, so one occurrence of "与此同时" is a single match
    # instead of also being counted as "同时".
    _SCENE_MARKER_RE = re.compile(
        '|'.join(
            re.escape(marker)
            for marker in sorted(_SCENE_MARKERS, key=len, reverse=True)
        )
    )
    # (dimension key, advice emitted when that dimension scores below 70).
    _DIMENSION_ADVICE = (
        ("内容丰富度", "建议增加更多的场景描写和人物内心活动,提升内容丰富度"),
        ("结构合理性", "建议优化故事结构,增加场景转换和情节推进"),
        ("语言质量", "建议提升语言表达质量,避免重复用词,丰富词汇"),
        ("情感表达", "建议增强情感表达,多使用情感词汇和内心独白"),
        ("对话质量", "建议增加对话内容,让角色通过对话展现性格"),
    )
    _DIMENSION_THRESHOLD = 70

    def __init__(self):
        """Set up the keyword lists and dialogue patterns used by the heuristics."""
        # Emotion vocabulary, grouped by polarity.
        self.emotion_words = {
            "positive": ["开心", "快乐", "兴奋", "满足", "温暖", "感动", "幸福", "甜蜜", "欣慰", "骄傲"],
            "negative": ["难过", "痛苦", "愤怒", "失望", "恐惧", "焦虑", "绝望", "孤独", "委屈", "后悔"],
            "complex": ["复杂", "矛盾", "纠结", "挣扎", "无奈", "感慨", "思考", "反思", "领悟", "成长"]
        }
        # Action vocabulary.
        # NOTE(review): this list previously held 15 empty strings, and
        # str.count("") returns len(text) + 1, which pinned action_ratio at
        # its 0.5 cap. The verbs below are a replacement chosen during
        # review - confirm against the intended vocabulary.
        self.action_words = [
            "走", "跑", "跳", "看", "听", "说", "拿", "放",
            "推", "拉", "坐", "站", "打", "抱", "笑",
        ]
        # Description vocabulary.
        self.description_words = ["美丽", "漂亮", "高大", "宽敞", "明亮", "温暖", "寒冷", "炎热", "安静", "嘈杂"]
        # Dialogue patterns, one per delimiter pair. No delimiter appears in
        # two patterns, so a quoted span is never matched twice.
        self.dialogue_patterns = [
            r'【([^【】]+)】',                      # lenticular brackets
            r'「([^「」]+)」',                      # corner brackets
            r'\u201c([^\u201c\u201d]+)\u201d',    # curly double quotes
            r'"([^"]+)"',                         # ASCII double quotes
            r"'([^']+)'",                         # ASCII single quotes
        ]

    @staticmethod
    def _count_keywords(text: str, words) -> int:
        """Return the total number of occurrences of *words* in *text*.

        Empty keywords are skipped because ``str.count("")`` returns
        ``len(text) + 1`` rather than zero.
        """
        return sum(text.count(word) for word in words if word)

    def _find_dialogues(self, text: str) -> List[str]:
        """Return the inner text of every span matched by a dialogue pattern."""
        found: List[str] = []
        for pattern in self.dialogue_patterns:
            found.extend(re.findall(pattern, text))
        return found

    def assess_novel_quality(self, text: str, title: str = "") -> "QualityMetrics":
        """Run every analysis on *text* and bundle the results.

        Args:
            text: The novel text to assess.
            title: Accepted for interface compatibility; not used here.

        Returns:
            A ``QualityMetrics`` whose ``overall_score`` is the unweighted
            mean of the five dimension scores.
        """
        logger.info("开始质量评估...")
        basic_stats = self._calculate_basic_stats(text)
        content_analysis = self._analyze_content(text)
        structure_analysis = self._analyze_structure(text)
        language_analysis = self._analyze_language_quality(text)

        dimension_scores = {
            "内容丰富度": content_analysis["richness_score"],
            "结构合理性": structure_analysis["structure_score"],
            "语言质量": language_analysis["language_score"],
            "情感表达": content_analysis["emotion_score"],
            "对话质量": content_analysis["dialogue_score"]
        }
        overall_score = sum(dimension_scores.values()) / len(dimension_scores)

        metrics = QualityMetrics(
            total_chars=basic_stats["total_chars"],
            effective_chars=basic_stats["effective_chars"],
            paragraph_count=basic_stats["paragraph_count"],
            sentence_count=basic_stats["sentence_count"],
            dialogue_count=basic_stats["dialogue_count"],
            dialogue_ratio=content_analysis["dialogue_ratio"],
            description_ratio=content_analysis["description_ratio"],
            action_ratio=content_analysis["action_ratio"],
            avg_sentence_length=language_analysis["avg_sentence_length"],
            vocabulary_richness=language_analysis["vocabulary_richness"],
            repetition_rate=language_analysis["repetition_rate"],
            scene_transitions=structure_analysis["scene_transitions"],
            character_mentions=structure_analysis["character_mentions"],
            emotion_words=content_analysis["emotion_words"],
            overall_score=overall_score,
            dimension_scores=dimension_scores
        )
        logger.info("质量评估完成,综合评分: %.1f", overall_score)
        return metrics

    def _calculate_basic_stats(self, text: str) -> Dict[str, Any]:
        """Count characters, paragraphs, sentences and dialogue spans.

        Returns:
            A dict with keys ``total_chars``, ``effective_chars``,
            ``paragraph_count``, ``sentence_count`` and ``dialogue_count``.
        """
        # Effective characters = CJK ideographs + listed punctuation + digits.
        # None of these classes contains whitespace, so the text can be
        # scanned directly without stripping lines first.
        effective_chars = (
            len(self._CJK_CHAR_RE.findall(text))
            + len(self._PUNCTUATION_RE.findall(text))
            + len(self._DIGIT_RE.findall(text))
        )
        # Paragraphs are separated by a blank line.
        paragraph_count = sum(1 for part in text.split('\n\n') if part.strip())
        sentence_count = sum(
            1 for part in self._SENTENCE_END_RE.split(text) if part.strip()
        )
        return {
            "total_chars": len(text),
            "effective_chars": effective_chars,
            "paragraph_count": paragraph_count,
            "sentence_count": sentence_count,
            "dialogue_count": len(self._find_dialogues(text))
        }

    def _analyze_content(self, text: str) -> Dict[str, Any]:
        """Estimate dialogue/description/action proportions and emotion density.

        Returns:
            A dict with keys ``dialogue_ratio``, ``description_ratio``,
            ``action_ratio``, ``emotion_words``, ``richness_score``,
            ``emotion_score`` and ``dialogue_score``.
        """
        # Share of the text that sits inside dialogue delimiters. Nested
        # quotes can still overlap, so the value is clamped to 1.0.
        dialogue_chars = sum(len(span) for span in self._find_dialogues(text))
        dialogue_ratio = min(1.0, dialogue_chars / len(text)) if text else 0

        # Keyword hits per 100, capped at 0.5; these are normalised counts
        # rather than true proportions of the text.
        description_ratio = min(
            self._count_keywords(text, self.description_words) / 100, 0.5
        )
        action_ratio = min(
            self._count_keywords(text, self.action_words) / 100, 0.5
        )

        emotion_words = sum(
            self._count_keywords(text, words)
            for words in self.emotion_words.values()
        )

        richness_score = min(
            100,
            (dialogue_ratio * 30 + description_ratio * 40 + action_ratio * 30) * 100,
        )
        emotion_score = min(100, emotion_words * 2)      # 2 points per hit
        dialogue_score = min(100, dialogue_ratio * 200)  # 50% dialogue -> 100
        return {
            "dialogue_ratio": dialogue_ratio,
            "description_ratio": description_ratio,
            "action_ratio": action_ratio,
            "emotion_words": emotion_words,
            "richness_score": richness_score,
            "emotion_score": emotion_score,
            "dialogue_score": dialogue_score
        }

    def _analyze_structure(self, text: str) -> Dict[str, Any]:
        """Estimate scene transitions and recurring character names.

        Returns:
            A dict with keys ``scene_transitions``, ``character_mentions``
            (candidate name -> count, only names seen at least 3 times) and
            ``structure_score``.
        """
        # Non-overlapping matches: each stretch of text counts for at most
        # one marker.
        scene_transitions = len(self._SCENE_MARKER_RE.findall(text))

        # Keep only candidates frequent enough to plausibly be characters.
        name_counts = Counter(self._NAME_RE.findall(text))
        character_mentions = {
            name: count for name, count in name_counts.items() if count >= 3
        }

        structure_score = min(
            100, scene_transitions * 5 + len(character_mentions) * 10
        )
        return {
            "scene_transitions": scene_transitions,
            "character_mentions": character_mentions,
            "structure_score": structure_score
        }

    def _analyze_language_quality(self, text: str) -> Dict[str, Any]:
        """Measure sentence length, vocabulary variety and phrase repetition.

        Returns:
            A dict with keys ``avg_sentence_length``, ``vocabulary_richness``,
            ``repetition_rate`` and ``language_score``.
        """
        sentences = [
            part.strip()
            for part in self._SENTENCE_END_RE.split(text)
            if part.strip()
        ]
        avg_sentence_length = (
            sum(len(s) for s in sentences) / len(sentences) if sentences else 0
        )

        # Rough estimate: each run of consecutive ideographs is one "word".
        words = self._CJK_RUN_RE.findall(text)
        vocabulary_richness = len(set(words)) / len(words) if words else 0

        # Slide a fixed-size window over the text. The "+ 1" includes the
        # final window, so a repeat that ends exactly at the end of the text
        # is seen. A negative range is simply empty for short texts.
        window = self._PHRASE_WINDOW
        phrase_counts = Counter(
            text[i:i + window]
            for i in range(len(text) - window + 1)
            if self._PHRASE_RE.fullmatch(text[i:i + window])
        )
        total_phrases = sum(phrase_counts.values())
        # Distinct windows seen more than once, relative to all windows.
        repeated = sum(1 for count in phrase_counts.values() if count > 1)
        repetition_rate = repeated / total_phrases if total_phrases else 0

        # A sentence length of 20 is treated as ideal; each character of
        # deviation costs 2 points.
        length_score = min(100, max(0, 100 - abs(avg_sentence_length - 20) * 2))
        richness_score = vocabulary_richness * 100
        repetition_score = max(0, 100 - repetition_rate * 200)
        language_score = (length_score + richness_score + repetition_score) / 3
        return {
            "avg_sentence_length": avg_sentence_length,
            "vocabulary_richness": vocabulary_richness,
            "repetition_rate": repetition_rate,
            "language_score": language_score
        }

    def generate_improvement_suggestions(self, metrics: "QualityMetrics") -> List[str]:
        """Turn weak scores and out-of-range metrics into advice strings.

        Args:
            metrics: Result of a previous assessment.

        Returns:
            Suggestions, dimension-based ones first, then metric-based ones.

        Raises:
            KeyError: If ``metrics.dimension_scores`` lacks a dimension key.
        """
        suggestions = [
            advice
            for dimension, advice in self._DIMENSION_ADVICE
            if metrics.dimension_scores[dimension] < self._DIMENSION_THRESHOLD
        ]
        if metrics.dialogue_ratio < 0.2:
            suggestions.append("对话比例偏低,建议增加角色对话来推进情节")
        if metrics.avg_sentence_length > 30:
            suggestions.append("句子平均长度偏长,建议适当使用短句增加节奏感")
        if metrics.repetition_rate > 0.1:
            suggestions.append("存在较多重复表达,建议使用同义词替换增加表达多样性")
        if len(metrics.character_mentions) < 3:
            suggestions.append("角色数量偏少,建议增加配角丰富故事内容")
        return suggestions

    def export_quality_report(self, metrics: "QualityMetrics", title: str = "", output_file: Optional[str] = None) -> str:
        """Render *metrics* as a Markdown report and optionally save it.

        Args:
            metrics: Result of a previous assessment.
            title: Title shown in the report; a placeholder is used if empty.
            output_file: If given, the report is also written there as UTF-8.
                Saving is best-effort: a failure is logged, not raised.

        Returns:
            The full report text.
        """
        scores = metrics.dimension_scores
        lines = [
            "",
            "# 小说质量评估报告",
            "## 基本信息",
            f"- 标题: {title or '未命名'}",
            f"- 总字符数: {metrics.total_chars:,}",
            f"- 有效字符数: {metrics.effective_chars:,}",
            f"- 段落数: {metrics.paragraph_count}",
            f"- 句子数: {metrics.sentence_count}",
            f"- 对话数: {metrics.dialogue_count}",
            f"## 综合评分: {metrics.overall_score:.1f}/100",
            "## 各维度评分",
        ]
        lines.extend(
            f"- {dimension}: {scores[dimension]:.1f}/100"
            for dimension, _ in self._DIMENSION_ADVICE
        )
        lines.extend([
            "## 详细指标",
            f"- 对话比例: {metrics.dialogue_ratio:.1%}",
            f"- 描写比例: {metrics.description_ratio:.1%}",
            f"- 动作比例: {metrics.action_ratio:.1%}",
            f"- 平均句长: {metrics.avg_sentence_length:.1f}",
            f"- 词汇丰富度: {metrics.vocabulary_richness:.1%}",
            f"- 重复率: {metrics.repetition_rate:.1%}",
            f"- 场景转换: {metrics.scene_transitions}",
            f"- 情感词汇: {metrics.emotion_words}",
            "## 角色提及统计",
        ])
        lines.extend(
            f"- {character}: {count}"
            for character, count in metrics.character_mentions.items()
        )

        suggestions = self.generate_improvement_suggestions(metrics)
        if suggestions:
            lines.append("")
            lines.append("## 改进建议")
            lines.extend(
                f"{index}. {suggestion}"
                for index, suggestion in enumerate(suggestions, 1)
            )
        report = "\n".join(lines) + "\n"

        if output_file:
            try:
                with open(output_file, 'w', encoding='utf-8') as f:
                    f.write(report)
            except (OSError, UnicodeError) as e:
                # Best-effort save: the caller still receives the report.
                logger.error("保存质量报告失败: %s", e)
            else:
                logger.info("质量报告已保存到: %s", output_file)
        return report
def assess_novel_quality(text: str, title: str = "") -> QualityMetrics:
    """Convenience wrapper: assess *text* with a fresh ``NovelQualityAssessor``.

    Both arguments are forwarded unchanged to the assessor's method of the
    same name, and its result is returned.
    """
    return NovelQualityAssessor().assess_novel_quality(text, title)
def generate_quality_report(text: str, title: str = "", output_file: Optional[str] = None) -> str:
    """Convenience wrapper: assess *text* and render the report in one call.

    Args:
        text: The novel text to assess.
        title: Passed to both the assessment and the report.
        output_file: Optional path forwarded to ``export_quality_report``;
            ``None`` (the default) means no file is requested.

    Returns:
        The report text returned by ``export_quality_report``.
    """
    assessor = NovelQualityAssessor()
    metrics = assessor.assess_novel_quality(text, title)
    return assessor.export_quality_report(metrics, title, output_file)
if __name__ == "__main__":
    # Manual smoke test: assess a short sample and print three headline figures.
    sample_text = """
《测试小说》
李明走进办公室,心情复杂。今天是他入职的第一天,既兴奋又紧张。
【你好,我是新来的李明。】他对前台小姐说道。
【欢迎!我是小王,有什么需要帮助的尽管说。】小王热情地回答。
李明感到一阵温暖,看来这里的同事都很友善。他想起了母亲昨天的话:【要好好工作,不要让我们失望。】
突然,一个严肃的声音响起:【你就是新来的?跟我来。】
"""
    metrics = assess_novel_quality(sample_text, "测试小说")
    for summary_line in (
        f"综合评分: {metrics.overall_score:.1f}",
        f"对话比例: {metrics.dialogue_ratio:.1%}",
        f"平均句长: {metrics.avg_sentence_length:.1f}",
    ):
        print(summary_line)