evaluate_settings
TextEvolve Evaluate MK2 settings module
Classes
EvaluateAgent
Bases: BaseDataModel
Evaluate agent configuration
Attributes:
-
role
(str
) –Agent’s role name / specialization
-
description
(str
) –Agent description
-
personality
(str
) –Agent personality
-
semantic_memory_collections
(List[str]
) –A list of semantic memory collections available for the agent to recall.
EvaluateMemoryRecallSettings
Bases: BaseDataModel
Evaluate memory recall settings
Attributes:
-
enabled
(bool
) –When true, enables memory recall for agents in the profile.
-
top_k_vectors
(int
) –Maximum number of results to return when performing a lookup in the vector database.
-
relevance_alpha
(float
) –Weight multiplied to the calculated relevance score.
-
importance_alpha
(float
) –Weight multiplied to the calculated importance score.
-
min_score
(float
) –Minimum score needed to be considered for recall.
-
max_memories
(int
) –Maximum number of memories to recall during a debate turn.
-
max_memory_strategy
(str
) –Max memory strategy used for recall operation.
-
reduce_batch_size
(int
) –Batch size used for the reduce operation.
Attributes
enabled
class-attribute
instance-attribute
enabled: bool = Field(
default=True,
title="Enabled",
description="When true, enables memory recall for agents in the profile. Agents must be bound to at least one semantic memory collection for this to take effect.",
)
importance_alpha
class-attribute
instance-attribute
importance_alpha: float = Field(
default=1.0,
ge=0.0,
le=1.0,
title="Importance Alpha",
description="Weight multiplied to the calculated importance score, used to control how important 'importance' is to the overall final score of an individual memory.",
)
max_memories
class-attribute
instance-attribute
max_memories: int = Field(
default=10,
ge=1,
title="Max Memories",
description="Maximum number of memories to recall during a debate turn.",
)
max_memory_strategy
class-attribute
instance-attribute
max_memory_strategy: str = Field(
default="HARD_LIMIT",
title="Max Memory Strategy",
description="Controls how the 'max_memories' setting is applied. A value of 'COLLECTION_LIMIT' causes 'max_memories' to be applied per collection; for example, if the agent has 3 collections bound and 'max_memories' is 10, then up to 30 memories will be returned. A value of 'HARD_LIMIT' treats 'max_memories' as the maximum number of memories returned across all collections.",
)
min_score
class-attribute
instance-attribute
min_score: float = Field(
default=0.0,
le=1.0,
ge=0.0,
title="Minimum Score",
description="After memories are scored this value is used to filter out any memories that don't meet a minimum score. Setting this to 0.0 will effectively disable the minimum score filter.",
)
reduce_batch_size
class-attribute
instance-attribute
reduce_batch_size: int = Field(
default=200,
title="Reduce Batch Size",
description="For backend LLM map/reduce operations, this parameter controls the batch size used for the reduce operation. Only applicable to a max_memory_strategy of 'SUMMARY'.",
)
relevance_alpha
class-attribute
instance-attribute
relevance_alpha: float = Field(
default=1.0,
ge=0.0,
le=1.0,
title="Relevance Alpha",
description="Weight multiplied to the calculated relevance score, used to control how important 'relevance' is to the overall final score of an individual memory.",
)
top_k_vectors
class-attribute
instance-attribute
top_k_vectors: int = Field(
default=10,
ge=1,
title="Top K Vectors",
description="When performing a lookup in the vector database, this setting determines the maximum number of results to return. Note that this is applicable at the collection level - meaning if an agent has multiple collections bound, this setting applies to each collection.",
)
EvaluateProfile
Bases: BaseDataModel
Evaluate profile settings
Attributes:
-
description
(str
) –Profile strategy description
-
agents
(List[str]
) –A list of agent names that will participate in the debate.
-
completeness_score_weight
(float
) –Default weight for completeness score
-
relevance_score_weight
(float
) –Default weight for relevance score
-
accuracy_score_weight
(float
) –Default weight for accuracy score
-
timeliness_score_weight
(float
) –Default weight for timeliness score
-
mu
(float
) –Mu is an exponential sensitivity coefficient that controls how sensitive the score is to changes in the confidence weight.
-
max_rounds
(int
) –Maximum number of rounds.
-
early_stopping
(bool
) –When set to true, early stopping is possible when the CV threshold is reached.
-
cv_threshold
(float
) –Controls the early stopping mechanism.
-
debate_history_length
(int | None
) –Maximum number of debate history rounds to include in a debate turn.
-
memory_recall
(EvaluateMemoryRecallSettings
) –Profile memory recall settings
-
auto_ranging
(bool
) –When auto ranging is enabled, scores < 0 returned by the LLM are set to 1 and scores > 10 are set to 10.
-
memory_retrieval_context_strategy
(MemoryRetrievalContextStrategy
) –Strategy used to build debater agent memory retrieval context
-
turn_retry_strategy
(IoCFactoryModel | None
) –When a debate turn fails for any reason, this setting controls how Evaluate responds to the failure.
Attributes
accuracy_score_weight
class-attribute
instance-attribute
accuracy_score_weight: float = Field(
default=1.0,
ge=0.0,
le=1.0,
title="Default Accuracy Score Weight",
description="Default weight for accuracy score",
)
agents
class-attribute
instance-attribute
agents: List[str] = Field(
default_factory=list,
title="Agents",
description="A list of agent names that will participate in the debate. Agents must be present in the same namespace as the agent_namespace setting. Note that static agent names will be set to their role from the Evaluate configuration.",
)
auto_ranging
class-attribute
instance-attribute
auto_ranging: bool = Field(
default=True,
title="Auto Ranging",
description="When auto ranging is enabled, scores < 0 returned by the LLM are set to 1 and scores > 10 are set to 10.",
)
completeness_score_weight
class-attribute
instance-attribute
completeness_score_weight: float = Field(
default=1.0,
ge=0.0,
le=1.0,
title="Default Completeness Score Weight",
description="Default weight for completeness score",
)
cv_threshold
class-attribute
instance-attribute
cv_threshold: float = Field(
default=0.0,
ge=0.0,
title="CV Threshold",
description="Controls the early stopping mechanism. After a debate round is finished, the coefficient of variation (CV) of scores is calculated. If cv_threshold is >= the CV for the round then early stopping is triggered. For example, if in debate round 2, CV=0.174 and cv_threshold=0.2 then early stopping is triggered since the scores are within 20% of each other. Similarly, setting cv_threshold=0.0 would mean that all scores would need to be identical to trigger early stopping.",
)
debate_history_length
class-attribute
instance-attribute
debate_history_length: int | None = Field(
default=None,
title="Debate History Length",
description="Maximum number of debate history rounds to include in a debate turn. Setting this too high will significantly increase the number of request tokens used when deriving a debate turn score. A value of None indicates that the entire history is used.",
)
description
class-attribute
instance-attribute
description: str = Field(
default="",
title="Description",
description="Profile strategy description",
)
early_stopping
class-attribute
instance-attribute
early_stopping: bool = Field(
default=True,
title="Early Stopping",
description="When set to true, early stopping is possible when the CV threshold is reached.",
)
max_rounds
class-attribute
instance-attribute
max_rounds: int = Field(
default=2,
description="Maximum number of rounds. For best results keep this at a minimum of 2 so each debater has a chance to respond and adjust to the first round of conversation.",
)
memory_recall
class-attribute
instance-attribute
memory_recall: EvaluateMemoryRecallSettings = Field(
default_factory=EvaluateMemoryRecallSettings,
title="Memory",
description="Profile memory recall settings",
)
memory_retrieval_context_strategy
class-attribute
instance-attribute
memory_retrieval_context_strategy: (
MemoryRetrievalContextStrategy
) = Field(
default=CAT,
title="Memory Retrieval Context Strategy",
description="Strategy used to build debater agent memory retrieval context",
)
mu
class-attribute
instance-attribute
mu: float = Field(
default=1.0,
title="Mu",
description="Mu is an exponential sensitivity coefficient that controls how sensitive the score is to changes in the confidence weight. For example, when mu=0.0, the confidence weights derived during evaluation have no impact on the score. A value of 0.0 < mu < 1.0 is used when confidence should have a moderate impact on the score. This should be adjusted based on how good the LLM is at assigning an accurate confidence.",
)
relevance_score_weight
class-attribute
instance-attribute
relevance_score_weight: float = Field(
default=1.0,
ge=0.0,
le=1.0,
title="Default Relevance Score Weight",
description="Default weight for relevance score",
)
timeliness_score_weight
class-attribute
instance-attribute
timeliness_score_weight: float = Field(
default=1.0,
ge=0.0,
le=1.0,
title="Default Timeliness Score Weight",
description="Default weight for timeliness score",
)
turn_retry_strategy
class-attribute
instance-attribute
turn_retry_strategy: IoCFactoryModel | None = Field(
None,
title="Turn Retry Strategy",
description="When a debate turn fails for any reason, this setting controls how Evaluate responds to the failure. Too many failures may indicate a problem with prompt or inferencing parameters in the debate_turn chain, or with the LLM itself.",
)
Functions
EvaluateSettings
Bases: BaseDataModel
Root TextEvolve/Evaluate settings for the EleanorAI Framework.
Attributes:
-
service_pool_name
(str
) –Name of the service-level thread pool used for evaluation.
-
service_pool_size
(int
) –Size of the service-level thread pool used for evaluation.
-
chain_initializer
(InitializerModel | None
) –Chain initializer to run on BACKEND_INIT_READY
-
backend_api_initializer
(InitializerModel | None
) –RDBMS initializer to run on BACKEND_INIT_READY
-
chains
(Dict[str, IoCFactoryModel]
) –Chain definitions used by Evaluate
-
agent_namespace
(str
) –EleanorAI Framework managed namespace which debater agents belong to.
-
manage_resources
(bool
) –When true, the Evaluate logic engine will automatically create namespace and default agent resources if they do not exist.
-
agent_namespace_settings
(NamespaceResourceSettings | None
) –Default agent namespace settings to use when auto-creating the namespace in which debater agents will reside.
-
static_agents
(List[EvaluateAgent]
) –When manage_resources is enabled, the agents in this configuration will be synchronized to the RDBMS on startup.
-
default_profile
(str
) –Name of the default profile when one is not specified by the client
-
profiles
(Dict[str, EvaluateProfile]
) –Evaluation profile settings
-
log_summary
(bool
) –After each evaluation write a detailed summary to the log
Attributes
agent_namespace
class-attribute
instance-attribute
agent_namespace: str = Field(
...,
title="Agent Namespace",
description="EleanorAI Framework managed namespace which debater agents belong to. This needs to be considered a system/internal namespace as the agents here will be managed by the Evaluate engine internally.",
)
agent_namespace_settings
class-attribute
instance-attribute
agent_namespace_settings: (
NamespaceResourceSettings | None
) = Field(
default=None,
title="Agent Namespace Settings",
description="Default agent namespace settings to use when auto-creating the namespace in which debater agents will reside.",
)
backend_api_initializer
class-attribute
instance-attribute
backend_api_initializer: InitializerModel | None = Field(
default=None,
title="Backend Initializer",
description="RDBMS initializer to run on BACKEND_INIT_READY",
)
chain_initializer
class-attribute
instance-attribute
chain_initializer: InitializerModel | None = Field(
default=None,
title="Chain Initializer",
description="Chain initializer to run on BACKEND_INIT_READY",
)
chains
class-attribute
instance-attribute
chains: Dict[str, IoCFactoryModel] = Field(
default_factory=dict,
title="Chains",
description="Chain definitions used by Evaluate",
)
default_profile
class-attribute
instance-attribute
default_profile: str = Field(
...,
title="Default Profile",
description="Name of the default profile when one is not specified by the client",
)
log_summary
class-attribute
instance-attribute
log_summary: bool = Field(
default=True,
title="Log Summary",
description="After each evaluation write a detailed summary to the log",
)
manage_resources
class-attribute
instance-attribute
manage_resources: bool = Field(
default=False,
title="Create Resources",
description="When true, the Evaluate logic engine will automatically create namespace and default agent resources if they do not exist.",
)
profiles
class-attribute
instance-attribute
profiles: Dict[str, EvaluateProfile] = Field(
default_factory=dict,
title="Profiles",
description="Evaluation profile settings",
)
service_pool_name
class-attribute
instance-attribute
service_pool_name: str = Field(
default="evaluate_pool",
title="Service Pool Name",
description="Name of the service-level thread pool used for evaluation. This thread pool governs the maximum possible parallel evaluation tasks that can be active at any given time.",
)
service_pool_size
class-attribute
instance-attribute
service_pool_size: int = Field(
default=10,
title="Service Pool Size",
description="Size of the service-level thread pool used for evaluation.",
)
static_agents
class-attribute
instance-attribute
static_agents: List[EvaluateAgent] = Field(
default_factory=list,
title="Static Agents",
description="When manage_resources is enabled, the agents in this configuration will be synchronized to the RDBMS on startup.",
)
Functions
validate_backend_api_initializer
classmethod
validate_backend_api_initializer(
value,
) -> InitializerModel | None
validate_chain_initializer
classmethod
validate_chain_initializer(
value,
) -> InitializerModel | None