Skip to content

cr-default

Parser that converts Command-R family chat strings into the canonical chat representation

Attributes

STATE_INITIAL module-attribute

STATE_INITIAL = 'INITIAL'

STATE_MESSAGE module-attribute

STATE_MESSAGE = 'message'

STATE_START module-attribute

STATE_START = 'start'

states module-attribute

states = (
    (STATE_START, "exclusive"),
    (STATE_MESSAGE, "exclusive"),
)

t_WHITESPACE module-attribute

t_WHITESPACE = '[ \\t]+'

t_message_error module-attribute

t_message_error = t_error

t_start_WHITESPACE module-attribute

t_start_WHITESPACE = '[ \\t]+'

t_start_error module-attribute

t_start_error = t_error

tokens module-attribute

tokens = (
    "BOS",
    "START_OF_TURN",
    "END_OF_TURN",
    "SYSTEM_TOKEN",
    "USER_TOKEN",
    "CHATBOT_TOKEN",
    "MESSAGE",
    "NEWLINE",
    "WHITESPACE",
)

Classes

CRDefault

CRDefault(env: Environment)

Bases: BaseCanonical

Command-R (default) lexer implementation

Attributes

name property
name: str

Functions

to_canonical
to_canonical(data: str, **kwargs) -> CanonicalChat

Functions

t_BOS

t_BOS(t: LexToken)

t_END_OF_TURN

t_END_OF_TURN(t: LexToken)

<\|END_OF_TURN_TOKEN\|>

t_NEWLINE

t_NEWLINE(t: LexToken)

\n

t_START_OF_TURN

t_START_OF_TURN(t: LexToken)

<\|START_OF_TURN_TOKEN\|>

t_error

t_error(t: LexToken)

t_message_END_OF_TURN

t_message_END_OF_TURN(t: LexToken)

<\|END_OF_TURN_TOKEN\|>

t_message_MESSAGE

t_message_MESSAGE(t: LexToken)

[\s\S]+?(?=<\|END_OF_TURN_TOKEN\|>)

t_start_CHATBOT_TOKEN

t_start_CHATBOT_TOKEN(t: LexToken)

<\|CHATBOT_TOKEN\|>

t_start_END_OF_TURN

t_start_END_OF_TURN(t: LexToken)

<\|END_OF_TURN_TOKEN\|>

t_start_SYSTEM_TOKEN

t_start_SYSTEM_TOKEN(t: LexToken)

<\|SYSTEM_TOKEN\|>

t_start_USER_TOKEN

t_start_USER_TOKEN(t: LexToken)

<\|USER_TOKEN\|>