Skip to content

Classification

summ.classify.Classes

Bases: StrEnum

The parent class for all custom sets of tags.

To define a set of custom tags, inherit from this class and use Python Enum syntax to define them, using the category name as the prefix for each tag.

Example
class MyClasses(Classes):
    ROLE_IC = auto()
    ROLE_MANAGER = auto()

    SECTOR_TECH = auto()
    SECTOR_FINANCE = auto()
Source code in summ/classify/classes.py
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
@unique
class Classes(StrEnum):
    """The parent class for all custom sets of tags.

    To define a set of custom tags, inherit from this class
    and use Pyhon Enum syntax to define them, using the category
    name as the prefix for each tag.

    Example:
        ```python
        class MyClasses(Classes):
            ROLE_IC = auto()
            ROLE_MANAGER = auto()

            SECTOR_TECH = auto()
            SECTOR_FINANCE = auto()
        ```
    """

    @classmethod
    def get(cls, val: str) -> Optional[Self]:
        """:meta private:"""
        val = val.strip()
        try:
            return cls(val)
        except ValueError:
            try:
                return cls[val]
            except KeyError:
                pass

get(val: str) -> Optional[Self] classmethod

:meta private:

Source code in summ/classify/classes.py
24
25
26
27
28
29
30
31
32
33
34
@classmethod
def get(cls, val: str) -> Optional[Self]:
    """:meta private:"""
    val = val.strip()
    try:
        return cls(val)
    except ValueError:
        try:
            return cls[val]
        except KeyError:
            pass

summ.classify.Classifier

Bases: ABC, Generic[C], Chain

The base class for specifying custom classifiers to apply tags to an interview.

Source code in summ/classify/classifier.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
class Classifier(ABC, Generic[C], Chain):
    """The base class for specifying custom classifiers to apply tags to an interview.

    A subclass supplies its tag set through the `classes` class keyword
    (e.g. `class RoleClassifier(Classifier, classes=MyClasses)`). When the
    subclass is created it is validated with `check()` and stored in the
    shared `classifiers` registry under its `CATEGORY`.
    """

    CATEGORY: str
    """The name of the category to tag. Must be the prefix of a set of tags in your [`Classes`][summ.classify.Classes] subclass."""

    VARS: dict[str, str]
    """A dictionary mapping variable names to descriptions. These will be used to generate the prompt."""

    EXAMPLES: list[dict[str, str]]
    """A list of few-shot examples. Each example should be a dictionary with keys corresponding to the keys in `VARS`."""

    PREFIX: str = "Your job is to classify the following interview into one of the following categories."
    """The prompt prefix."""

    SUFFIX: str = ""
    """The prompt suffix."""

    classes: Type[C]
    """A subclass of [`Classes`][summ.classify.Classes] that defines the set of tags to use."""

    classifiers: dict[str, Type[Self]] = {}
    """A registry of subclasses implementing custom classifiers."""

    def __init_subclass__(cls: Type[Self], classes: Type[C], **kwargs):
        """Records the tag set, validates the subclass and registers it.

        Args:
            classes: The tag enum the new classifier draws its tags from.
            **kwargs: Forwarded unchanged to the parent `__init_subclass__`.

        Raises:
            ValueError: If `check()` rejects the subclass constants.
        """
        super().__init_subclass__(**kwargs)
        cls.classes = classes
        # Validate before registering so an unsound subclass never enters the registry.
        cls.check()
        cls.classifiers[cls.CATEGORY] = cls

    @classmethod
    def check(cls):
        """Ensures that the supplied constants are sound.

        Raises:
            ValueError: If no tag name in `classes` starts with `CATEGORY`, or
                if `VARS` has no key equal to the lower-cased `CATEGORY`.
        """

        if not any(c for c in cls.classes if c.name.startswith(cls.CATEGORY)):
            raise ValueError(
                f"{cls.classes} does not contain any classes with the prefix {cls.CATEGORY}"
            )
        if cls.CATEGORY.lower() not in cls.VARS:
            raise ValueError(f"VARS does not contain the key {cls.CATEGORY.lower()}")

    @classmethod
    def classify_all(cls, docs: list[Document]) -> dict[str, list[C]]:
        """Runs the Documents through all registered subclasses.

        Args:
            docs: The Documents handed to each classifier's `run`.

        Returns:
            A dictionary mapping each registered category to the tags
            returned by a fresh instance of its classifier.
        """

        return {c: klass().run(docs) for c, klass in cls.classifiers.items()}

    def example_template(self, dynamic=frozenset()) -> str:
        """The template used to construct one example in the prompt.

        Args:
            dynamic: Variable names whose value is left empty instead of being
                rendered as a `{name}` placeholder. The default is an immutable
                empty set, so no mutable object is shared between calls.

        Returns:
            One `description: {name}` line per entry of `VARS`, joined by newlines.
        """

        return "\n".join(
            [
                f"{v}: { '' if k in dynamic else '{' + k + '}' }"
                for k, v in self.VARS.items()
            ]
        )

    def prompt_template(self):
        """The template used to construct the prompt.

        Returns:
            A `FewShotPromptTemplate` built from `EXAMPLES`, `PREFIX`, `SUFFIX`
            and the tag values that start with the lower-cased `CATEGORY`. The
            category variable itself is excluded from the input variables and
            left empty in the trailing template.
        """

        classes = "\n".join(
            [c.value for c in self.classes if c.startswith(self.CATEGORY.lower())]
        )
        return FewShotPromptTemplate(
            examples=self.EXAMPLES,
            example_prompt=PromptTemplate(
                input_variables=list(self.VARS.keys()),
                template=self.example_template(),
            ),
            prefix=dedent(
                f"""
                {self.PREFIX}
                Return a comma-separated list of classes, with no extra text of explanation.
                For example: "industry_software, role_ic"

                Options:
                {classes}

                {self.SUFFIX}

                """
            ),
            suffix=self.example_template(dynamic={self.CATEGORY.lower()}),
            input_variables=list(self.VARS.keys() - {self.CATEGORY.lower()}),
            example_separator="\n",
        )

    def debug_prompt(self, **kwargs: str) -> str:
        """Returns the prompt with the given variables filled in.

        Args:
            **kwargs: One string value per input variable of the prompt template.

        Returns:
            The fully formatted prompt text.
        """

        return self.prompt_template().format(**kwargs)

    def _parse(self, results: str) -> list[C]:
        """Converts a comma-separated result string into a list of tags.

        Each piece is resolved with `classes.get`; pieces that do not resolve
        to a tag are dropped.
        """
        return [
            c for result in results.split(",") for c in [self.classes.get(result)] if c
        ]

    def run(self, docs: list[Document]) -> list[C]:
        """Runs the Documents through the classifier and returns the tags.

        Args:
            docs: The Documents to classify.

        Returns:
            The tags parsed from the string returned by `self.cached`.
        """
        chain = LLMChain(llm=self.llm, prompt=self.prompt_template())
        results = self.cached(
            "run",
            chain,
            docs,
            self.classify,
        )
        return self._parse(results)

    @abstractmethod
    def classify(self, docs: list[Document]) -> dict[str, str]:
        """Extracts a set of VARS from a list of Documents.
        This method must be implemented by subclasses.

        Args:
            docs: The Documents resulting from applying the Splitter to an import source.

        Returns:
            A dictionary mapping variable names to values. The keys should be a subset of the keys in `VARS`.
        """
        raise NotImplementedError

CATEGORY: str class-attribute

The name of the category to tag. Must be the prefix of a set of tags in your Classes subclass.

VARS: dict[str, str] class-attribute

A dictionary mapping variable names to descriptions. These will be used to generate the prompt.

EXAMPLES: list[dict[str, str]] class-attribute

A list of few-shot examples. Each example should be a dictionary with keys corresponding to the keys in VARS.

PREFIX: str = 'Your job is to classify the following interview into one of the following categories.' class-attribute

The prompt prefix.

SUFFIX: str = '' class-attribute

The prompt suffix.

classes: Type[C] class-attribute

A subclass of Classes that defines the set of tags to use.

classifiers: dict[str, Type[Self]] = {} class-attribute

A registry of subclasses implementing custom classifiers.

check() classmethod

Ensures that the supplied constants are sound.

Source code in summ/classify/classifier.py
45
46
47
48
49
50
51
52
53
54
@classmethod
def check(cls):
    """Ensures that the supplied constants are sound."""

    if not any(c for c in cls.classes if c.name.startswith(cls.CATEGORY)):
        raise ValueError(
            f"{cls.classes} does not contain any classes with the prefix {cls.CATEGORY}"
        )
    if cls.CATEGORY.lower() not in cls.VARS:
        raise ValueError(f"VARS does not contain the key {cls.CATEGORY.lower()}")

classify_all(docs: list[Document]) -> dict[str, list[C]] classmethod

Runs a Document through all registered subclasses.

Source code in summ/classify/classifier.py
56
57
58
59
60
@classmethod
def classify_all(cls, docs: list[Document]) -> dict[str, list[C]]:
    """Applies every registered classifier to the given Documents.

    Returns a dictionary keyed by category, each value being the tags
    returned by a fresh instance of that category's classifier.
    """

    tags_by_category = {}
    for category, classifier_cls in cls.classifiers.items():
        tags_by_category[category] = classifier_cls().run(docs)
    return tags_by_category

example_template(dynamic = set()) -> str

The template used to construct one example in the prompt.

Source code in summ/classify/classifier.py
62
63
64
65
66
67
68
69
70
def example_template(self, dynamic=frozenset()) -> str:
    """The template used to construct one example in the prompt.

    Args:
        dynamic: Variable names whose value is left empty instead of being
            rendered as a `{name}` placeholder. The default is an immutable
            empty set, so no mutable object is shared between calls.

    Returns:
        One `description: {name}` line per entry of `VARS`, joined by newlines.
    """

    return "\n".join(
        [
            f"{v}: { '' if k in dynamic else '{' + k + '}' }"
            for k, v in self.VARS.items()
        ]
    )

prompt_template()

The template used to construct the prompt.

Source code in summ/classify/classifier.py
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def prompt_template(self):
    """Assembles the few-shot prompt for this classifier's category.

    The prompt lists the tag values that start with the lower-cased
    `CATEGORY`, renders `EXAMPLES` with the per-example template, and ends
    with a template in which the category line has an empty value.
    """

    category_key = self.CATEGORY.lower()
    var_names = self.VARS.keys()

    # One tag value per line, restricted to the tags of this category.
    classes = "\n".join(
        tag.value for tag in self.classes if tag.startswith(category_key)
    )
    example_prompt = PromptTemplate(
        input_variables=list(var_names),
        template=self.example_template(),
    )
    # The closing template renders the category line with an empty value.
    closing_template = self.example_template(dynamic={category_key})
    remaining_vars = list(var_names - {category_key})

    return FewShotPromptTemplate(
        examples=self.EXAMPLES,
        example_prompt=example_prompt,
        prefix=dedent(
            f"""
            {self.PREFIX}
            Return a comma-separated list of classes, with no extra text of explanation.
            For example: "industry_software, role_ic"

            Options:
            {classes}

            {self.SUFFIX}

            """
        ),
        suffix=closing_template,
        input_variables=remaining_vars,
        example_separator="\n",
    )

debug_prompt(**kwargs: dict[str, str]) -> str

Returns the prompt with the given variables filled in.

Source code in summ/classify/classifier.py
102
103
104
105
def debug_prompt(self, **kwargs: str) -> str:
    """Returns the prompt with the given variables filled in.

    Args:
        **kwargs: One string value per input variable of the prompt template.
            The annotation describes each individual value, hence `str`.

    Returns:
        The result of formatting `prompt_template()` with the given values.
    """

    return self.prompt_template().format(**kwargs)

run(docs: list[Document]) -> list[C]

Runs a Document through the classifier and returns the tags.

Source code in summ/classify/classifier.py
112
113
114
115
116
117
118
119
120
121
def run(self, docs: list[Document]) -> list[C]:
    """Classifies the given Documents and returns the resulting tags.

    An `LLMChain` is built from `self.llm` and the prompt template, then
    handed to `self.cached` together with the Documents and `self.classify`;
    the string that comes back is converted to tags by `self._parse`.
    """
    llm = self.llm
    prompt = self.prompt_template()
    llm_chain = LLMChain(llm=llm, prompt=prompt)

    raw_tags = self.cached("run", llm_chain, docs, self.classify)
    return self._parse(raw_tags)

classify(docs: list[Document]) -> dict[str, str] abstractmethod

Extracts a set of VARS from a list of Documents. This method must be implemented by subclasses.

PARAMETER DESCRIPTION
docs

The Documents resulting from applying the Splitter to an import source.

TYPE: list[Document]

RETURNS DESCRIPTION
dict[str, str]

A dictionary mapping variable names to values. The keys should be a subset of the keys in VARS.

Source code in summ/classify/classifier.py
123
124
125
126
127
128
129
130
131
132
133
134
@abstractmethod
def classify(self, docs: list[Document]) -> dict[str, str]:
    """Extracts a set of VARS from a list of Documents.
    This method must be implemented by subclasses.

    Args:
        docs: The Documents resulting from applying the Splitter to an import source.

    Returns:
        A dictionary mapping variable names to values. The keys should be a subset of the keys in `VARS`.
    """
    raise NotImplementedError