Skip to content

API Reference

summ.Summ

The main entry point for both populating and querying the model.

Source code in summ/summ.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
class Summ:
    """The main entry point for both populating and querying the model.

    The constructor stores two settings that the methods read later:
    `index`, the name of the index to populate and query, and `n`, the
    value forwarded as `n` on every query.
    """

    def __init__(self, index: str = "sum-facts", n: int = 3):
        """Store the index name and the per-query fact count.

        Args:
            index (str, optional): Name of the index to populate and query.
            n (int, optional): The number of facts to use per sub-query.
        """
        self.index = index
        self.n = n

    def populate(
        self,
        path: Path,
        parallel: bool = True,
        pipe: Optional[Pipeline] = None,
    ):
        """Populate the model with data from a given path.

        Creates the embedder's index when it is missing, then runs the pipeline.

        Args:
            path (Path): The path to the data (format depends on [Importer][summ.importers.Importer]).
            parallel (bool, optional): Whether to run the pipeline in parallel.
            pipe (Optional[Pipeline], optional): The pipeline to use. If one is not supplied, a default one will be constructed.

        Raises:
            Exception: Any index-creation error whose text mentions neither
                "already exists" nor "quota" is re-raised unchanged.
        """
        if not pipe:
            pipe = Pipeline.default(path, self.index)

        if pipe.embedder.has_index():
            pipe.dprint("Index already exists!")
        else:
            try:
                print("Creating index, this may take a while...")
                pipe.dprint("Create Index", pipe.embedder.index_name)
                pipe.embedder.create_index()
            except Exception as e:
                # Only two known failure texts are downgraded to a message;
                # everything else propagates to the caller.
                reason = str(e)
                if "already exists" in reason:
                    msg = "Index already exists!"
                elif "quota" in reason:
                    # NOTE(review): the pipeline below still runs after this
                    # message -- confirm that is intended.
                    msg = "You have exceeded the number of indexes for your Pinecone tier!"
                else:
                    raise e
                print(msg)
                pipe.dprint(msg)

        pipe.run(parallel=parallel)

    def query(
        self,
        question: str,
        classes: Optional[list[Classes]] = None,
        corpus: Optional[list[Document]] = None,
        debug: bool = True,
    ) -> str:
        """
        Query a pre-populated model with a given question.

        The number of facts used per sub-query is not an argument here; it is
        taken from `self.n`, which is set in the constructor.

        Args:
            question (str): The question to ask.
            classes (Optional[list[Classes]], optional): The set of tags to use as filters (AND). `None` is treated as an empty list.
            corpus (Optional[list[Document]], optional): Documents forwarded to the querier as its `corpus` argument. `None` is treated as an empty list.
            debug (bool, optional): Whether to print intermediate steps.

        Returns:
            str: The value returned by `Querier.query`.

        Raises:
            Exception: If the index named by `self.index` does not exist.
        """
        if not Embedder(self.index).has_index():
            raise Exception(
                f"Index {self.index} not found! Please run `summ populate` first."
            )
        # Build fresh lists per call: a literal `[]` default would be a single
        # object shared by every call that omits the argument.
        classes = [] if classes is None else classes
        corpus = [] if corpus is None else corpus
        querier = Querier(index=self.index, debug=debug)
        return querier.query(question, n=self.n, classes=classes, corpus=corpus)

populate(path: Path, parallel: bool = True, pipe: Optional[Pipeline] = None)

Populate the model with data from a given path.

PARAMETER DESCRIPTION
path

The path to the data (format depends on Importer).

TYPE: Path

parallel

Whether to run the pipeline in parallel.

TYPE: bool DEFAULT: True

pipe

The pipeline to use. If one is not supplied, a default one will be constructed.

TYPE: Optional[Pipeline] DEFAULT: None

Source code in summ/summ.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
def populate(
    self,
    path: Path,
    parallel: bool = True,
    pipe: Optional[Pipeline] = None,
):
    """Populate the model with data from a given path.

    Creates the embedder's index when it is missing, then runs the pipeline.

    Args:
        path (Path): The path to the data (format depends on [Importer][summ.importers.Importer]).
        parallel (bool, optional): Whether to run the pipeline in parallel.
        pipe (Optional[Pipeline], optional): The pipeline to use. If one is not supplied, a default one will be constructed.

    Raises:
        Exception: Any index-creation error whose text mentions neither
            "already exists" nor "quota" is re-raised unchanged.
    """
    if not pipe:
        pipe = Pipeline.default(path, self.index)

    if pipe.embedder.has_index():
        pipe.dprint("Index already exists!")
    else:
        try:
            print("Creating index, this may take a while...")
            pipe.dprint("Create Index", pipe.embedder.index_name)
            pipe.embedder.create_index()
        except Exception as e:
            # Only two known failure texts are downgraded to a message;
            # everything else propagates to the caller.
            reason = str(e)
            if "already exists" in reason:
                msg = "Index already exists!"
            elif "quota" in reason:
                # NOTE(review): the pipeline below still runs after this
                # message -- confirm that is intended.
                msg = "You have exceeded the number of indexes for your Pinecone tier!"
            else:
                raise e
            print(msg)
            pipe.dprint(msg)

    pipe.run(parallel=parallel)

query(question: str, classes: list[Classes] = [], corpus: list[Document] = [], debug: bool = True) -> str

Query a pre-populated model with a given question.

PARAMETER DESCRIPTION
question

The question to ask.

TYPE: str

n

The number of facts to use per sub-query. This is not an argument of query(); the value is the n passed to the Summ constructor (default 3).

TYPE: int

classes

The set of tags to use as filters (AND).

TYPE: list[Classes] DEFAULT: []

debug

Whether to print intermediate steps.

TYPE: bool DEFAULT: True

Source code in summ/summ.py
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
def query(
    self,
    question: str,
    classes: Optional[list[Classes]] = None,
    corpus: Optional[list[Document]] = None,
    debug: bool = True,
) -> str:
    """
    Query a pre-populated model with a given question.

    The number of facts used per sub-query is not an argument here; it is
    taken from `self.n`.

    Args:
        question (str): The question to ask.
        classes (Optional[list[Classes]], optional): The set of tags to use as filters (AND). `None` is treated as an empty list.
        corpus (Optional[list[Document]], optional): Documents forwarded to the querier as its `corpus` argument. `None` is treated as an empty list.
        debug (bool, optional): Whether to print intermediate steps.

    Returns:
        str: The value returned by `Querier.query`.

    Raises:
        Exception: If the index named by `self.index` does not exist.
    """
    if not Embedder(self.index).has_index():
        raise Exception(
            f"Index {self.index} not found! Please run `summ populate` first."
        )
    # Build fresh lists per call: a literal `[]` default would be a single
    # object shared by every call that omits the argument.
    classes = [] if classes is None else classes
    corpus = [] if corpus is None else corpus
    querier = Querier(index=self.index, debug=debug)
    return querier.query(question, n=self.n, classes=classes, corpus=corpus)