API Reference¶

`summ.Summ` ¶

The main entry point for both populating and querying the model.

Source code in summ/summ.py

class Summ:
    """The main entry point for both populating and querying the model.

    Attributes:
        index: Name of the index used when populating and when querying.
        n: The number of facts to use per sub-query; forwarded to the querier
            on every call to `query`.
    """

    def __init__(self, index: str = "sum-facts", n: int = 3):
        """Remember the index name and the per-sub-query fact count.

        Args:
            index (str, optional): Name of the index to populate and query.
            n (int, optional): The number of facts to use per sub-query.
        """
        self.index = index
        self.n = n

    def populate(
        self,
        path: Path,
        parallel: bool = True,
        pipe: Optional[Pipeline] = None,
    ):
        """Populate the model with data from a given path.

        The embedder's index is created first when it does not exist yet,
        then the pipeline is run.

        Args:
            path (Path): The path to the data (format depends on [Importer][summ.importers.Importer]).
            parallel (bool, optional): Whether to run the pipeline in parallel.
            pipe (Optional[Pipeline], optional): The pipeline to use. If one is not supplied, a default one will be constructed.

        Raises:
            Exception: Any index-creation error whose message contains neither
                "already exists" nor "quota" is re-raised unchanged.
        """
        pipe = pipe or Pipeline.default(path, self.index)

        if pipe.embedder.has_index():
            pipe.dprint("Index already exists!")
        else:
            print("Creating index, this may take a while...")
            pipe.dprint("Create Index", pipe.embedder.index_name)
            try:
                # Only the creation call is guarded; the messages inspected
                # below are the ones it is expected to produce.
                pipe.embedder.create_index()
            except Exception as e:
                reason = str(e)
                if "already exists" in reason:
                    msg = "Index already exists!"
                elif "quota" in reason:
                    msg = "You have exceeded the number of indexes for your Pinecone tier!"
                else:
                    # Unrecognised failure: propagate it with its traceback intact.
                    raise
                print(msg)
                pipe.dprint(msg)
                # NOTE(review): the pipeline still runs after the quota message,
                # even though no index was created -- confirm this is intended.

        pipe.run(parallel=parallel)

    def query(
        self,
        question: str,
        classes: list[Classes] = [],
        corpus: list[Document] = [],
        debug: bool = True,
    ) -> str:
        """
        Query a pre-populated model with a given question.

        The number of facts used per sub-query is not an argument of this
        method; it is the ``n`` value given to the constructor.

        Args:
            question (str): The question to ask.
            classes (list[Classes], optional): The set of tags to use as filters (AND).
            corpus (list[Document], optional): Documents forwarded unchanged to the querier.
            debug (bool, optional): Whether to print intermediate steps.

        Returns:
            str: Whatever the querier returns for the question.

        Raises:
            Exception: If the index named by ``self.index`` does not exist.
        """
        # NOTE(review): `classes` and `corpus` default to shared list objects.
        # They are only forwarded here, so this is safe as long as the querier
        # never mutates them -- confirm against Querier.query.
        if not Embedder(self.index).has_index():
            raise Exception(
                f"Index {self.index} not found! Please run `summ populate` first."
            )
        querier = Querier(index=self.index, debug=debug)
        return querier.query(question, n=self.n, classes=classes, corpus=corpus)

`populate(path: Path, parallel: bool = True, pipe: Optional[Pipeline] = None)` ¶

Populate the model with data from a given path.

PARAMETER DESCRIPTION

path

The path to the data (format depends on Importer).

TYPE: Path

parallel

Whether to run the pipeline in parallel.

TYPE: bool DEFAULT: True

pipe

The pipeline to use. If one is not supplied, a default one will be constructed.

TYPE: Optional[Pipeline] DEFAULT: None

Source code in summ/summ.py

def populate(
    self,
    path: Path,
    parallel: bool = True,
    pipe: Optional[Pipeline] = None,
):
    """Populate the model with data from a given path.

    The embedder's index is created first when it does not exist yet, then
    the pipeline is run.

    Args:
        path (Path): The path to the data (format depends on [Importer][summ.importers.Importer]).
        parallel (bool, optional): Whether to run the pipeline in parallel.
        pipe (Optional[Pipeline], optional): The pipeline to use. If one is not supplied, a default one will be constructed.

    Raises:
        Exception: Any index-creation error whose message contains neither
            "already exists" nor "quota" is re-raised unchanged.
    """
    pipe = pipe or Pipeline.default(path, self.index)

    if pipe.embedder.has_index():
        pipe.dprint("Index already exists!")
    else:
        print("Creating index, this may take a while...")
        pipe.dprint("Create Index", pipe.embedder.index_name)
        try:
            # Only the creation call is guarded; the messages inspected below
            # are the ones it is expected to produce.
            pipe.embedder.create_index()
        except Exception as e:
            reason = str(e)
            if "already exists" in reason:
                msg = "Index already exists!"
            elif "quota" in reason:
                msg = "You have exceeded the number of indexes for your Pinecone tier!"
            else:
                # Unrecognised failure: propagate it with its traceback intact.
                raise
            print(msg)
            pipe.dprint(msg)
            # NOTE(review): the pipeline still runs after the quota message,
            # even though no index was created -- confirm this is intended.

    pipe.run(parallel=parallel)

`query(question: str, classes: list[Classes] = [], corpus: list[Document] = [], debug: bool = True) -> str` ¶

Query a pre-populated model with a given question.

PARAMETER	DESCRIPTION
`question`	The question to ask. TYPE: `str`
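`n`	The number of facts to use per sub-query. Not an argument of `query`: the value is taken from the `n` given to the `Summ` constructor (default `3`). TYPE: `int`
`classes`	The set of tags to use as filters (AND). TYPE: `list[Classes]` DEFAULT: `[]`
`corpus`	Documents passed through to the querier together with the question. TYPE: `list[Document]` DEFAULT: `[]`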
`debug`	Whether to print intermediate steps. TYPE: `bool` DEFAULT: `True`

Source code in summ/summ.py

def query(
    self,
    question: str,
    classes: list[Classes] = [],
    corpus: list[Document] = [],
    debug: bool = True,
) -> str:
    """
    Query a pre-populated model with a given question.

    The number of facts used per sub-query is not an argument of this
    method; it is read from ``self.n``.

    Args:
        question (str): The question to ask.
        classes (list[Classes], optional): The set of tags to use as filters (AND).
        corpus (list[Document], optional): Documents forwarded unchanged to the querier.
        debug (bool, optional): Whether to print intermediate steps.

    Returns:
        str: Whatever the querier returns for the question.

    Raises:
        Exception: If the index named by ``self.index`` does not exist.
    """
    # NOTE(review): `classes` and `corpus` default to shared list objects.
    # They are only forwarded here, so this is safe as long as the querier
    # never mutates them -- confirm against Querier.query.
    if not Embedder(self.index).has_index():
        raise Exception(
            f"Index {self.index} not found! Please run `summ populate` first."
        )
    querier = Querier(index=self.index, debug=debug)
    return querier.query(question, n=self.n, classes=classes, corpus=corpus)

API Reference¶

summ.Summ ¶

populate(path: Path, parallel: bool = True, pipe: Optional[Pipeline] = None) ¶

query(question: str, classes: list[Classes] = [], corpus: list[Document] = [], debug: bool = True) -> str ¶

`summ.Summ` ¶

`populate(path: Path, parallel: bool = True, pipe: Optional[Pipeline] = None)` ¶

`query(question: str, classes: list[Classes] = [], corpus: list[Document] = [], debug: bool = True) -> str` ¶