Package `refget` documentation

create_refget_router

create_refget_router(sequences=False, collections=True, pangenomes=False)

Create a FastAPI router for the sequence collection API. This router provides endpoints for retrieving and comparing sequence collections. You can choose which endpoints to include by setting the sequences, collections, or pangenomes flags.

Parameters:

Name	Type	Description	Default
`sequences`	`bool`	Include sequence endpoints	`False`
`collections`	`bool`	Include sequence collection endpoints	`True`
`pangenomes`	`bool`	Include pangenome endpoints	`False`

Returns:

Name	Type	Description
`APIRouter`		A FastAPI router with the specified endpoints

Examples:

app.include_router(create_refget_router(sequences=False, pangenomes=False))

Source code in refget/refget_router.py

def create_refget_router(
    sequences: bool = False, collections: bool = True, pangenomes: bool = False
):
    """
    Build a FastAPI router exposing the selected groups of refget endpoints.

    Each flag switches one sub-router on or off. Enabled sub-routers are
    attached in a fixed order (sequences, then collections, then pangenomes),
    and an info-level message is logged for each one that gets attached.

    Args:
        sequences (bool): Include sequence endpoints
        collections (bool): Include sequence collection endpoints
        pangenomes (bool): Include pangenome endpoints

    Returns:
        APIRouter: A FastAPI router with the specified endpoints

    Examples:
        ```
        app.include_router(create_refget_router(sequences=False, pangenomes=False))
        ```
    """
    # (enabled?, log message, sub-router), listed in attachment order.
    endpoint_groups = (
        (sequences, "Adding sequence endpoints...", seq_router),
        (collections, "Adding collection endpoints...", seqcol_router),
        (pangenomes, "Adding pangenome endpoints...", pangenome_router),
    )

    combined_router = APIRouter()
    for enabled, message, sub_router in endpoint_groups:
        if enabled:
            _LOGGER.info(message)
            combined_router.include_router(sub_router)
    return combined_router

SequenceClient

SequenceClient(urls=['https://www.ebi.ac.uk/ena/cram'], raise_errors=None)

Bases: RefgetClient

A client for interacting with a refget sequences API.

Initializes the sequences client.

Parameters:

Name	Type	Description	Default
`urls`	`list`	A list of base URLs of the sequences API. Trailing slashes are stripped. Defaults to ["https://www.ebi.ac.uk/ena/cram"].	`['https://www.ebi.ac.uk/ena/cram']`
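`raise_errors`	`bool`	Whether to raise errors or log them. Defaults to None, in which case it is set to True only when the module is run as a script (`__name__ == "__main__"`) and to False otherwise.	`None`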

Attributes: urls (list): The list of base URLs of the sequences API.

Source code in refget/clients.py

def __init__(self, urls=["https://www.ebi.ac.uk/ena/cram"], raise_errors=None):
    """
    Initializes the sequences client.

    Args:
        urls (list, optional): A list of base URLs of the sequences API. A single
            URL given as a plain string is treated as a one-element list.
            Trailing slashes are stripped.
            Defaults to ["https://www.ebi.ac.uk/ena/cram"].
        raise_errors (bool, optional): Whether to raise errors or log them.
            Defaults to None, in which case it is set to True only when this
            module is run as a script (``__name__ == "__main__"``) and to
            False otherwise.

    Attributes:
        urls (list): The list of base URLs of the sequences API, without
            trailing slashes.
        raise_errors (bool): The resolved error-handling flag.
    """
    # A bare string would otherwise be iterated character by character,
    # producing a list of one-character "URLs".
    if isinstance(urls, str):
        urls = [urls]
    # Remove trailing slashes from input URLs. The comprehension builds a new
    # list, so the shared default list is never mutated.
    self.urls = [url.rstrip("/") for url in urls]
    # If raise_errors is None, set it to True if the client is not being used as a library
    if raise_errors is None:
        raise_errors = __name__ == "__main__"
    self.raise_errors = raise_errors

get_metadata

get_metadata(digest)

Retrieves metadata for a given sequence digest.

Parameters:

Name	Type	Description	Default
`digest`	`str`	The digest of the sequence.	required

Returns:

Name	Type	Description
`dict`		The metadata.

Source code in refget/clients.py

def get_metadata(self, digest):
    """
    Fetch the metadata record of a sequence.

    Requests ``/sequence/{digest}/metadata`` through ``_try_urls`` using the
    client's base URLs and its ``raise_errors`` setting.

    Args:
        digest (str): The digest of the sequence.

    Returns:
        dict: The metadata.
    """
    return _try_urls(
        self.urls,
        f"/sequence/{digest}/metadata",
        raise_errors=self.raise_errors,
    )

get_sequence

get_sequence(digest, start=None, end=None)

Retrieves a sequence for a given digest.

Parameters:

Name	Type	Description	Default
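`digest`	`str`	The digest of the sequence.	required
`start`		Sent as the `start` query parameter when not None.	`None`
`end`		Sent as the `end` query parameter when not None.	`None`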

Returns:

Name	Type	Description
`str`		The sequence.

Source code in refget/clients.py

def get_sequence(self, digest, start=None, end=None):
    """
    Fetch a sequence, optionally passing ``start``/``end`` query parameters.

    Args:
        digest (str): The digest of the sequence.
        start (optional): Sent as the ``start`` query parameter; left out of
            the request when None.
        end (optional): Sent as the ``end`` query parameter; left out of the
            request when None.

    Returns:
        str: The sequence.
    """
    # Only forward the bounds the caller actually supplied.
    bounds = (("start", start), ("end", end))
    query = {key: value for key, value in bounds if value is not None}

    return _try_urls(
        self.urls,
        f"/sequence/{digest}",
        params=query,
        raise_errors=self.raise_errors,
    )

SequenceCollectionClient

SequenceCollectionClient(urls=['https://seqcolapi.databio.org'], raise_errors=None)

Bases: RefgetClient

A client for interacting with a refget sequence collections API.

Initializes the sequence collection client.

Parameters:

Name	Type	Description	Default
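`urls`	`list`	A list of base URLs of the sequence collection API. Defaults to ["https://seqcolapi.databio.org"].	`['https://seqcolapi.databio.org']`
`raise_errors`	`bool`	Stored on the client as `raise_errors`. If None, it is set to True only when the module is run as a script (`__name__ == "__main__"`) and to False otherwise.	`None`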

Attributes:

Name	Type	Description
`urls`	`list`	The list of base URLs of the sequence collection API.

Source code in refget/clients.py

def __init__(self, urls=["https://seqcolapi.databio.org"], raise_errors=None):
    """
    Initializes the sequence collection client.

    Args:
        urls (list, optional): A list of base URLs of the sequence collection
            API. A single URL given as a plain string is treated as a
            one-element list. Trailing slashes are stripped.
            Defaults to ["https://seqcolapi.databio.org"].
        raise_errors (bool, optional): Error-handling flag stored on the
            client. Defaults to None, in which case it is set to True only
            when this module is run as a script
            (``__name__ == "__main__"``) and to False otherwise.

    Attributes:
        urls (list): The list of base URLs of the sequence collection API,
            without trailing slashes.
        raise_errors (bool): The resolved error-handling flag.
    """
    # A bare string would otherwise be iterated character by character,
    # producing a list of one-character "URLs".
    if isinstance(urls, str):
        urls = [urls]
    # Remove trailing slashes from input URLs. The comprehension builds a new
    # list, so the shared default list is never mutated.
    self.urls = [url.rstrip("/") for url in urls]
    # If raise_errors is None, set it to True if the client is not being used as a library
    if raise_errors is None:
        raise_errors = __name__ == "__main__"
    self.raise_errors = raise_errors

compare

compare(digest1, digest2)

Compares two sequence collections.

Parameters:

Name	Type	Description	Default
`digest1`	`str`	The digest of the first sequence collection.	required
`digest2`	`str`	The digest of the second sequence collection.	required

Returns:

Type	Description
`dict`	The JSON response containing the comparison of the two sequence collections.

Source code in refget/clients.py

def compare(self, digest1, digest2):
    """
    Request a comparison of two sequence collections.

    Queries ``/comparison/{digest1}/{digest2}`` through ``_try_urls`` using
    the client's base URLs.

    Args:
        digest1 (str): The digest of the first sequence collection.
        digest2 (str): The digest of the second sequence collection.

    Returns:
        (dict): The JSON response containing the comparison of the two sequence collections.
    """
    # NOTE(review): self.raise_errors is not forwarded to _try_urls here,
    # unlike the SequenceClient methods; confirm that this is intended.
    return _try_urls(self.urls, f"/comparison/{digest1}/{digest2}")

get_attribute

get_attribute(attribute, digest, level=2)

Retrieves a specific attribute for a given digest. The `level` argument is accepted but is not currently included in the request.

Parameters:

Name	Type	Description	Default
`attribute`	`str`	The attribute to retrieve.	required
`digest`	`str`	The digest of the attribute.	required

Returns:

Type	Description
`dict`	The JSON response containing the attribute.

Source code in refget/clients.py

def get_attribute(self, attribute, digest, level=2):
    """
    Fetch one attribute by its name and digest.

    Queries ``/attribute/collection/{attribute}/{digest}`` through
    ``_try_urls`` using the client's base URLs.

    Args:
        attribute (str): The attribute to retrieve.
        digest (str): The digest of the attribute.
        level (int, optional): Accepted but not used when building the
            request. Defaults to 2.

    Returns:
        (dict): The JSON response containing the attribute.
    """
    # NOTE(review): `level` is never sent to the server, and
    # self.raise_errors is not forwarded to _try_urls; confirm both are intended.
    return _try_urls(self.urls, f"/attribute/collection/{attribute}/{digest}")

get_collection

get_collection(digest, level=2)

Retrieves a sequence collection for a given digest and detail level.

Parameters:

Name	Type	Description	Default
`digest`	`str`	The digest of the sequence collection.	required
`level`	`int`	The level of detail for the sequence collection. Defaults to 2.	`2`

Returns:

Type	Description
`dict`	The JSON response containing the sequence collection.

Source code in refget/clients.py

def get_collection(self, digest, level=2):
    """
    Fetch a sequence collection at the requested level of detail.

    The level is embedded directly in the endpoint's query string
    (``/collection/{digest}?level={level}``) rather than passed as a
    separate parameter mapping.

    Args:
        digest (str): The digest of the sequence collection.
        level (int, optional): The level of detail for the sequence collection. Defaults to 2.

    Returns:
        (dict): The JSON response containing the sequence collection.
    """
    # NOTE(review): self.raise_errors is not forwarded to _try_urls here,
    # unlike the SequenceClient methods; confirm that this is intended.
    return _try_urls(self.urls, f"/collection/{digest}?level={level}")

list_attributes

list_attributes(attribute, page=None, page_size=None)

Lists all available values for a given attribute with optional paging support.

Parameters:

Name	Type	Description	Default
`attribute`	`str`	The attribute to list values for.	required
`page`	`int`	The page number to retrieve. Defaults to None.	`None`
`page_size`	`int`	The number of items per page. Defaults to None.	`None`

Returns:

Name	Type	Description
`dict`		The JSON response containing the list of available values for the attribute.

Source code in refget/clients.py

def list_attributes(self, attribute, page=None, page_size=None):
    """
    List the available values of one attribute, with optional paging.

    Args:
        attribute (str): The attribute to list values for.
        page (int, optional): The page number to retrieve; left out of the
            request when None. Defaults to None.
        page_size (int, optional): The number of items per page; left out of
            the request when None. Defaults to None.

    Returns:
        dict: The JSON response containing the list of available values for the attribute.
    """
    # Only forward the paging options the caller actually supplied.
    paging_options = (("page", page), ("page_size", page_size))
    paging = {key: value for key, value in paging_options if value is not None}

    return _try_urls(self.urls, f"/list/attributes/{attribute}", params=paging)

list_collections

list_collections(page=None, page_size=None, attribute=None, attribute_digest=None)

Lists all available sequence collections with optional paging and attribute filtering support.

Parameters:

Name	Type	Description	Default
`page`	`int`	The page number to retrieve. Defaults to None.	`None`
`page_size`	`int`	The number of items per page. Defaults to None.	`None`
`attribute`	`str`	The attribute to filter by. Defaults to None.	`None`
`attribute_digest`	`str`	The attribute digest to filter by. Defaults to None.	`None`

Returns:

Name	Type	Description
`dict`		The JSON response containing the list of available sequence collections.

Source code in refget/clients.py

def list_collections(self, page=None, page_size=None, attribute=None, attribute_digest=None):
    """
    List the available sequence collections, with optional paging and filtering.

    The attribute filter is applied only when both ``attribute`` and
    ``attribute_digest`` are truthy; if either is missing, the unfiltered
    ``/list/collections`` endpoint is queried.

    Args:
        page (int, optional): The page number to retrieve. Defaults to None.
        page_size (int, optional): The number of items per page. Defaults to None.
        attribute (str, optional): The attribute to filter by. Defaults to None.
        attribute_digest (str, optional): The attribute digest to filter by. Defaults to None.

    Returns:
        dict: The JSON response containing the list of available sequence collections.
    """
    # Only forward the paging options the caller actually supplied.
    paging_options = (("page", page), ("page_size", page_size))
    paging = {key: value for key, value in paging_options if value is not None}

    use_filter = attribute and attribute_digest
    endpoint = (
        f"/list/collections/{attribute}/{attribute_digest}"
        if use_filter
        else "/list/collections"
    )
    return _try_urls(self.urls, endpoint, params=paging)

service_info

service_info()

Retrieves information about the service.

Returns:

Name	Type	Description
`dict`		The service information.

Source code in refget/clients.py

def service_info(self):
    """
    Fetch the service description from the ``/service-info`` endpoint.

    Returns:
        dict: The service information.
    """
    # NOTE(review): self.raise_errors is not forwarded to _try_urls here,
    # unlike the SequenceClient methods; confirm that this is intended.
    return _try_urls(self.urls, "/service-info")

RefgetDBAgent

RefgetDBAgent(engine=None, postgres_str=None, schema=f'{SCHEMA_FILEPATH}/seqcol.json', inherent_attrs=['names', 'lengths', 'sequences'])

Bases: object

Primary aggregator agent, interface to all other agents

Source code in refget/agents.py

def __init__(
    self,
    engine: Optional[SqlalchemyDatabaseEngine] = None,
    postgres_str: Optional[str] = None,
    schema=f"{SCHEMA_FILEPATH}/seqcol.json",
    inherent_attrs: List[str] = ["names", "lengths", "sequences"],
):  # = "sqlite:///foo.db"
    """
    Set up the database engine, create the tables, load the schema, and build the sub-agents.

    Args:
        engine: An existing database engine. When given, ``postgres_str`` and
            the POSTGRES_* environment variables are not consulted.
        postgres_str: Connection string handed to ``create_engine`` when no
            ``engine`` is given. When empty, a PostgreSQL URL is assembled
            from the POSTGRES_HOST, POSTGRES_DB, POSTGRES_USER and
            POSTGRES_PASSWORD environment variables.
        schema: Path of the JSON schema, read with ``load_json``. A falsy
            value skips schema loading.
        inherent_attrs: Fallback list of inherent attribute names, used when
            no schema is loaded or the schema has no
            ``["ga4gh"]["inherent"]`` entry. A list is copied, so the
            instance never shares it with the caller or with the default.

    Raises:
        Exception: Whatever ``create_engine`` or
            ``SQLModel.metadata.create_all`` raised, re-raised after logging.
    """
    # Copy list inputs so instances never alias the shared default list (or a
    # list the caller mutates later). Other values are passed through as-is.
    if isinstance(inherent_attrs, list):
        inherent_attrs = list(inherent_attrs)

    if engine is not None:
        self.engine = engine
    else:
        if not postgres_str:
            # Configure via environment variables
            POSTGRES_HOST = os.getenv("POSTGRES_HOST")
            POSTGRES_DB = os.getenv("POSTGRES_DB")
            POSTGRES_USER = os.getenv("POSTGRES_USER")
            POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD")
            postgres_str = URL.create(
                "postgresql",
                username=POSTGRES_USER,
                password=POSTGRES_PASSWORD,
                host=POSTGRES_HOST,
                database=POSTGRES_DB,
            )

        try:
            self.engine = create_engine(postgres_str, echo=False)
        except Exception as e:
            _LOGGER.error(f"Error: {e}")
            _LOGGER.error("Unable to connect to database")
            _LOGGER.error(
                "Please check that you have set the database credentials correctly in the environment variables"
            )
            # The connection string may embed the password, so never write it
            # to the log verbatim: mask it for URL objects, withhold raw strings.
            if isinstance(postgres_str, URL):
                safe_engine_str = postgres_str.render_as_string(hide_password=True)
            else:
                safe_engine_str = "<withheld: connection string may contain a password>"
            _LOGGER.error(f"Database engine string: {safe_engine_str}")
            raise
    try:
        SQLModel.metadata.create_all(self.engine)
    except Exception as e:
        _LOGGER.error(f"Error: {e}")
        _LOGGER.error("Unable to create tables in the database")
        raise

    # Read schema
    if schema:
        self.schema_dict = load_json(schema)
        _LOGGER.info(f"Schema: {self.schema_dict}")
        try:
            self.inherent_attrs = self.schema_dict["ga4gh"]["inherent"]
        except KeyError:
            # The schema lacks the "ga4gh" or "inherent" key; fall back.
            self.inherent_attrs = inherent_attrs
            _LOGGER.warning(
                f"No 'inherent' attributes found in schema; using defaults: {inherent_attrs}"
            )
    else:
        _LOGGER.warning("No schema provided; using defaults")
        self.schema_dict = None
        self.inherent_attrs = inherent_attrs

    # Sub-agents. The pangenome agent receives this aggregator itself, while
    # the others receive only the engine.
    self.__sequence = SequenceAgent(self.engine)
    self.__seqcol = SequenceCollectionAgent(self.engine, self.inherent_attrs)
    self.__pangenome = PangenomeAgent(self)
    self.__attribute = AttributeAgent(self.engine)

truncate

truncate()

Delete all records from the database

Source code in refget/agents.py

def truncate(self):
    """Delete every row of the sequence collection, pangenome and attribute models.

    All deletes run in one session, in the order listed below, and are
    committed together at the end.

    Returns:
        The ``rowcount`` reported for the SequenceCollection delete.
    """
    # Models cleared after SequenceCollection, in this exact order.
    remaining_models = (
        Pangenome,
        NamesAttr,
        LengthsAttr,
        SequencesAttr,
        SortedNameLengthPairsAttr,
        NameLengthPairsAttr,
        SortedSequencesAttr,
    )

    with Session(self.engine) as session:
        collection_result = session.exec(delete(SequenceCollection))
        for model in remaining_models:
            session.exec(delete(model))

        session.commit()
        return collection_result.rowcount

SequenceCollectionAgent

SequenceCollectionAgent(engine, inherent_attrs=None)

Bases: object

Agent for interacting with database of sequence collection

Source code in refget/agents.py

def __init__(self, engine, inherent_attrs=None):
    """
    Keep the database engine and the inherent attribute names on the instance.

    Args:
        engine: Database engine; stored as ``self.engine``.
        inherent_attrs: Inherent attribute names; stored unchanged as
            ``self.inherent_attrs``. Defaults to None.
    """
    self.engine, self.inherent_attrs = engine, inherent_attrs

SequenceAgent

SequenceAgent(engine)

Bases: object

Agent for interacting with database of sequences

Source code in refget/agents.py

def __init__(self, engine):
    """
    Keep a reference to the database engine on the instance.

    Args:
        engine: Database engine; stored as ``self.engine``.
    """
    self.engine = engine

Package refget documentation

create_refget_router

SequenceClient

get_metadata

get_sequence

SequenceCollectionClient

compare

get_attribute

get_collection

list_attributes

list_collections

service_info

RefgetDBAgent

truncate

SequenceCollectionAgent

SequenceAgent

Package `refget` documentation