Skip to content

Package refget documentation

create_refget_router

create_refget_router(sequences=False, collections=True, pangenomes=False)

Create a FastAPI router for the sequence collection API. This router provides endpoints for retrieving and comparing sequence collections. You can choose which endpoints to include by setting the sequences, collections, or pangenomes flags.

Parameters:

Name Type Description Default
sequences bool

Include sequence endpoints

False
collections bool

Include sequence collection endpoints

True
pangenomes bool

Include pangenome endpoints

False

Returns:

Name Type Description
APIRouter

A FastAPI router with the specified endpoints

Examples:

app.include_router(create_refget_router(sequences=False, pangenomes=False))
Source code in refget/refget_router.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
def create_refget_router(
    sequences: bool = False, collections: bool = True, pangenomes: bool = False
):
    """
    Build a FastAPI router that bundles the selected refget endpoint groups.

    Each flag controls whether the matching sub-router is mounted on the
    returned router. Groups are mounted in a fixed order: sequences, then
    collections, then pangenomes. An info message is logged for every group
    that is added.

    Args:
        sequences (bool): Include sequence endpoints
        collections (bool): Include sequence collection endpoints
        pangenomes (bool): Include pangenome endpoints

    Returns:
        APIRouter: A FastAPI router with the specified endpoints

    Examples:
        ```
        app.include_router(create_refget_router(sequences=False, pangenomes=False))
        ```
    """
    # (enabled?, log message, sub-router) in the order they must be mounted
    endpoint_groups = (
        (sequences, "Adding sequence endpoints...", seq_router),
        (collections, "Adding collection endpoints...", seqcol_router),
        (pangenomes, "Adding pangenome endpoints...", pangenome_router),
    )

    combined_router = APIRouter()
    for enabled, message, sub_router in endpoint_groups:
        if not enabled:
            continue
        _LOGGER.info(message)
        combined_router.include_router(sub_router)
    return combined_router

SequenceClient

SequenceClient(urls=['https://www.ebi.ac.uk/ena/cram'], raise_errors=None)

Bases: RefgetClient

A client for interacting with a refget sequences API.

Initializes the sequences client.

Parameters:

Name Type Description Default
urls list

A list of base URLs of the sequences API. Trailing slashes are removed. Defaults to ["https://www.ebi.ac.uk/ena/cram"].

['https://www.ebi.ac.uk/ena/cram']
raise_errors bool

Whether to raise errors or log them. Defaults to None, which will guess.

None

Attributes: urls (list): The list of base URLs of the sequences API.

Source code in refget/clients.py
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
def __init__(self, urls=["https://www.ebi.ac.uk/ena/cram"], raise_errors=None):
    """
    Initializes the sequences client.

    Args:
        urls (list | str, optional): Base URL(s) of the sequences API. A single
            string is treated as a one-element list. Trailing slashes are
            removed. Defaults to ["https://www.ebi.ac.uk/ena/cram"].
        raise_errors (bool, optional): Whether to raise errors or log them.
            Defaults to None, in which case it is set to True only when this
            module's ``__name__`` is ``"__main__"`` and False otherwise.

    Attributes:
        urls (list): The list of base URLs of the sequences API, without
            trailing slashes.
        raise_errors (bool): The resolved error-handling flag.
    """
    # A bare string would otherwise be iterated character by character,
    # producing one bogus "URL" per character.
    if isinstance(urls, str):
        urls = [urls]
    # Remove trailing slashes from input URLs. This builds a new list, so the
    # shared default argument is never mutated.
    self.urls = [url.rstrip("/") for url in urls]
    # If raise_errors is None, set it to True if the client is not being used as a library
    if raise_errors is None:
        raise_errors = __name__ == "__main__"
    self.raise_errors = raise_errors

get_metadata

get_metadata(digest)

Retrieves metadata for a given sequence digest.

Parameters:

Name Type Description Default
digest str

The digest of the sequence.

required

Returns:

Name Type Description
dict

The metadata.

Source code in refget/clients.py
78
79
80
81
82
83
84
85
86
87
88
89
def get_metadata(self, digest):
    """
    Look up the metadata record of a sequence by its digest.

    The request targets ``/sequence/{digest}/metadata`` and is attempted
    against the client's configured base URLs via ``_try_urls``, honouring
    the client's ``raise_errors`` setting.

    Args:
        digest (str): The digest of the sequence.

    Returns:
        dict: The metadata.
    """
    return _try_urls(
        self.urls,
        f"/sequence/{digest}/metadata",
        raise_errors=self.raise_errors,
    )

get_sequence

get_sequence(digest, start=None, end=None)

Retrieves a sequence for a given digest.

Parameters:

Name Type Description Default
digest str

The digest of the sequence.

required

Returns:

Name Type Description
str

The sequence.

Source code in refget/clients.py
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
def get_sequence(self, digest, start=None, end=None):
    """
    Fetch the sequence identified by a digest, optionally restricted to a range.

    The request targets ``/sequence/{digest}``. ``start`` and ``end`` are
    sent as query parameters only when they are not None.

    Args:
        digest (str): The digest of the sequence.
        start (optional): Value for the ``start`` query parameter. Omitted
            from the request when None.
        end (optional): Value for the ``end`` query parameter. Omitted from
            the request when None.

    Returns:
        str: The sequence.
    """
    requested_bounds = {"start": start, "end": end}
    # Only forward the bounds the caller actually supplied
    query_params = {
        name: value for name, value in requested_bounds.items() if value is not None
    }
    return _try_urls(
        self.urls,
        f"/sequence/{digest}",
        params=query_params,
        raise_errors=self.raise_errors,
    )

SequenceCollectionClient

SequenceCollectionClient(urls=['https://seqcolapi.databio.org'], raise_errors=None)

Bases: RefgetClient

A client for interacting with a refget sequence collections API.

Initializes the sequence collection client.

Parameters:

Name Type Description Default
urls list

A list of base URLs of the sequence collection API. Defaults to ["https://seqcolapi.databio.org"].

['https://seqcolapi.databio.org']

Attributes:

Name Type Description
urls list

The list of base URLs of the sequence collection API.

Source code in refget/clients.py
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
def __init__(self, urls=["https://seqcolapi.databio.org"], raise_errors=None):
    """
    Initializes the sequence collection client.

    Args:
        urls (list | str, optional): Base URL(s) of the sequence collection
            API. A single string is treated as a one-element list. Trailing
            slashes are removed. Defaults to ["https://seqcolapi.databio.org"].
        raise_errors (bool, optional): Error-handling flag stored on the
            client. Defaults to None, in which case it is set to True only
            when this module's ``__name__`` is ``"__main__"`` and False
            otherwise.

    Attributes:
        urls (list): The list of base URLs of the sequence collection API,
            without trailing slashes.
        raise_errors (bool): The resolved error-handling flag.
    """
    # A bare string would otherwise be iterated character by character,
    # producing one bogus "URL" per character.
    if isinstance(urls, str):
        urls = [urls]
    # Remove trailing slashes from input URLs. This builds a new list, so the
    # shared default argument is never mutated.
    self.urls = [url.rstrip("/") for url in urls]
    # If raise_errors is None, set it to True if the client is not being used as a library
    if raise_errors is None:
        raise_errors = __name__ == "__main__"
    self.raise_errors = raise_errors

compare

compare(digest1, digest2)

Compares two sequence collections.

Parameters:

Name Type Description Default
digest1 str

The digest of the first sequence collection.

required
digest2 str

The digest of the second sequence collection.

required

Returns:

Type Description
dict

The JSON response containing the comparison of the two sequence collections.

Source code in refget/clients.py
142
143
144
145
146
147
148
149
150
151
152
153
154
def compare(self, digest1, digest2):
    """
    Request a comparison between two sequence collections.

    The request targets ``/comparison/{digest1}/{digest2}`` and is attempted
    against the client's configured base URLs via ``_try_urls``.

    Args:
        digest1 (str): The digest of the first sequence collection.
        digest2 (str): The digest of the second sequence collection.

    Returns:
        (dict): The JSON response containing the comparison of the two sequence collections.
    """
    # NOTE(review): self.raise_errors is not forwarded to _try_urls here,
    # unlike SequenceClient's methods — confirm this is intentional.
    comparison_path = f"/comparison/{digest1}/{digest2}"
    response = _try_urls(self.urls, comparison_path)
    return response

get_attribute

get_attribute(attribute, digest, level=2)

Retrieves a specific attribute for a given digest and detail level.

Parameters:

Name Type Description Default
attribute str

The attribute to retrieve.

required
digest str

The digest of the attribute.

required

Returns:

Type Description
dict

The JSON response containing the attribute.

Source code in refget/clients.py
128
129
130
131
132
133
134
135
136
137
138
139
140
def get_attribute(self, attribute, digest, level=2):
    """
    Fetch a single attribute value by attribute name and digest.

    The request targets ``/attribute/collection/{attribute}/{digest}``.

    Args:
        attribute (str): The attribute to retrieve.
        digest (str): The digest of the attribute.
        level (int, optional): Accepted for interface compatibility; it is
            not used when building the request. Defaults to 2.

    Returns:
        (dict): The JSON response containing the attribute.
    """
    # NOTE(review): self.raise_errors is not forwarded to _try_urls here,
    # unlike SequenceClient's methods — confirm this is intentional.
    attribute_path = f"/attribute/collection/{attribute}/{digest}"
    response = _try_urls(self.urls, attribute_path)
    return response

get_collection

get_collection(digest, level=2)

Retrieves a sequence collection for a given digest and detail level.

Parameters:

Name Type Description Default
digest str

The digest of the sequence collection.

required
level int

The level of detail for the sequence collection. Defaults to 2.

2

Returns:

Type Description
dict

The JSON response containing the sequence collection.

Source code in refget/clients.py
114
115
116
117
118
119
120
121
122
123
124
125
126
def get_collection(self, digest, level=2):
    """
    Fetch a sequence collection by digest at the requested level of detail.

    The request targets ``/collection/{digest}`` with ``level`` appended
    directly to the path as a ``?level=`` query string.

    Args:
        digest (str): The digest of the sequence collection.
        level (int, optional): The level of detail for the sequence collection. Defaults to 2.

    Returns:
        (dict): The JSON response containing the sequence collection.
    """
    # NOTE(review): self.raise_errors is not forwarded to _try_urls here,
    # unlike SequenceClient's methods — confirm this is intentional.
    collection_path = f"/collection/{digest}?level={level}"
    response = _try_urls(self.urls, collection_path)
    return response

list_attributes

list_attributes(attribute, page=None, page_size=None)

Lists all available values for a given attribute with optional paging support.

Parameters:

Name Type Description Default
attribute str

The attribute to list values for.

required
page int

The page number to retrieve. Defaults to None.

None
page_size int

The number of items per page. Defaults to None.

None

Returns:

Name Type Description
dict

The JSON response containing the list of available values for the attribute.

Source code in refget/clients.py
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
def list_attributes(self, attribute, page=None, page_size=None):
    """
    List the available values of an attribute, optionally one page at a time.

    The request targets ``/list/attributes/{attribute}``. ``page`` and
    ``page_size`` are sent as query parameters only when they are not None.

    Args:
        attribute (str): The attribute to list values for.
        page (int, optional): The page number to retrieve. Defaults to None.
        page_size (int, optional): The number of items per page. Defaults to None.

    Returns:
        dict: The JSON response containing the list of available values for the attribute.
    """
    paging_options = {"page": page, "page_size": page_size}
    # Only forward the paging options the caller actually supplied
    params = {name: value for name, value in paging_options.items() if value is not None}

    # NOTE(review): self.raise_errors is not forwarded to _try_urls here,
    # unlike SequenceClient's methods — confirm this is intentional.
    return _try_urls(self.urls, f"/list/attributes/{attribute}", params=params)

list_collections

list_collections(page=None, page_size=None, attribute=None, attribute_digest=None)

Lists all available sequence collections with optional paging and attribute filtering support.

Parameters:

Name Type Description Default
page int

The page number to retrieve. Defaults to None.

None
page_size int

The number of items per page. Defaults to None.

None
attribute str

The attribute to filter by. Defaults to None.

None
attribute_digest str

The attribute digest to filter by. Defaults to None.

None

Returns:

Name Type Description
dict

The JSON response containing the list of available sequence collections.

Source code in refget/clients.py
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
def list_collections(self, page=None, page_size=None, attribute=None, attribute_digest=None):
    """
    List the available sequence collections, with optional paging and filtering.

    Without a filter the request targets ``/list/collections``. The filtered
    path ``/list/collections/{attribute}/{attribute_digest}`` is used only
    when both ``attribute`` and ``attribute_digest`` are truthy; supplying
    just one of them falls back to the unfiltered listing. ``page`` and
    ``page_size`` are sent as query parameters only when they are not None.

    Args:
        page (int, optional): The page number to retrieve. Defaults to None.
        page_size (int, optional): The number of items per page. Defaults to None.
        attribute (str, optional): The attribute to filter by. Defaults to None.
        attribute_digest (str, optional): The attribute digest to filter by. Defaults to None.

    Returns:
        dict: The JSON response containing the list of available sequence collections.
    """
    paging_options = {"page": page, "page_size": page_size}
    # Only forward the paging options the caller actually supplied
    params = {name: value for name, value in paging_options.items() if value is not None}

    # Start from the unfiltered listing and narrow it only with a complete filter
    endpoint = "/list/collections"
    if attribute and attribute_digest:
        endpoint = f"/list/collections/{attribute}/{attribute_digest}"

    # NOTE(review): self.raise_errors is not forwarded to _try_urls here,
    # unlike SequenceClient's methods — confirm this is intentional.
    return _try_urls(self.urls, endpoint, params=params)

service_info

service_info()

Retrieves information about the service.

Returns:

Name Type Description
dict

The service information.

Source code in refget/clients.py
203
204
205
206
207
208
209
210
211
def service_info(self):
    """
    Query the service's ``/service-info`` endpoint.

    Returns:
        dict: The service information.
    """
    # NOTE(review): self.raise_errors is not forwarded to _try_urls here,
    # unlike SequenceClient's methods — confirm this is intentional.
    response = _try_urls(self.urls, "/service-info")
    return response

RefgetDBAgent

RefgetDBAgent(engine=None, postgres_str=None, schema=f'{SCHEMA_FILEPATH}/seqcol.json', inherent_attrs=['names', 'lengths', 'sequences'])

Bases: object

Primary aggregator agent, interface to all other agents

Source code in refget/agents.py
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
def __init__(
    self,
    engine: Optional[SqlalchemyDatabaseEngine] = None,
    postgres_str: Optional[str] = None,
    schema=f"{SCHEMA_FILEPATH}/seqcol.json",
    inherent_attrs: List[str] = ["names", "lengths", "sequences"],
):  # = "sqlite:///foo.db"
    """
    Set up the database engine, create the tables, load the schema and build
    the per-entity agents.

    Args:
        engine: An existing database engine. When given, it is used as-is and
            ``postgres_str`` and the environment variables are ignored.
        postgres_str: Connection string passed to ``create_engine`` when no
            engine is given. When falsy, the connection URL is built from the
            POSTGRES_HOST, POSTGRES_DB, POSTGRES_USER and POSTGRES_PASSWORD
            environment variables.
        schema: Path to the JSON schema, loaded with ``load_json``. When
            falsy, no schema is loaded and ``schema_dict`` is set to None.
        inherent_attrs: Fallback list of inherent attribute names, used when
            no schema is given or the schema has no ``["ga4gh"]["inherent"]``
            entry. The list is copied, so later changes to an agent's
            ``inherent_attrs`` do not leak into the shared default or the
            caller's list.

    Raises:
        Exception: Any error raised while creating the engine or the tables
            is logged and re-raised unchanged.
    """
    # Copy the fallback so instances never alias the mutable default argument
    fallback_attrs = list(inherent_attrs) if inherent_attrs is not None else None

    if engine is not None:
        self.engine = engine
    else:
        if not postgres_str:
            # Configure via environment variables
            POSTGRES_HOST = os.getenv("POSTGRES_HOST")
            POSTGRES_DB = os.getenv("POSTGRES_DB")
            POSTGRES_USER = os.getenv("POSTGRES_USER")
            POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD")
            postgres_str = URL.create(
                "postgresql",
                username=POSTGRES_USER,
                password=POSTGRES_PASSWORD,
                host=POSTGRES_HOST,
                database=POSTGRES_DB,
            )

        try:
            self.engine = create_engine(postgres_str, echo=False)
        except Exception as e:
            _LOGGER.error(f"Error: {e}")
            _LOGGER.error("Unable to connect to database")
            _LOGGER.error(
                "Please check that you have set the database credentials correctly in the environment variables"
            )
            _LOGGER.error(f"Database engine string: {postgres_str}")
            # Bare raise re-raises the active exception unchanged
            raise
    try:
        SQLModel.metadata.create_all(self.engine)
    except Exception as e:
        _LOGGER.error(f"Error: {e}")
        _LOGGER.error("Unable to create tables in the database")
        raise

    # Read schema
    if schema:
        self.schema_dict = load_json(schema)
        _LOGGER.info(f"Schema: {self.schema_dict}")
        try:
            self.inherent_attrs = self.schema_dict["ga4gh"]["inherent"]
        except KeyError:
            self.inherent_attrs = fallback_attrs
            _LOGGER.warning(
                f"No 'inherent' attributes found in schema; using defaults: {inherent_attrs}"
            )
    else:
        _LOGGER.warning("No schema provided; using defaults")
        self.schema_dict = None
        self.inherent_attrs = fallback_attrs

    # The pangenome agent receives this aggregator itself; the others get the engine
    self.__sequence = SequenceAgent(self.engine)
    self.__seqcol = SequenceCollectionAgent(self.engine, self.inherent_attrs)
    self.__pangenome = PangenomeAgent(self)
    self.__attribute = AttributeAgent(self.engine)

truncate

truncate()

Delete all records from the database

Source code in refget/agents.py
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
def truncate(self):
    """
    Delete all records from the database

    Issues a DELETE for SequenceCollection first and then for each of the
    remaining tables listed below, all inside one session that is committed
    at the end.

    Returns:
        The ``rowcount`` reported for the SequenceCollection delete only;
        the row counts of the other deletes are not returned.
    """
    with Session(self.engine) as session:
        collections_result = session.exec(delete(SequenceCollection))

        # Cleared after SequenceCollection, in this exact order
        remaining_models = (
            Pangenome,
            NamesAttr,
            LengthsAttr,
            SequencesAttr,
            SortedNameLengthPairsAttr,
            NameLengthPairsAttr,
            SortedSequencesAttr,
        )
        for model in remaining_models:
            session.exec(delete(model))

        session.commit()
        return collections_result.rowcount
SequenceCollectionAgent

SequenceCollectionAgent(engine, inherent_attrs=None)

Bases: object

Agent for interacting with a database of sequence collections

Source code in refget/agents.py
147
148
149
def __init__(self, engine, inherent_attrs=None):
    """
    Store the database engine and the inherent attribute list on the agent.

    Args:
        engine: The database engine; stored as ``self.engine``.
        inherent_attrs (optional): Stored unchanged as
            ``self.inherent_attrs``. Defaults to None.
    """
    self.engine, self.inherent_attrs = engine, inherent_attrs

SequenceAgent

SequenceAgent(engine)

Bases: object

Agent for interacting with database of sequences

Source code in refget/agents.py
79
80
def __init__(self, engine):
    """
    Store the given database engine on the agent.

    Args:
        engine: The database engine; kept as ``self.engine``.
    """
    self.engine = engine