Core Reference

encord_agents.core.data_model

Frame dataclass

A dataclass to hold the content of one frame in a video.

Source code in encord_agents/core/data_model.py
@dataclass(frozen=True)
class Frame:
    """
    A dataclass to hold the content of one frame in a video.
    """

    frame: int
    """
    The frame number within the video
    """
    content: "NDArray[np.uint8]"
    """
    An [h,w,c] np.array with color channels RGB.
    """

    @overload
    def b64_encoding(
        self,
        image_format: Base64Formats = ".jpeg",
        output_format: Literal["raw", "url"] = "raw",
    ) -> str: ...

    @overload
    def b64_encoding(
        self,
        image_format: Literal[".jpeg", ".jpg", ".png"] = ".jpeg",
        output_format: Literal["openai", "anthropic"] = "openai",
    ) -> dict[str, str | dict[str, str]]: ...

    def b64_encoding(
        self,
        image_format: Literal[".jpeg", ".jpg", ".png"] = ".jpeg",
        output_format: Literal["url", "openai", "anthropic", "raw"] = "url",
    ) -> str | dict[str, str | dict[str, str]]:
        """
        Get a base64 representation of the image content.

        This method allows you to convert the content into a base64 representation
        based on various different image encodings.
        This is useful, e.g., for prompting LLMs with image content.


        Please see details for formats below.

        Args:
            image_format: Which type of image encoding to use.
            output_format: Different common formats.
                - `raw`: the image content as a raw b64 string
                - `url`: url encoded image content. Compatible with, e.g., `<img src="<the_encoding>" />`
                - `openai`: a dict with `type` and `image_url` keys
            - `anthropic`: a dict with `media_type`, `type`, and `data` keys.

        Returns: a dict or string depending on `output_format`.

        """
        b64_str = b64_encode_image(self.content, image_format)
        if output_format == "raw":
            return b64_str

        media_type = DATA_TYPES.get(image_format, f"image/{image_format.replace('.', '')}")
        image_url = f"data:{media_type};base64,{b64_str}"
        if output_format == "url":
            return image_url
        elif output_format == "openai":
            return {
                "type": "image_url",
                "image_url": {
                    "url": image_url,
                },
            }
        elif output_format == "anthropic":
            return {
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": media_type,
                    "data": b64_str,
                },
            }
content instance-attribute
content: NDArray[uint8]

An [h,w,c] np.array with color channels RGB.

frame instance-attribute
frame: int

The frame number within the video

b64_encoding
b64_encoding(image_format: Base64Formats = '.jpeg', output_format: Literal['raw', 'url'] = 'raw') -> str
b64_encoding(image_format: Literal['.jpeg', '.jpg', '.png'] = '.jpeg', output_format: Literal['openai', 'anthropic'] = 'openai') -> dict[str, str | dict[str, str]]
b64_encoding(image_format: Literal['.jpeg', '.jpg', '.png'] = '.jpeg', output_format: Literal['url', 'openai', 'anthropic', 'raw'] = 'url') -> str | dict[str, str | dict[str, str]]

Get a base64 representation of the image content.

This method allows you to convert the content into a base64 representation based on various different image encodings. This is useful, e.g., for prompting LLMs with image content.

Please see details for formats below.

Parameters:

  • image_format (Literal['.jpeg', '.jpg', '.png'], default: '.jpeg' ) –

    Which type of image encoding to use.

  • output_format (Literal['url', 'openai', 'anthropic', 'raw'], default: 'url' ) –

    Different common formats.

      - raw: the image content as a raw b64 string
      - url: url-encoded image content. Compatible with, e.g., <img src="<the_encoding>" />
      - openai: a dict with type and image_url keys
      - anthropic: a dict with media_type, type, and data keys

Returns: a dict or string depending on output_format.

Source code in encord_agents/core/data_model.py
def b64_encoding(
    self,
    image_format: Literal[".jpeg", ".jpg", ".png"] = ".jpeg",
    output_format: Literal["url", "openai", "anthropic", "raw"] = "url",
) -> str | dict[str, str | dict[str, str]]:
    """
    Get a base64 representation of the image content.

    This method allows you to convert the content into a base64 representation
    based on various different image encodings.
    This is useful, e.g., for prompting LLMs with image content.


    Please see details for formats below.

    Args:
        image_format: Which type of image encoding to use.
        output_format: Different common formats.
            - `raw`: the image content as a raw b64 string
            - `url`: url encoded image content. Compatible with, e.g., `<img src="<the_encoding>" />`
            - `openai`: a dict with `type` and `image_url` keys
        - `anthropic`: a dict with `media_type`, `type`, and `data` keys.

    Returns: a dict or string depending on `output_format`.

    """
    b64_str = b64_encode_image(self.content, image_format)
    if output_format == "raw":
        return b64_str

    media_type = DATA_TYPES.get(image_format, f"image/{image_format.replace('.', '')}")
    image_url = f"data:{media_type};base64,{b64_str}"
    if output_format == "url":
        return image_url
    elif output_format == "openai":
        return {
            "type": "image_url",
            "image_url": {
                "url": image_url,
            },
        }
    elif output_format == "anthropic":
        return {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": media_type,
                "data": b64_str,
            },
        }
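
For example, the openai output format plugs directly into a chat completion request. A minimal sketch, assuming you already have a Frame instance called frame and the openai package installed (the model name is illustrative):

from openai import OpenAI

client = OpenAI()
response = client.chat.completions.create(
    model="gpt-4o-mini",  # illustrative model choice
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this frame."},
                # dict with `type` and `image_url` keys, as documented above
                frame.b64_encoding(output_format="openai"),
            ],
        }
    ],
)
print(response.choices[0].message.content)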

FrameData

Bases: BaseModel

Holds the data sent from the Encord Label Editor at the time of triggering the agent.

Source code in encord_agents/core/data_model.py
class FrameData(BaseModel):
    """
    Holds the data sent from the Encord Label Editor at the time of triggering the agent.
    """

    project_hash: UUID = Field(validation_alias="projectHash")
    """
    The identifier of the given project.
    """
    data_hash: UUID = Field(validation_alias="dataHash")
    """
    The identifier of the given data asset.
    """
    frame: int = Field(ge=0)
    """
    The frame number. For a single image, this defaults to 0.
    """
data_hash class-attribute instance-attribute
data_hash: UUID = Field(validation_alias='dataHash')

The identifier of the given data asset.

frame class-attribute instance-attribute
frame: int = Field(ge=0)

The frame number. For a single image, this defaults to 0.

project_hash class-attribute instance-attribute
project_hash: UUID = Field(validation_alias='projectHash')

The identifier of the given project.
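
Since FrameData is a pydantic model with validation aliases, it can be parsed straight from the JSON payload that the Label Editor sends. A minimal sketch with hypothetical identifiers:

from encord_agents.core.data_model import FrameData

payload = '{"projectHash": "00000000-0000-0000-0000-000000000000", "dataHash": "00000000-0000-0000-0000-000000000001", "frame": 3}'
frame_data = FrameData.model_validate_json(payload)
print(frame_data.project_hash, frame_data.frame)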

InstanceCrop dataclass

Bases: Frame

A dataclass to hold the frame content of one object instance in a video or image.

Source code in encord_agents/core/data_model.py
@dataclass(frozen=True)
class InstanceCrop(Frame):
    """
    A dataclass to hold the frame content of one object instance in a video or image.
    """

    instance: ObjectInstance
    r"""
    The [ObjectInstance](https://docs.encord.com/sdk-documentation/sdk-references/ObjectInstance#objectinstance){ target="\_blank", rel="noopener noreferrer" } associated with the crop.
    """
instance instance-attribute
instance: ObjectInstance

The ObjectInstance associated with the crop.

LabelRowInitialiseLabelsArgs

Bases: BaseModel

Arguments used to specify how to initialise labels via the SDK.

The arguments are passed to LabelRowV2.initialise_labels.

Source code in encord_agents/core/data_model.py
class LabelRowInitialiseLabelsArgs(BaseModel):
    """
    Arguments used to specify how to initialise labels via the SDK.

    The arguments are passed to `LabelRowV2.initialise_labels`.
    """

    include_object_feature_hashes: set[str] | None = None
    include_classification_feature_hashes: set[str] | None = None
    include_reviews: bool = False
    overwrite: bool = False
    include_signed_url: bool = False

LabelRowMetadataIncludeArgs

Bases: BaseModel

Warning: including metadata via label rows is suitable for reading metadata, not for writing it.

If you need to write to metadata, use the dep_storage_item dependencies instead.

Source code in encord_agents/core/data_model.py
class LabelRowMetadataIncludeArgs(BaseModel):
    """
    Warning: including metadata via label rows is suitable for _reading_ metadata,
    **not** for writing it.

    If you need to write to metadata, use the `dep_storage_item` dependencies instead.
    """

    include_workflow_graph_node: bool = True
    include_client_metadata: bool = False
    include_images_data: bool = False
    include_all_label_branches: bool = False
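
Both of these argument models are plain pydantic models, so they can be constructed directly and handed to helpers such as get_initialised_label_row (documented further below). A minimal sketch:

from encord_agents.core.data_model import (
    LabelRowInitialiseLabelsArgs,
    LabelRowMetadataIncludeArgs,
)

include_args = LabelRowMetadataIncludeArgs(include_client_metadata=True)
init_args = LabelRowInitialiseLabelsArgs(include_signed_url=True)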

encord_agents.core.dependencies

shares

DataLookup
Source code in encord_agents/core/dependencies/shares.py
class DataLookup:
    __instances__: dict[UUID, DataLookup] = {}

    def __init__(self, dataset_hashes: list[str | UUID] | None = None) -> None:
        self.user_client = get_user_client()
        self.datasets = {UUID(d): self.user_client.get_dataset(d) for d in map(str, dataset_hashes or [])}
        self.data_rows = {dr.uid: dr for dataset in self.datasets.values() for dr in dataset.data_rows}

    @classmethod
    def sharable(cls, project: Project) -> DataLookup:
        ph = UUID(project.project_hash)
        if ph not in cls.__instances__:
            cls.__instances__[ph] = cls([ds.dataset_hash for ds in project.list_datasets()])
        return cls.__instances__[ph]

    def get_data_row(self, data_hash: str | UUID, dataset_hash: str | UUID | None = None) -> DataRow:
        dr = self.data_rows.get(str(data_hash))
        if dr is None:
            if dataset_hash is not None:
                dataset = self.user_client.get_dataset(str(dataset_hash))
                self.datasets[UUID(str(dataset_hash))] = dataset
                self.data_rows.update({dr.uid: dr for dr in dataset.data_rows})
            else:
                raise ValueError(
                    "Cannot identify a data row without knowing the dataset hash. Please provide it to the function call or to the constructor of the `DataLookup`"
                )
            dr = self.data_rows.get(str(data_hash))
        if dr is None:
            raise ValueError("DatasetCache was not able to locate data row from data hash")
        return dr

    @property
    def backing_item_uuids(self) -> list[UUID]:
        """
        Get all backing item uuids for all data rows in the data lookup.
        """
        return [dr.backing_item_uuid for dr in self.data_rows.values()]

    def get_storage_item(
        self, data_hash: str | UUID, dataset_hash: str | UUID | None = None, sign_url: bool = False
    ) -> StorageItem:
        """

        Args:
            data_hash: Data hash for the asset for which you need the underlying storage item.
            dataset_hash: If you didn't provide the associated dataset hash in the constructor,
                this is your last chance.
            sign_url: If `True`, pre-fetch a signed URL for the item (otherwise the URL will be signed on demand).

        Raises:
            ValueError: Mainly if underlying data row cannot be found.

        Returns:
            The underlying storage item from which, e.g., client metadata can be updated.

        """
        try:
            dr = self.get_data_row(data_hash, dataset_hash)
        except ValueError:
            raise ValueError(
                "DatasetCache was not able to locate storage_item because the associated data row could not be identified."
            )

        return self.user_client.get_storage_item(dr.backing_item_uuid)

    def get_storage_items(
        self, data_hashes: list[str | UUID], dataset_hash: str | UUID | None = None, sign_urls: bool = False
    ) -> list[StorageItem]:
        """

        Args:
            data_hashes: Data hashes for the assets for which you need the underlying storage items.
            dataset_hash: If you didn't provide the associated dataset hash in the constructor,
                this is your last chance.
            sign_urls: If `True`, pre-fetch signed URLs for the items (otherwise the URLs will be signed on demand).

        Raises:
            ValueError: Mainly if underlying data row cannot be found.

        Returns:
            list of underlying storage items from which, e.g., client metadata can be updated.
        """
        try:
            data_rows = [self.get_data_row(i, dataset_hash) for i in data_hashes]
        except ValueError:
            raise ValueError("Failed to load storage items because one or more data rows could not be obtained")

        return self.user_client.get_storage_items([dr.backing_item_uuid for dr in data_rows])
backing_item_uuids property
backing_item_uuids: list[UUID]

Get all backing item uuids for all data rows in the data lookup.

get_storage_item
get_storage_item(data_hash: str | UUID, dataset_hash: str | UUID | None = None, sign_url: bool = False) -> StorageItem

Parameters:

  • data_hash (str | UUID) –

    Data hash for the asset for which you need the underlying storage item.

  • dataset_hash (str | UUID | None, default: None ) –

    If you didn't provide the associated dataset hash in the constructor, this is your last chance.

  • sign_url (bool, default: False ) –

    If True, pre-fetch a signed URL for the item (otherwise the URL will be signed on demand).

Raises:

  • ValueError

    Mainly if underlying data row cannot be found.

Returns:

  • StorageItem

    The underlying storage item from which, e.g., client metadata can be updated.

Source code in encord_agents/core/dependencies/shares.py
def get_storage_item(
    self, data_hash: str | UUID, dataset_hash: str | UUID | None = None, sign_url: bool = False
) -> StorageItem:
    """

    Args:
        data_hash: Data hash for the asset for which you need the underlying storage item.
        dataset_hash: If you didn't provide the associated dataset hash in the constructor,
            this is your last chance.
        sign_url: If `True`, pre-fetch a signed URL for the item (otherwise the URL will be signed on demand).

    Raises:
        ValueError: Mainly if underlying data row cannot be found.

    Returns:
        The underlying storage item from which, e.g., client metadata can be updated.

    """
    try:
        dr = self.get_data_row(data_hash, dataset_hash)
    except ValueError:
        raise ValueError(
            "DatasetCache was not able to locate storage_item because the associated data row could not be identified."
        )

    return self.user_client.get_storage_item(dr.backing_item_uuid)
get_storage_items
get_storage_items(data_hashes: list[str | UUID], dataset_hash: str | UUID | None = None, sign_urls: bool = False) -> list[StorageItem]

Parameters:

  • data_hashes (list[str | UUID]) –

    Data hashes for the assets for which you need the underlying storage items.

  • dataset_hash (str | UUID | None, default: None ) –

    If you didn't provide the associated dataset hash in the constructor, this is your last chance.

  • sign_urls (bool, default: False ) –

    If True, pre-fetch signed URLs for the items (otherwise the URLs will be signed on demand).

Raises:

  • ValueError

    Mainly if underlying data row cannot be found.

Returns:

  • list[StorageItem]

    list of underlying storage items from which, e.g., client metadata can be updated.

Source code in encord_agents/core/dependencies/shares.py
def get_storage_items(
    self, data_hashes: list[str | UUID], dataset_hash: str | UUID | None = None, sign_urls: bool = False
) -> list[StorageItem]:
    """

    Args:
        data_hashes: Data hashes for the assets for which you need the underlying storage items.
        dataset_hash: If you didn't provide the associated dataset hash in the constructor,
            this is your last chance.
        sign_urls: If `True`, pre-fetch signed URLs for the items (otherwise the URLs will be signed on demand).

    Raises:
        ValueError: Mainly if underlying data row cannot be found.

    Returns:
        list of underlying storage items from which, e.g., client metadata can be updated.
    """
    try:
        data_rows = [self.get_data_row(i, dataset_hash) for i in data_hashes]
    except ValueError:
        raise ValueError("Failed to load storage items because one or more data rows could not be obtained")

    return self.user_client.get_storage_items([dr.backing_item_uuid for dr in data_rows])
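
For example, to resolve the storage item behind a data hash. A minimal sketch, where the dataset and data hashes are hypothetical placeholders:

from encord_agents.core.dependencies.shares import DataLookup

lookup = DataLookup(dataset_hashes=["<dataset-hash>"])  # hypothetical hash
item = lookup.get_storage_item("<data-hash>")  # hypothetical hash
print(item.client_metadata)  # e.g., read the client metadata off the storage item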

encord_agents.core.ontology

FieldType module-attribute

FieldType = Any

pydantic's Field can hold anything, so it is hard to type precisely. This alias indicates that you should use the pydantic.Field function to construct this variable.

GenericFieldModel

Bases: BaseModel

Source code in encord_agents/core/ontology.py
class GenericFieldModel(BaseModel):
    feature_node_hash: str = Field()

    def set_answer(self, instance: ClassificationInstance) -> None:
        """
        This function will be called from the parsing loop to allow the model to set itself as the answer
        on the classification instance.
        """
        ...
set_answer
set_answer(instance: ClassificationInstance) -> None

This function will be called from the parsing loop to allow the model to set itself as the answer on the classification instance.

Source code in encord_agents/core/ontology.py
def set_answer(self, instance: ClassificationInstance) -> None:
    """
    This function will be called from the parsing loop to allow the model to set itself as the answer
    on the classification instance.
    """
    ...

OntologyDataModel

Bases: Generic[OntologyType]

Class to create a pydantic model equivalent to an arbitrary classification ontology.

The model can be used to form a json schema based on the ontology. This is useful if you are, e.g., trying to get a structured response from an LLM.

Example:

from pydantic import ValidationError

classifications = project.ontology_structure.classifications
objects = project.ontology_structure.objects

data_model = OntologyDataModel([objects])
# or
data_model = OntologyDataModel([classifications])

# Get a json schema for the ontology
schema = data_model.model_json_schema_str
print(schema)

# Parse json following the schema into label instances
json_str = my_favourite_llm(
    f"what is this? pls follow {schema}", img
)
try:
    instances = data_model(json_str)
except ValidationError:
    # invalid json
    ...

for ins in instances:
    label_row.add_classification_instance(ins)

label_row.save()

For a concrete example, please see

Attributes:

  • ontology
  • DataModel (BaseModel) –
Source code in encord_agents/core/ontology.py
class OntologyDataModel(Generic[OntologyType]):
    """
    Class to create a pydantic model equivalent to an arbitrary classification ontology.

    The model can be used to form a json schema based on the ontology. This is useful if
    you are, e.g., trying to get a structured response from an LLM.

    **Example:**

    ```python
    from pydantic import ValidationError

    classifications = project.ontology_structure.classifications
    objects = project.ontology_structure.objects

    data_model = OntologyDataModel([objects])
    # or
    data_model = OntologyDataModel([classifications])

    # Get a json schema for the ontology
    schema = data_model.model_json_schema_str
    print(schema)

    # Parse json following the schema into label instances
    json_str = my_favourite_llm(
        f"what is this? pls follow {schema}", img
    )
    try:
        instances = data_model(json_str)
    except ValidationError:
        # invalid json
        ...

    for ins in instances:
        label_row.add_classification_instance(ins)

    label_row.save()
    ```

    For a concrete example, please see [](TODO)

    Attributes:
        ontology:
        DataModel:
    """

    def __init__(self, root_obj: list[OntologyType] | OntologyType):
        _root_obj: list[OntologyType]
        if isinstance(root_obj, list):
            assert len(root_obj) != 0, "No ontology objects given to transform into a pydantic model"
            first, *rest = root_obj
            assert all(
                (isinstance(r, type(first)) for r in rest)
            ), "You cannot mix classifications and objects in the same model"
            _root_obj = root_obj
        else:
            _root_obj = [root_obj]

        self.ontology_lookup: dict[str, OntologyType] = {
            a.feature_node_hash: r for r in _root_obj for a in r.attributes
        }
        self.DataModel: BaseModel
        if isinstance(_root_obj[0], Object):
            legal_shapes = {Shape.BOUNDING_BOX, Shape.BITMASK, Shape.POLYGON, Shape.ROTATABLE_BOUNDING_BOX}

            illegal_objects = [o for o in _root_obj if o.shape not in legal_shapes]  # type: ignore

            if illegal_objects:
                illegal_names = [f'Object(name="{o.name}", shape={o.shape})' for o in illegal_objects]  # type: ignore
                assert not illegal_objects, f"Illegal shapes in provided ontology objects: `{illegal_names}`"

            self.DataModel = create_objects_model(_root_obj)  # type: ignore

        else:
            # Classifications can be built into one model
            classification_fields = dict([construct_fields(attr) for clf in _root_obj for attr in clf.attributes])
            self.DataModel: BaseModel = create_model("ClassificationModel", **classification_fields)  # type: ignore

    @property
    def model_json_schema(self) -> dict[str, Any]:
        return self.DataModel.model_json_schema()

    @property
    def model_json_schema_str(self) -> str:
        return json.dumps(self.model_json_schema)

    @overload
    def __call__(self: "OntologyDataModel[Classification]", answer: str) -> list[ClassificationInstance]: ...

    @overload
    def __call__(self: "OntologyDataModel[Object]", answer: str) -> ObjectInstance: ...

    def __call__(
        self: "OntologyDataModel[Classification] | OntologyDataModel[Object]", answer: str
    ) -> list[ClassificationInstance] | ObjectInstance:
        """
        Validate a json response in accordance with the pydantic model.

        This function allows you to convert from a json object (e.g., coming from an llm)
        back to the encord "instance format".

        Args:
            answer: The json object as a raw string.

        Returns: a list of classification / object instances that you will then
            have to add to a label row.

        """
        return self.validate_json(answer)

    @overload
    def validate_json(self: "OntologyDataModel[Classification]", answer_str: str) -> list[ClassificationInstance]: ...

    @overload
    def validate_json(self: "OntologyDataModel[Object]", answer_str: str) -> ObjectInstance: ...

    def validate_json(
        self: "OntologyDataModel[Classification] | OntologyDataModel[Object]", answer_str: str
    ) -> list[ClassificationInstance] | ObjectInstance:
        """
        Validate a json response in accordance with the pydantic model.

        This function allows you to convert from a json object (e.g., coming from an llm)
        back to the encord "instance format".

        Args:
            answer_str: The json object as a raw string.

        Returns: a list of classification / object instances that you will then
            have to add to a label row.

        """
        answer = self.DataModel.model_validate_json(answer_str)
        # ^ if classification has a property per top-level classification in the ontology

        if self.DataModel.__name__ == OBJECTS_RADIO_MODEL:  # type: ignore
            ont_obj = answer.get_ontology_object()  # type: ignore
            ins = ont_obj.create_instance()

            if ont_obj.attributes:
                for attr_key, attr_val in vars(answer).items():
                    if attr_key == "feature_node_hash":
                        continue
                    attr_val.set_answer(ins)
            return ins
        else:
            answers = []
            for attr_val in vars(answer).values():
                ont_cls = self.ontology_lookup[attr_val.feature_node_hash]
                ins = ont_cls.create_instance()
                attr_val.set_answer(ins)
                answers.append(ins)

            return answers
__call__
__call__(answer: str) -> list[ClassificationInstance]
__call__(answer: str) -> ObjectInstance
__call__(answer: str) -> list[ClassificationInstance] | ObjectInstance

Validate a json response in accordance with the pydantic model.

This function allows you to convert from a json object (e.g., coming from an llm) back to the encord "instance format".

Parameters:

  • answer (str) –

    The json object as a raw string.

Returns:

  • list[ClassificationInstance] | ObjectInstance –

    A list of classification / object instances that you will then have to add to a label row.

Source code in encord_agents/core/ontology.py
def __call__(
    self: "OntologyDataModel[Classification] | OntologyDataModel[Object]", answer: str
) -> list[ClassificationInstance] | ObjectInstance:
    """
    Validate a json response in accordance with the pydantic model.

    This function allows you to convert from a json object (e.g., coming from an llm)
    back to the encord "instance format".

    Args:
        answer: The json object as a raw string.

    Returns: a list of classification / object instances that you will then
        have to add to a label row.

    """
    return self.validate_json(answer)
validate_json
validate_json(answer_str: str) -> list[ClassificationInstance]
validate_json(answer_str: str) -> ObjectInstance
validate_json(answer_str: str) -> list[ClassificationInstance] | ObjectInstance

Validate a json response in accordance with the pydantic model.

This function allows you to convert from a json object (e.g., coming from an llm) back to the encord "instance format".

Parameters:

  • answer_str (str) –

    The json object as a raw string.

Returns:

  • list[ClassificationInstance] | ObjectInstance –

    A list of classification / object instances that you will then have to add to a label row.

Source code in encord_agents/core/ontology.py
def validate_json(
    self: "OntologyDataModel[Classification] | OntologyDataModel[Object]", answer_str: str
) -> list[ClassificationInstance] | ObjectInstance:
    """
    Validate a json response in accordance with the pydantic model.

    This function allows you to convert from a json object (e.g., coming from an llm)
    back to the encord "instance format".

    Args:
        answer_str: The json object as a raw string.

    Returns: a list of classification / object instances that you will then
        have to add to a label row.

    """
    answer = self.DataModel.model_validate_json(answer_str)
    # ^ if classification has a property per top-level classification in the ontology

    if self.DataModel.__name__ == OBJECTS_RADIO_MODEL:  # type: ignore
        ont_obj = answer.get_ontology_object()  # type: ignore
        ins = ont_obj.create_instance()

        if ont_obj.attributes:
            for attr_key, attr_val in vars(answer).items():
                if attr_key == "feature_node_hash":
                    continue
                attr_val.set_answer(ins)
        return ins
    else:
        answers = []
        for attr_val in vars(answer).values():
            ont_cls = self.ontology_lookup[attr_val.feature_node_hash]
            ins = ont_cls.create_instance()
            attr_val.set_answer(ins)
            answers.append(ins)

        return answers

encord_agents.core.settings

Settings used throughout the module.

Note that central settings will be read via environment variables.

Settings

Bases: BaseSettings

Source code in encord_agents/core/settings.py
class Settings(BaseSettings):
    ssh_key_file: Optional[Path] = Field(validation_alias="ENCORD_SSH_KEY_FILE", default=None)
    """
    The path to the private ssh key file to authenticate with Encord.

    Either this or the `ENCORD_SSH_KEY` needs to be set for most use-cases.
    To set up a key with Encord, please see
    [the platform docs](https://docs.encord.com/platform-documentation/Annotate/annotate-api-keys).
    """
    ssh_key_content: Optional[str] = Field(validation_alias="ENCORD_SSH_KEY", default=None)
    """
    The content of the private ssh key file to authenticate with Encord.

    Either this or the `ENCORD_SSH_KEY_FILE` needs to be set for most use-cases.
    To set up a key with Encord, please see
    [the platform docs](https://docs.encord.com/platform-documentation/Annotate/annotate-api-keys).
    """
    domain: Optional[str] = Field(validation_alias="ENCORD_DOMAIN", default=None)

    @field_validator("ssh_key_content")
    @classmethod
    def check_key_content(cls, content: str | None) -> str | None:
        if content is None:
            return content

        if os.path.exists(content):
            raise PrintableError(
                f"The env variable `[blue]ENCORD_SSH_KEY[/blue]` (={content}) is set with a value that looks like a path and not ssh key content. Did you mean to set the `[blue]ENCORD_SSH_KEY_FILE[/blue]` environment variable with the private key file content directly?"
            )

        return content

    @field_validator("ssh_key_file")
    @classmethod
    def check_path_expand_and_exists(cls, path: Path | None) -> Path | None:
        if path is None:
            return path

        path = path.expanduser()

        if not path.is_file():
            raise PrintableError(
                "The env variable `[blue]ENCORD_SSH_KEY_FILE[/blue]` is set with a value that could not be found in the file system. Did you mean to set the `[blue]ENCORD_SSH_KEY[/blue]` environment variable with the private key file content directly?"
            )

        return path

    @model_validator(mode="after")
    def check_key(self: "Settings") -> "Settings":
        if not any(map(bool, [self.ssh_key_content, self.ssh_key_file])):
            raise PrintableError(
                f"Must specify either `[blue]ENCORD_SSH_KEY_FILE[/blue]` or `[blue]ENCORD_SSH_KEY[/blue]` env variables. If you don't have an ssh key, please refere to our docs:{os.linesep}[magenta]https://docs.encord.com/platform-documentation/Annotate/annotate-api-keys#creating-keys-using-terminal-powershell[/magenta]"
            )

        if all(map(bool, [self.ssh_key_file, self.ssh_key_content])):
            import warnings

            warnings.warn(
                "You have configured both the `ENCORD_SSH_KEY` and `ENCORD_SSH_KEY_FILE`. The `ENCORD_SSH_KEY` will take precedence."
            )

        return self

    @property
    def ssh_key(self) -> str:
        if self.ssh_key_content is None:
            if self.ssh_key_file is None:
                raise ValueError("Both ssh key content and ssh key file is None")
            self.ssh_key_content = self.ssh_key_file.read_text()
        return self.ssh_key_content
ssh_key_content class-attribute instance-attribute
ssh_key_content: Optional[str] = Field(validation_alias='ENCORD_SSH_KEY', default=None)

The content of the private ssh key file to authenticate with Encord.

Either this or the ENCORD_SSH_KEY_FILE needs to be set for most use-cases. To set up a key with Encord, please see the platform docs.

ssh_key_file class-attribute instance-attribute
ssh_key_file: Optional[Path] = Field(validation_alias='ENCORD_SSH_KEY_FILE', default=None)

The path to the private ssh key file to authenticate with Encord.

Either this or the ENCORD_SSH_KEY needs to be set for most use-cases. To set up a key with Encord, please see the platform docs.
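
In most cases you do not construct Settings yourself; it is read from environment variables by get_user_client (see below). A minimal sketch, assuming a hypothetical key file path:

import os

os.environ["ENCORD_SSH_KEY_FILE"] = "/path/to/encord_private_key"  # hypothetical path

from encord_agents.core.settings import Settings

settings = Settings()
print(len(settings.ssh_key))  # the private key content, read lazily from the file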

encord_agents.core.utils

download_asset

download_asset(lr: LabelRowV2, frame: int | None = None) -> Generator[Path, None, None]

Download the asset associated to a label row to disk.

This function is a context manager. Data will be cleaned up when the context is left.

Example usage:

with download_asset(lr, 10) as asset_path:
    # In here the file exists
    pixel_values = np.asarray(Image.open(asset_path))

# outside, it will be cleaned up

Parameters:

  • lr (LabelRowV2) –

    The label row for which you want to download the associated asset.

  • frame (int | None, default: None ) –

    The frame that you need. If frame is None for a video, you will get the path to the full video.

Raises:

  • NotImplementedError

    If you try to get all frames of an image group.

  • ValueError

    If you try to download an unsupported data type (e.g., DICOM).

Yields:

  • Path

    The file path for the requested asset.

Source code in encord_agents/core/utils.py
@contextmanager
def download_asset(lr: LabelRowV2, frame: int | None = None) -> Generator[Path, None, None]:
    """
    Download the asset associated to a label row to disk.

    This function is a context manager. Data will be cleaned up when the context is left.

    Example usage:

        with download_asset(lr, 10) as asset_path:
            # In here the file exists
            pixel_values = np.asarray(Image.open(asset_path))

        # outside, it will be cleaned up

    Args:
        lr: The label row for which you want to download the associated asset.
        frame: The frame that you need. If `frame` is `None` for a video, you will get the path to the full video.

    Raises:
        NotImplementedError: If you try to get all frames of an image group.
        ValueError: If you try to download an unsupported data type (e.g., DICOM).


    Yields:
        The file path for the requested asset.

    """
    url: str | None = None
    if lr.data_link is not None and lr.data_link[:5] == "https":
        url = lr.data_link
    elif lr.backing_item_uuid is not None:
        storage_item = get_user_client().get_storage_item(lr.backing_item_uuid, sign_url=True)
        url = storage_item.get_signed_url()

    # Fallback for native image groups (they don't have a url)
    is_image_sequence = lr.data_type == DataType.IMG_GROUP
    if url is None:
        is_image_sequence = False
        _, images_list = lr._project_client.get_data(lr.data_hash, get_signed_url=True)
        if images_list is None:
            raise ValueError("Image list should not be none for image groups.")
        if frame is None:
            raise NotImplementedError(
                "Downloading entire image group is not supported. Please contact Encord at support@encord.com for help or submit a PR with an implementation."
            )
        image = images_list[frame]
        url = cast(str | None, image.file_link)

    if url is None:
        raise ValueError("Failed to get a signed url for the asset")

    response = requests.get(url)
    response.raise_for_status()

    with TemporaryDirectory() as dir_name:
        dir_path = Path(dir_name)

        _, suffix = _guess_file_suffix(url, lr)
        file_path = dir_path / f"{lr.data_hash}{suffix}"
        with open(file_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=4096):
                if chunk:
                    f.write(chunk)

        if (lr.data_type == DataType.VIDEO or is_image_sequence) and frame is not None:  # Get that exact frame
            frame_content = get_frame(file_path, frame)
            frame_file = file_path.with_name(f"{file_path.name}_{frame}").with_suffix(".png")
            cv2.imwrite(frame_file.as_posix(), frame_content)
            file_path = frame_file

        yield file_path

get_initialised_label_row

get_initialised_label_row(frame_data: FrameData, include_args: LabelRowMetadataIncludeArgs | None = None, init_args: LabelRowInitialiseLabelsArgs | None = None) -> LabelRowV2

Get an initialised label row from the frame_data information.

Parameters:

  • frame_data (FrameData) –

    The data pointing to the data asset.

  • include_args (LabelRowMetadataIncludeArgs | None, default: None ) –

    Arguments passed on to project.list_label_rows_v2 when matching the label row.

  • init_args (LabelRowInitialiseLabelsArgs | None, default: None ) –

    Arguments passed on to LabelRowV2.initialise_labels.

Raises:

  • Exception

    If the frame_data cannot be matched to a label row

Returns:

  • LabelRowV2

    The initialised label row.

Source code in encord_agents/core/utils.py
def get_initialised_label_row(
    frame_data: FrameData,
    include_args: LabelRowMetadataIncludeArgs | None = None,
    init_args: LabelRowInitialiseLabelsArgs | None = None,
) -> LabelRowV2:
    """
    Get an initialised label row from the frame_data information.

    Args:
        frame_data: The data pointing to the data asset.
        include_args: Arguments passed on to `project.list_label_rows_v2` when matching the label row.
        init_args: Arguments passed on to `LabelRowV2.initialise_labels`.

    Raises:
        Exception: If the `frame_data` cannot be matched to a label row

    Returns:
        The initialised label row.

    """
    user_client = get_user_client()
    project = user_client.get_project(str(frame_data.project_hash))
    include_args = include_args or LabelRowMetadataIncludeArgs()
    init_args = init_args or LabelRowInitialiseLabelsArgs()
    matched_lrs = project.list_label_rows_v2(data_hashes=[frame_data.data_hash], **include_args.model_dump())
    num_matches = len(matched_lrs)
    if num_matches > 1:
        raise Exception(f"Non unique match: matched {num_matches} label rows!")
    elif num_matches == 0:
        raise Exception("No label rows were matched!")
    lr = matched_lrs.pop()
    lr.initialise_labels(**init_args.model_dump())
    return lr
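
A minimal sketch tying this together with the argument models from encord_agents.core.data_model, using hypothetical identifiers:

from encord_agents.core.data_model import (
    FrameData,
    LabelRowInitialiseLabelsArgs,
    LabelRowMetadataIncludeArgs,
)
from encord_agents.core.utils import get_initialised_label_row

frame_data = FrameData(
    projectHash="00000000-0000-0000-0000-000000000000",  # hypothetical
    dataHash="00000000-0000-0000-0000-000000000001",  # hypothetical
    frame=0,
)
lr = get_initialised_label_row(
    frame_data,
    include_args=LabelRowMetadataIncludeArgs(include_client_metadata=True),
    init_args=LabelRowInitialiseLabelsArgs(overwrite=False),
)
print(lr.data_title)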

get_user_client cached

get_user_client() -> EncordUserClient

Generate a user client to access Encord.

Returns:

  • EncordUserClient

    An EncordUserClient authenticated with the credentials from the encord_agents.core.settings.Settings.

Source code in encord_agents/core/utils.py
@lru_cache(maxsize=1)
def get_user_client() -> EncordUserClient:
    """
    Generate a user client to access Encord.

    Returns:
        An EncordUserClient authenticated with the credentials from the encord_agents.core.settings.Settings.

    """
    settings = Settings()
    kwargs: dict[str, Any] = {"domain": settings.domain} if settings.domain else {}
    return EncordUserClient.create_with_ssh_private_key(ssh_private_key=settings.ssh_key, **kwargs)
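
The client is cached, so repeated calls return the same instance. A minimal sketch with a hypothetical project hash:

from encord_agents.core.utils import get_user_client

client = get_user_client()
project = client.get_project("<project-hash>")  # hypothetical hash
print(project.title)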

encord_agents.core.video

get_frame

get_frame(video_path: Path, desired_frame: int) -> NDArray[np.uint8]

Extract an exact frame from a video.

Parameters:

  • video_path (Path) –

    The file path to where the video is stored.

  • desired_frame (int) –

    The frame to extract.

Raises:

  • Exception

    If the video cannot be opened properly or the requested frame could not be retrieved from the video.

Returns:

  • NDArray[uint8]

    Numpy array of shape [h, w, c] where channels are RGB.

Source code in encord_agents/core/video.py
def get_frame(video_path: Path, desired_frame: int) -> NDArray[np.uint8]:
    """
    Extract an exact frame from a video.

    Args:
        video_path: The file path to where the video is stored.
        desired_frame: The frame to extract

    Raises:
        Exception:  If the video cannot be opened properly or the requested
            frame could not be retrieved from the video.

    Returns:
        Numpy array of shape [h, w, c] where channels are RGB.

    """
    cap = cv2.VideoCapture(video_path.as_posix())
    if not cap.isOpened():
        raise Exception("Error opening video file.")

    cap.set(cv2.CAP_PROP_POS_FRAMES, desired_frame)

    ret, frame = cap.read()
    if not ret:
        raise Exception("Error retrieving frame.")

    cap.release()
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    return frame.astype(np.uint8)
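
For example, to pull a single frame and wrap it in the Frame dataclass from above. A minimal sketch, assuming a local video file:

from pathlib import Path

from encord_agents.core.data_model import Frame
from encord_agents.core.video import get_frame

content = get_frame(Path("video.mp4"), desired_frame=10)  # hypothetical file
frame = Frame(frame=10, content=content)
print(frame.content.shape)  # (h, w, 3) with RGB channels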

iter_video

iter_video(video_path: Path) -> Iterator[Frame]

Iterate video frame by frame.

Parameters:

  • video_path (Path) –

    The file path to the video you wish to iterate.

Raises:

  • Exception

    If the video file could not be opened properly.

Yields:

  • Frame

    Frames from the video.

Source code in encord_agents/core/video.py
def iter_video(video_path: Path) -> Iterator[Frame]:
    """
    Iterate video frame by frame.

    Args:
        video_path: The file path to the video you wish to iterate.

    Raises:
        Exception: If the video file could not be opened properly.

    Yields:
        Frames from the video.

    """
    cap = cv2.VideoCapture(video_path.as_posix())
    if not cap.isOpened():
        raise Exception("Error opening video file.")

    frame_num = 0
    ret, frame = cap.read()
    while ret:
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        yield Frame(frame=frame_num, content=rgb_frame.astype(np.uint8))

        ret, frame = cap.read()
        frame_num += 1

    cap.release()
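
For example, sampling every 30th frame of a video. A minimal sketch, assuming a local video file:

from pathlib import Path

from encord_agents.core.video import iter_video

for frame in iter_video(Path("video.mp4")):  # hypothetical file
    if frame.frame % 30 != 0:
        continue
    print(frame.frame, frame.content.shape)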