
Core Reference

encord_agents.core.data_model

Frame dataclass

A dataclass to hold the content of one frame in a video.

Source code in encord_agents/core/data_model.py
@dataclass(frozen=True)
class Frame:
    """
    A dataclass to hold the content of one frame in a video.
    """

    frame: int
    """
    The frame number within the video
    """
    content: "NDArray[np.uint8]"
    """
    An [h,w,c] np.array with color channels RGB.
    """

    @overload
    def b64_encoding(
        self,
        image_format: Literal[".jpeg", ".jpg", ".png"] = ".jpeg",
        output_format: Literal["raw", "url"] = "raw",
    ) -> str: ...

    @overload
    def b64_encoding(
        self,
        image_format: Literal[".jpeg", ".jpg", ".png"] = ".jpeg",
        output_format: Literal["openai", "anthropic"] = "openai",
    ) -> dict: ...

    def b64_encoding(
        self,
        image_format: Literal[".jpeg", ".jpg", ".png"] = ".jpeg",
        output_format: Literal["url", "openai", "anthropic", "raw"] = "url",
    ) -> str | dict:
        """
        Get a base64 representation of the image content.

        This method converts the content into a base64 representation
        using one of several image encodings.
        This is useful, e.g., for prompting LLMs with image content.


        Please see details for formats below.

        Args:
            image_format: Which type of image encoding to use.
            output_format: Different common formats.
                - `raw`: the image content as a raw b64 string
                - `url`: url encoded image content. Compatible with, e.g., `<img src="<the_encoding>" />`
                - `openai`: a dict with `type` and `image_url` keys
                - `anthropic`: a dict with `media_type`, `type`, and `data` keys.

        Returns: a dict or string depending on `output_format`.

        """
        b64_str = b64_encode_image(self.content, image_format)
        if output_format == "raw":
            return b64_str

        media_type = DATA_TYPES.get(image_format, f"image/{image_format.replace('.', '')}")
        image_url = f"data:{media_type};base64,{b64_str}"
        if output_format == "url":
            return image_url
        elif output_format == "openai":
            return {
                "type": "image_url",
                "image_url": {
                    "url": image_url,
                },
            }
        elif output_format == "anthropic":
            return {
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": media_type,
                    "data": b64_str,
                },
            }
content instance-attribute
content: NDArray[uint8]

An [h,w,c] np.array with color channels RGB.

frame instance-attribute
frame: int

The frame number within the video

b64_encoding
b64_encoding(image_format: Literal['.jpeg', '.jpg', '.png'] = '.jpeg', output_format: Literal['raw', 'url'] = 'raw') -> str
b64_encoding(image_format: Literal['.jpeg', '.jpg', '.png'] = '.jpeg', output_format: Literal['openai', 'anthropic'] = 'openai') -> dict
b64_encoding(image_format: Literal['.jpeg', '.jpg', '.png'] = '.jpeg', output_format: Literal['url', 'openai', 'anthropic', 'raw'] = 'url') -> str | dict

Get a base64 representation of the image content.

This method converts the content into a base64 representation using one of several image encodings. This is useful, e.g., for prompting LLMs with image content.

Please see details for formats below.

Parameters:

  • image_format (Literal['.jpeg', '.jpg', '.png'], default: '.jpeg' ) –

    Which type of image encoding to use.

  • output_format (Literal['url', 'openai', 'anthropic', 'raw'], default: 'url' ) –

    Different common formats.

      • raw: the image content as a raw b64 string
      • url: url encoded image content. Compatible with, e.g., <img src="<the_encoding>" />
      • openai: a dict with type and image_url keys
      • anthropic: a dict with media_type, type, and data keys.

Returns: a dict or string depending on output_format.

Source code in encord_agents/core/data_model.py
def b64_encoding(
    self,
    image_format: Literal[".jpeg", ".jpg", ".png"] = ".jpeg",
    output_format: Literal["url", "openai", "anthropic", "raw"] = "url",
) -> str | dict:
    """
    Get a base64 representation of the image content.

    This method converts the content into a base64 representation
    using one of several image encodings.
    This is useful, e.g., for prompting LLMs with image content.


    Please see details for formats below.

    Args:
        image_format: Which type of image encoding to use.
        output_format: Different common formats.
            - `raw`: the image content as a raw b64 string
            - `url`: url encoded image content. Compatible with, e.g., `<img src="<the_encoding>" />`
            - `openai`: a dict with `type` and `image_url` keys
            - `anthropic`: a dict with `media_type`, `type`, and `data` keys.

    Returns: a dict or string depending on `output_format`.

    """
    b64_str = b64_encode_image(self.content, image_format)
    if output_format == "raw":
        return b64_str

    media_type = DATA_TYPES.get(image_format, f"image/{image_format.replace('.', '')}")
    image_url = f"data:{media_type};base64,{b64_str}"
    if output_format == "url":
        return image_url
    elif output_format == "openai":
        return {
            "type": "image_url",
            "image_url": {
                "url": image_url,
            },
        }
    elif output_format == "anthropic":
        return {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": media_type,
                "data": b64_str,
            },
        }
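For illustration, here is a minimal sketch of calling the method; the pixel values are dummy data and the variable names are made up:

import numpy as np
from encord_agents.core.data_model import Frame

# A dummy 64x64 RGB image standing in for a real video frame.
content = np.zeros((64, 64, 3), dtype=np.uint8)
frame = Frame(frame=0, content=content)

raw_b64 = frame.b64_encoding(image_format=".png", output_format="raw")  # plain b64 string
openai_block = frame.b64_encoding(output_format="openai")  # message-ready dict for OpenAI-style APIs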

FrameData

Bases: BaseModel

Holds the data sent from the Encord Label Editor at the time of triggering the agent.

Source code in encord_agents/core/data_model.py
class FrameData(BaseModel):
    """
    Holds the data sent from the Encord Label Editor at the time of triggering the agent.
    """

    project_hash: UUID = Field(validation_alias="projectHash")
    """
    The identifier of the given project.
    """
    data_hash: UUID = Field(validation_alias="dataHash")
    """
    The identifier of the given data asset.
    """
    frame: int = Field(ge=0)
    """
    The frame number. For a single image, this defaults to 0.
    """
data_hash class-attribute instance-attribute
data_hash: UUID = Field(validation_alias='dataHash')

The identifier of the given data asset.

frame class-attribute instance-attribute
frame: int = Field(ge=0)

The frame number. For a single image, this defaults to 0.

project_hash class-attribute instance-attribute
project_hash: UUID = Field(validation_alias='projectHash')

The identifier of the given project.
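
Because the fields use validation aliases, FrameData parses the camelCase keys that the Label Editor sends. A minimal sketch with placeholder hashes:

from uuid import uuid4
from encord_agents.core.data_model import FrameData

# The hashes below are random placeholders, not real identifiers.
payload = {"projectHash": str(uuid4()), "dataHash": str(uuid4()), "frame": 7}
frame_data = FrameData.model_validate(payload)
assert frame_data.frame == 7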

InstanceCrop dataclass

Bases: Frame

A dataclass to hold the frame content of one object instance in a video or image.

Source code in encord_agents/core/data_model.py
@dataclass(frozen=True)
class InstanceCrop(Frame):
    """
    A dataclass to hold the frame content of one object instance in a video or image.
    """

    instance: ObjectInstance
    r"""
    The [ObjectInstance](https://docs.encord.com/sdk-documentation/sdk-references/ObjectInstance#objectinstance){ target="\_blank", rel="noopener noreferrer" } associated with the crop.
    """
instance instance-attribute
instance: ObjectInstance

The ObjectInstance associated with the crop.

encord_agents.core.dependencies

shares

DataLookup
Source code in encord_agents/core/dependencies/shares.py
class DataLookup:
    __instances__: dict[UUID, DataLookup] = {}

    def __init__(self, dataset_hashes: list[str | UUID] | None = None) -> None:
        self.user_client = get_user_client()
        self.datasets = {UUID(d): self.user_client.get_dataset(d) for d in map(str, dataset_hashes or [])}
        self.data_rows = {dr.uid: dr for dataset in self.datasets.values() for dr in dataset.data_rows}

    @classmethod
    def sharable(cls, project: Project) -> DataLookup:
        ph = UUID(project.project_hash)
        if ph not in cls.__instances__:
            cls.__instances__[ph] = cls([ds.dataset_hash for ds in project.list_datasets()])
        return cls.__instances__[ph]

    def get_data_row(self, data_hash: str | UUID, dataset_hash: str | UUID | None = None) -> DataRow:
        dr = self.data_rows.get(str(data_hash))
        if dr is None:
            if dataset_hash is not None:
                dataset = self.user_client.get_dataset(str(dataset_hash))
                self.datasets[UUID(str(dataset_hash))] = dataset
                self.data_rows.update({dr.uid: dr for dr in dataset.data_rows})
            else:
                raise ValueError(
                    "Cannot identify a data row without knowing the dataset hash. Please provide it to the function call or to the constructor of the `DataLookup`"
                )
            dr = self.data_rows.get(str(data_hash))
        if dr is None:
            raise ValueError("DatasetCache was not able to locate data row from data hash")
        return dr

    @property
    def backing_item_uuids(self) -> list[UUID]:
        """
        Get all backing item uuids for all data rows in the data lookup.
        """
        return [dr.backing_item_uuid for dr in self.data_rows.values()]

    def get_storage_item(
        self, data_hash: str | UUID, dataset_hash: str | UUID | None = None, sign_url: bool = False
    ) -> StorageItem:
        """

        Args:
            data_hash: Data hash for the asset for which you need the underlying storage item.
            dataset_hash: If you didn't provide the associated dataset hash in the constructor,
                this is your last chance.
            sign_url: If `True`, pre-fetch a signed URL for the item (otherwise the URL will be signed on demand).

        Raises:
            ValueError: Mainly if underlying data row cannot be found.

        Returns:
            The underlying storage item from which, e.g., client metadata can be updated.

        """
        try:
            dr = self.get_data_row(data_hash, dataset_hash)
        except ValueError:
            raise ValueError(
                "DatasetCache was not able to locate storage_item because the associated data row could not be identified."
            )

        return self.user_client.get_storage_item(dr.backing_item_uuid)

    def get_storage_items(
        self, data_hashes: list[str | UUID], dataset_hash: str | UUID | None = None, sign_urls: bool = False
    ) -> list[StorageItem]:
        """

        Args:
            data_hashes: Data hashes for the assets for which you need the underlying storage items.
            dataset_hash: If you didn't provide the associated dataset hash in the constructor,
                this is your last chance.
            sign_urls: If `True`, pre-fetch signed URLs for the items (otherwise the URLs will be signed on demand).

        Raises:
            ValueError: Mainly if underlying data row cannot be found.

        Returns:
            list of underlying storage items from which, e.g., client metadata can be updated.
        """
        try:
            data_rows = [self.get_data_row(i, dataset_hash) for i in data_hashes]
        except ValueError:
            raise ValueError("Failed to load storage items because one or more data rows could not be obtained")

        return self.user_client.get_storage_items([dr.backing_item_uuid for dr in data_rows])
backing_item_uuids property
backing_item_uuids: list[UUID]

Get all backing item uuids for all data rows in the data lookup.

get_storage_item
get_storage_item(data_hash: str | UUID, dataset_hash: str | UUID | None = None, sign_url: bool = False) -> StorageItem

Parameters:

  • data_hash (str | UUID) –

    Data hash for the asset for which you need the underlying storage item.

  • dataset_hash (str | UUID | None, default: None ) –

    If you didn't provide the associated dataset hash in the constructor, this is your last chance.

  • sign_url (bool, default: False ) –

    If True, pre-fetch a signed URL for the item (otherwise the URL will be signed on demand).

Raises:

  • ValueError

    Mainly if underlying data row cannot be found.

Returns:

  • StorageItem

    The underlying storage item from which, e.g., client metadata can be updated.

Source code in encord_agents/core/dependencies/shares.py
def get_storage_item(
    self, data_hash: str | UUID, dataset_hash: str | UUID | None = None, sign_url: bool = False
) -> StorageItem:
    """

    Args:
        data_hash: Data hash for the asset for which you need the underlying storage item.
        dataset_hash: If you didn't provide the associated dataset hash in the constructor,
            this is your last chance.
        sign_url: If `True`, pre-fetch a signed URL for the item (otherwise the URL will be signed on demand).

    Raises:
        ValueError: Mainly if underlying data row cannot be found.

    Returns:
        The underlying storage item from which, e.g., client metadata can be updated.

    """
    try:
        dr = self.get_data_row(data_hash, dataset_hash)
    except ValueError:
        raise ValueError(
            "DatasetCache was not able to locate storage_item because the associated data row could not be identified."
        )

    return self.user_client.get_storage_item(dr.backing_item_uuid)
get_storage_items
get_storage_items(data_hashes: list[str | UUID], dataset_hash: str | UUID | None = None, sign_urls: bool = False) -> list[StorageItem]

Parameters:

  • data_hashes (list[str | UUID]) –

    Data hashes for the assets for which you need the underlying storage items.

  • dataset_hash (str | UUID | None, default: None ) –

    If you didn't provide the associated dataset hash in the constructor, this is your last chance.

  • sign_urls (bool, default: False ) –

    If True, pre-fetch signed URLs for the items (otherwise the URLs will be signed on demand).

Raises:

  • ValueError

    Mainly if underlying data row cannot be found.

Returns:

  • list[StorageItem]

    list of underlying storage items from which, e.g., client metadata can be updated.

Source code in encord_agents/core/dependencies/shares.py
def get_storage_items(
    self, data_hashes: list[str | UUID], dataset_hash: str | UUID | None = None, sign_urls: bool = False
) -> list[StorageItem]:
    """

    Args:
        data_hashes: Data hashes for the assets for which you need the underlying storage items.
        dataset_hash: If you didn't provide the associated dataset hash in the constructor,
            this is your last chance.
        sign_urls: If `True`, pre-fetch signed URLs for the items (otherwise the URLs will be signed on demand).

    Raises:
        ValueError: Mainly if underlying data row cannot be found.

    Returns:
        list of underlying storage items from which, e.g., client metadata can be updated.
    """
    try:
        data_rows = [self.get_data_row(i, dataset_hash) for i in data_hashes]
    except ValueError:
        raise ValueError("Failed to load storage items because one or more data rows could not be obtained")

    return self.user_client.get_storage_items([dr.backing_item_uuid for dr in data_rows])
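As a usage sketch (the dataset and data hashes below are placeholders, not real identifiers):

from encord_agents.core.dependencies.shares import DataLookup

dataset_hash = "00000000-0000-0000-0000-000000000000"  # placeholder
data_hash = "11111111-1111-1111-1111-111111111111"  # placeholder

lookup = DataLookup(dataset_hashes=[dataset_hash])
item = lookup.get_storage_item(data_hash)
print(item.client_metadata)  # e.g., inspect or update client metadata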

encord_agents.core.ontology

FieldType module-attribute

FieldType = Any

Fields from pydantic can hold anything, so they are hard to type precisely. This alias indicates that you should use the pydantic.Field function to construct the variable.

GenericFieldModel

Bases: BaseModel

Source code in encord_agents/core/ontology.py
class GenericFieldModel(BaseModel):
    feature_node_hash: str = Field()

    def set_answer(self, instance: ClassificationInstance) -> None:
        """
        This function will be called from the parsing loop to allow the model to set itself
        as the answer on the classification instance.
        """
        ...
set_answer
set_answer(instance: ClassificationInstance) -> None

This function will be called from the parsing loop to allow the model to set itself as the answer on the classification instance.

Source code in encord_agents/core/ontology.py
def set_answer(self, instance: ClassificationInstance) -> None:
    """
    This function will be called from the parsing loop to allow the model to set itself
    as the answer on the classification instance.
    """
    ...

OntologyDataModel

Bases: Generic[OntologyType]

Class to create a pydantic model equivalent to an arbitrary classification ontology.

The model can be used to form a json schema based on the ontology. This is useful if you are, e.g., trying to get a structured response from an LLM.

Example:

from pydantic import ValidationError

classifications = project.ontology_structure.classifications
objects = project.ontology_structure.objects

data_model = OntologyDataModel(objects)
# or
data_model = OntologyDataModel(classifications)

# Get a json schema for the ontology
print(data_model.model_json_schema_str)

# Parse json following the schema into label instances
json_str = my_favourite_llm(
    f"what is this? pls follow {data_model.model_json_schema_str}", img
)
try:
    instances = data_model(json_str)
except ValidationError:
    # invalid json
    ...

for ins in instances:
    label_row.add_classification_instance(ins)

label_row.save()

For a concrete example, please see

Attributes:

  • ontology – The ontology objects that the model is built from.
  • DataModel (BaseModel) – The pydantic model class constructed from the ontology.
Source code in encord_agents/core/ontology.py
class OntologyDataModel(Generic[OntologyType]):
    """
    Class to create a pydantic model equivalent to an arbitrary classification ontology.

    The model can be used to form a json schema based on the ontology. This is useful if
    you are, e.g., trying to get a structured response from an LLM.

    **Example:**

    ```python
    from pydantic import ValidationError

    classifications = project.ontology_structure.classifications
    objects = project.ontology_structure.objects

    data_model = OntologyDataModel(objects)
    # or
    data_model = OntologyDataModel(classifications)

    # Get a json schema for the ontology
    print(data_model.model_json_schema_str)

    # Parse json following the schema into label instances
    json_str = my_favourite_llm(
        f"what is this? pls follow {data_model.model_json_schema_str}", img
    )
    try:
        instances = data_model(json_str)
    except ValidationError:
        # invalid json
        ...

    for ins in instances:
        label_row.add_classification_instance(ins)

    label_row.save()
    ```

    For a concrete example, please see [](TODO)

    Attributes:
        ontology: The ontology objects that the model is built from.
        DataModel: The pydantic model class constructed from the ontology.
    """

    def __init__(self, root_obj: list[OntologyType] | OntologyType):
        _root_obj: list[OntologyType]
        if isinstance(root_obj, list):
            assert len(root_obj) != 0, "No ontology objects given to transform into a pydantic model"
            first, *rest = root_obj
            assert all(
                (isinstance(r, type(first)) for r in rest)
            ), "You cannot mix classifications and objects in the same model"
            _root_obj = root_obj
        else:
            _root_obj = [root_obj]

        self.ontology_lookup: dict[str, OntologyType] = {
            a.feature_node_hash: r for r in _root_obj for a in r.attributes
        }
        self.DataModel: BaseModel
        if isinstance(_root_obj[0], Object):
            legal_shapes = {Shape.BOUNDING_BOX, Shape.BITMASK, Shape.POLYGON, Shape.ROTATABLE_BOUNDING_BOX}

            illegal_objects = [o for o in _root_obj if o.shape not in legal_shapes]  # type: ignore

            if illegal_objects:
                illegal_names = [f'Object(name="{o.name}", shape={o.shape})' for o in illegal_objects]  # type: ignore
                assert not illegal_objects, f"Illegal shapes in provided ontology objects: `{illegal_names}`"

            self.DataModel = create_objects_model(_root_obj)  # type: ignore

        else:
            # Classifications can be built into one model
            classification_fields = dict([construct_fields(attr) for clf in _root_obj for attr in clf.attributes])
            self.DataModel: BaseModel = create_model("ClassificationModel", **classification_fields)  # type: ignore

    @property
    def model_json_schema(self) -> dict[str, Any]:
        return self.DataModel.model_json_schema()

    @property
    def model_json_schema_str(self) -> str:
        return json.dumps(self.model_json_schema)

    @overload
    def __call__(self: "OntologyDataModel[Classification]", answer: str) -> list[ClassificationInstance]: ...

    @overload
    def __call__(self: "OntologyDataModel[Object]", answer: str) -> ObjectInstance: ...

    def __call__(
        self: "OntologyDataModel[Classification] | OntologyDataModel[Object]", answer: str
    ) -> list[ClassificationInstance] | ObjectInstance:
        """
        Validate a json response in accordance with the pydantic model.

        This function allows you to convert from a json object (e.g., coming from an llm)
        back to the encord "instance format".

        Args:
            answer: The json object as a raw string.

        Returns: a list of classification / object instances that you will then
            have to add to a label row.

        """
        return self.validate_json(answer)

    @overload
    def validate_json(self: "OntologyDataModel[Classification]", answer_str: str) -> list[ClassificationInstance]: ...

    @overload
    def validate_json(self: "OntologyDataModel[Object]", answer_str: str) -> ObjectInstance: ...

    def validate_json(
        self: "OntologyDataModel[Classification] | OntologyDataModel[Object]", answer_str: str
    ) -> list[ClassificationInstance] | ObjectInstance:
        """
        Validate a json response in accordance with the pydantic model.

        This function allows you to convert from a json object (e.g., coming from an llm)
        back to the encord "instance format".

        Args:
            answer_str: The json object as a raw string.

        Returns: a list of classification / object instances that you will then
            have to add to a label row.

        """
        answer = self.DataModel.model_validate_json(answer_str)
        # ^ if classification has a property per top-level classification in the ontology

        if self.DataModel.__name__ == OBJECTS_RADIO_MODEL:  # type: ignore
            ont_obj = answer.get_ontology_object()  # type: ignore
            ins = ont_obj.create_instance()

            if ont_obj.attributes:
                for attr_key, attr_val in vars(answer).items():
                    if attr_key == "feature_node_hash":
                        continue
                    attr_val.set_answer(ins)
            return ins
        else:
            answers = []
            for attr_val in vars(answer).values():
                ont_cls = self.ontology_lookup[attr_val.feature_node_hash]
                ins = ont_cls.create_instance()
                attr_val.set_answer(ins)
                answers.append(ins)

            return answers
__call__
__call__(answer: str) -> list[ClassificationInstance]
__call__(answer: str) -> ObjectInstance
__call__(answer: str) -> list[ClassificationInstance] | ObjectInstance

Validate a json response in accordance with the pydantic model.

This function allows you to convert from a json object (e.g., coming from an llm) back to the encord "instance format".

Parameters:

  • answer (str) –

    The json object as a raw string.

Returns:

  • list[ClassificationInstance] | ObjectInstance

    A list of classification / object instances that you will then have to add to a label row.

Source code in encord_agents/core/ontology.py
def __call__(
    self: "OntologyDataModel[Classification] | OntologyDataModel[Object]", answer: str
) -> list[ClassificationInstance] | ObjectInstance:
    """
    Validate a json response in accordance with the pydantic model.

    This function allows you to convert from a json object (e.g., coming from an llm)
    back to the encord "instance format".

    Args:
        answer: The json object as a raw string.

    Returns: a list of classification / object instances that you will then
        have to add to a label row.

    """
    return self.validate_json(answer)
validate_json
validate_json(answer_str: str) -> list[ClassificationInstance]
validate_json(answer_str: str) -> ObjectInstance
validate_json(answer_str: str) -> list[ClassificationInstance] | ObjectInstance

Validate a json response in accordance with the pydantic model.

This function allows you to convert from a json object (e.g., coming from an llm) back to the encord "instance format".

Parameters:

  • answer_str (str) –

    The json object as a raw string.

Returns:

  • list[ClassificationInstance] | ObjectInstance

    A list of classification / object instances that you will then have to add to a label row.

Source code in encord_agents/core/ontology.py
def validate_json(
    self: "OntologyDataModel[Classification] | OntologyDataModel[Object]", answer_str: str
) -> list[ClassificationInstance] | ObjectInstance:
    """
    Validate a json response in accordance with the pydantic model.

    This function allows you to convert from a json object (e.g., coming from an llm)
    back to the encord "instance format".

    Args:
        answer_str: The json object as a raw string.

    Returns: a list of classification / object instances that you will then
        have to add to a label row.

    """
    answer = self.DataModel.model_validate_json(answer_str)
    # ^ if classification has a property per top-level classification in the ontology

    if self.DataModel.__name__ == OBJECTS_RADIO_MODEL:  # type: ignore
        ont_obj = answer.get_ontology_object()  # type: ignore
        ins = ont_obj.create_instance()

        if ont_obj.attributes:
            for attr_key, attr_val in vars(answer).items():
                if attr_key == "feature_node_hash":
                    continue
                attr_val.set_answer(ins)
        return ins
    else:
        answers = []
        for attr_val in vars(answer).values():
            ont_cls = self.ontology_lookup[attr_val.feature_node_hash]
            ins = ont_cls.create_instance()
            attr_val.set_answer(ins)
            answers.append(ins)

        return answers
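As a usage sketch, tying the pieces together (this assumes an existing project with a classification-only ontology, an initialised label_row, and an llm_response JSON string that follows the model's schema):

from encord_agents.core.ontology import OntologyDataModel

# Assumed to exist already: `project` (an encord Project), `label_row`
# (an initialised LabelRowV2), and `llm_response` (a JSON string).
data_model = OntologyDataModel(project.ontology_structure.classifications)
instances = data_model.validate_json(llm_response)  # raises ValidationError on malformed json

for ins in instances:
    label_row.add_classification_instance(ins)
label_row.save()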

encord_agents.core.settings

Settings used throughout the module.

Note that central settings will be read via environment variables.

Settings

Bases: BaseSettings

Source code in encord_agents/core/settings.py
class Settings(BaseSettings):
    ssh_key_file: Optional[Path] = Field(validation_alias="ENCORD_SSH_KEY_FILE", default=None)
    """
    The path to the private ssh key file to authenticate with Encord.

    Either this or the `ENCORD_SSH_KEY` needs to be set for most use-cases.
    To set up a key with Encord, please see
    [the platform docs](https://docs.encord.com/platform-documentation/Annotate/annotate-api-keys).
    """
    ssh_key_content: Optional[str] = Field(validation_alias="ENCORD_SSH_KEY", default=None)
    """
    The content of the private ssh key file to authenticate with Encord.

    Either this or the `ENCORD_SSH_KEY_FILE` needs to be set for most use-cases.
    To set up a key with Encord, please see
    [the platform docs](https://docs.encord.com/platform-documentation/Annotate/annotate-api-keys).
    """
    domain: Optional[str] = Field(validation_alias="ENCORD_DOMAIN", default=None)

    @field_validator("ssh_key_content")
    @classmethod
    def check_key_content(cls, content: str | None):
        if content is None:
            return content

        file_attempt = Path(content).expanduser()
        if file_attempt.is_file():
            raise PrintableError(
                f"The env variable `[blue]ENCORD_SSH_KEY[/blue]` (={file_attempt}) is set with a value that looks like a path and not ssh key content. Did you mean to set the `[blue]ENCORD_SSH_KEY_FILE[/blue]` environment variable with the private key file content directly?"
            )

        return content

    @field_validator("ssh_key_file")
    @classmethod
    def check_path_expand_and_exists(cls, path: Path | None):
        if path is None:
            return path

        path = path.expanduser()

        if not path.is_file():
            raise PrintableError(
                "The env variable `[blue]ENCORD_SSH_KEY_FILE[/blue]` is set with a value that could not be found in the file system. Did you mean to set the `[blue]ENCORD_SSH_KEY[/blue]` environment variable with the private key file content directly?"
            )

        return path

    @model_validator(mode="after")
    def check_key(self):
        if not any(map(bool, [self.ssh_key_content, self.ssh_key_file])):
            raise PrintableError(
                f"Must specify either `[blue]ENCORD_SSH_KEY_FILE[/blue]` or `[blue]ENCORD_SSH_KEY[/blue]` env variables. If you don't have an ssh key, please refere to our docs:{os.linesep}[magenta]https://docs.encord.com/platform-documentation/Annotate/annotate-api-keys#creating-keys-using-terminal-powershell[/magenta]"
            )

        if all(map(bool, [self.ssh_key_file, self.ssh_key_content])):
            import warnings

            warnings.warn(
                "You have configured both the `ENCORD_SSH_KEY` and `ENCORD_SSH_KEY_FILE`. The `ENCORD_SSH_KEY` will take precedence."
            )

        return self

    @property
    def ssh_key(self) -> str:
        return self.ssh_key_content if self.ssh_key_content else self.ssh_key_file.read_text()
ssh_key_content class-attribute instance-attribute
ssh_key_content: Optional[str] = Field(validation_alias='ENCORD_SSH_KEY', default=None)

The content of the private ssh key file to authenticate with Encord.

Either this or the ENCORD_SSH_KEY_FILE needs to be set for most use-cases. To set up a key with Encord, please see the platform docs.

ssh_key_file class-attribute instance-attribute
ssh_key_file: Optional[Path] = Field(validation_alias='ENCORD_SSH_KEY_FILE', default=None)

The path to the private ssh key file to authenticate with Encord.

Either this or the ENCORD_SSH_KEY needs to be set for most use-cases. To set up a key with Encord, please see the platform docs.
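
A minimal configuration sketch (the key path is a placeholder; the file must exist, otherwise validation raises a PrintableError):

import os
from encord_agents.core.settings import Settings

os.environ["ENCORD_SSH_KEY_FILE"] = "~/.ssh/encord_private_key"  # placeholder path
settings = Settings()
print(settings.ssh_key)  # the private key content, read from the file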

encord_agents.core.utils

download_asset

download_asset(lr: LabelRowV2, frame: int | None) -> Generator[Path, None, None]

Download the asset associated to a label row to disk.

This function is a context manager. Data will be cleaned up when the context is left.

Example usage:

with download_asset(lr, 10) as asset_path:
    # In here the file exists
    pixel_values = np.asarray(Image.open(asset_path))

# outside, it will be cleaned up

Parameters:

  • lr (LabelRowV2) –

    The label row for which you want to download the associated asset.

  • frame (int | None) –

    The frame that you need. If frame is None for a video, you will get the video path.

Raises:

  • NotImplementedError

    If you try to get all frames of an image group.

  • ValueError

    If you try to download an unsupported data type (e.g., DICOM).

Yields:

  • Path

    The file path for the requested asset.

Source code in encord_agents/core/utils.py
@contextmanager
def download_asset(lr: LabelRowV2, frame: int | None) -> Generator[Path, None, None]:
    """
    Download the asset associated to a label row to disk.

    This function is a context manager. Data will be cleaned up when the context is left.

    Example usage:

        with download_asset(lr, 10) as asset_path:
            # In here the file exists
            pixel_values = np.asarray(Image.open(asset_path))

        # outside, it will be cleaned up

    Args:
        lr: The label row for which you want to download the associated asset.
        frame: The frame that you need. If frame is None for a video, you will get the video path.

    Raises:
        NotImplementedError: If you try to get all frames of an image group.
        ValueError: If you try to download an unsupported data type (e.g., DICOM).


    Yields:
        The file path for the requested asset.

    """
    video_item, images_list = lr._project_client.get_data(lr.data_hash, get_signed_url=True)
    if lr.data_type in [DataType.VIDEO, DataType.IMAGE] and video_item:
        url = video_item["file_link"]
    elif lr.data_type == DataType.IMG_GROUP and images_list:
        if frame is None:
            raise NotImplementedError(
                "Downloading entire image group is not supported. Please contact Encord at support@encord.com for help or submit a PR with an implementation."
            )
        url = images_list[frame]["file_link"]
    else:
        raise ValueError(f"Couldn't load asset of type {lr.data_type}")

    response = requests.get(url)
    response.raise_for_status()

    suffix = _guess_file_suffix(url, lr)
    file_path = Path(lr.data_hash).with_suffix(suffix)
    with open(file_path, "wb") as f:
        f.write(response.content)

    files_to_unlink = [file_path]
    if lr.data_type == DataType.VIDEO and frame is not None:  # Get that exact frame
        frame_content = get_frame(file_path, frame)
        frame_file = file_path.with_name(f"{file_path.name}_{frame}").with_suffix(".png")
        cv2.imwrite(frame_file.as_posix(), frame_content)
        files_to_unlink.append(frame_file)
        file_path = frame_file
    try:
        yield file_path
    finally:
        [f.unlink(missing_ok=True) for f in files_to_unlink]

get_initialised_label_row

get_initialised_label_row(frame_data: FrameData) -> LabelRowV2

Get an initialised label row from the frame_data information.

Parameters:

  • frame_data (FrameData) –

    The data pointing to the data asset.

Raises:

  • Exception

    If the frame_data cannot be matched to a label row

Returns:

  • LabelRowV2

    The initialized label row.

Source code in encord_agents/core/utils.py
def get_initialised_label_row(frame_data: FrameData) -> LabelRowV2:
    """
    Get an initialised label row from the frame_data information.

    Args:
        frame_data: The data pointing to the data asset.

    Raises:
        Exception: If the `frame_data` cannot be matched to a label row

    Returns:
        The initialized label row.

    """
    user_client = get_user_client()
    project = user_client.get_project(str(frame_data.project_hash))
    matched_lrs = project.list_label_rows_v2(data_hashes=[frame_data.data_hash])
    num_matches = len(matched_lrs)
    if num_matches > 1:
        raise Exception(f"Non unique match: matched {num_matches} label rows!")
    elif num_matches == 0:
        raise Exception("No label rows were matched!")
    lr = matched_lrs.pop()
    lr.initialise_labels(include_signed_url=True)
    return lr
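
A usage sketch, assuming payload is the JSON body that the Label Editor posts to your agent:

from encord_agents.core.data_model import FrameData
from encord_agents.core.utils import get_initialised_label_row

frame_data = FrameData.model_validate(payload)  # `payload` is assumed to exist
label_row = get_initialised_label_row(frame_data)
print(label_row.data_title)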

get_user_client cached

get_user_client() -> EncordUserClient

Generate a user client to access Encord.

Returns:

  • EncordUserClient

    An EncordUserClient authenticated with the credentials from the encord_agents.core.settings.Settings.

Source code in encord_agents/core/utils.py
@lru_cache(maxsize=1)
def get_user_client() -> EncordUserClient:
    """
    Generate a user client to access Encord.

    Returns:
        An EncordUserClient authenticated with the credentials from the encord_agents.core.settings.Settings.

    """
    settings = Settings()  # type: ignore
    kwargs: dict[str, Any] = {"domain": settings.domain} if settings.domain else {}
    return EncordUserClient.create_with_ssh_private_key(ssh_private_key=settings.ssh_key, **kwargs)

encord_agents.core.video

get_frame

get_frame(video_path: Path, desired_frame: int) -> NDArray[np.uint8]

Extract an exact frame from a video.

Parameters:

  • video_path (Path) –

    The file path to where the video is stored.

  • desired_frame (int) –

    The frame to extract

Raises:

  • Exception

    If the video cannot be opened properly or the requested frame could not be retrieved from the video.

Returns:

  • NDArray[uint8]

    Numpy array of shape [h, w, c] where channels are RGB.

Source code in encord_agents/core/video.py
def get_frame(video_path: Path, desired_frame: int) -> NDArray[np.uint8]:
    """
    Extract an exact frame from a video.

    Args:
        video_path: The file path to where the video is stored.
        desired_frame: The frame to extract

    Raises:
        Exception:  If the video cannot be opened properly or the requested
            frame could not be retrieved from the video.

    Returns:
        Numpy array of shape [h, w, c] where channels are RGB.

    """
    cap = cv2.VideoCapture(video_path.as_posix())
    if not cap.isOpened():
        raise Exception("Error opening video file.")

    cap.set(cv2.CAP_PROP_POS_FRAMES, desired_frame)

    ret, frame = cap.read()
    if not ret:
        raise Exception("Error retrieving frame.")

    cap.release()
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    return frame.astype(np.uint8)
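
A usage sketch (video.mp4 is a placeholder path to a local video file):

from pathlib import Path
from encord_agents.core.video import get_frame

rgb = get_frame(Path("video.mp4"), desired_frame=42)
print(rgb.shape)  # (height, width, 3), RGB channel order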

iter_video

iter_video(video_path: Path) -> Iterator[Frame]

Iterate video frame by frame.

Parameters:

  • video_path (Path) –

    The file path to the video you wish to iterate.

Raises:

  • Exception

    If the video file could not be opened properly.

Yields:

  • Frame

    Frames from the video.

Source code in encord_agents/core/video.py
def iter_video(video_path: Path) -> Iterator[Frame]:
    """
    Iterate video frame by frame.

    Args:
        video_path: The file path to the video you wish to iterate.

    Raises:
        Exception: If the video file could not be opened properly.

    Yields:
        Frames from the video.

    """
    cap = cv2.VideoCapture(video_path.as_posix())
    if not cap.isOpened():
        raise Exception("Error opening video file.")

    frame_num = 0
    ret, frame = cap.read()
    while ret:
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        yield Frame(frame=frame_num, content=rgb_frame.astype(np.uint8))

        ret, frame = cap.read()
        frame_num += 1

    cap.release()
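
A usage sketch (again with a placeholder path), e.g., sampling every 30th frame:

from pathlib import Path
from encord_agents.core.video import iter_video

for frame in iter_video(Path("video.mp4")):
    if frame.frame % 30 == 0:
        print(frame.frame, frame.content.shape)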