
Core Reference

encord_agents.core.data_model

EditorAgentResponse

Bases: BaseModel

A base class for all return types of editor agent functions.

Source code in encord_agents/core/data_model.py
class EditorAgentResponse(BaseModel):
    """
    A base class for all return types of editor agent functions.
    """

    message: str | None = None
    """
    A message to be displayed to the user.
    """
message class-attribute instance-attribute
message: str | None = None

A message to be displayed to the user.
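A minimal sketch of returning a response from an editor agent; the handler name and message text below are illustrative, not part of the library:

from encord_agents.core.data_model import EditorAgentResponse
from encord_agents.gcp import editor_agent

@editor_agent()
def my_agent() -> EditorAgentResponse:
    # ... do some work on the underlying task ...
    return EditorAgentResponse(message="Agent finished processing this frame.")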

Frame dataclass

A dataclass to hold the content of one frame in a video.

Source code in encord_agents/core/data_model.py
@dataclass(frozen=True)
class Frame:
    """
    A dataclass to hold the content of one frame in a video.
    """

    frame: int
    """
    The frame number within the video
    """
    content: "NDArray[np.uint8]"
    """
    An [h,w,c] np.array with color channels RGB.
    """

    @overload
    def b64_encoding(
        self,
        image_format: Base64Formats = ".jpeg",
        output_format: Literal["raw", "url"] = "raw",
    ) -> str: ...

    @overload
    def b64_encoding(
        self,
        image_format: Literal[".jpeg", ".jpg", ".png"] = ".jpeg",
        output_format: Literal["openai", "anthropic"] = "openai",
    ) -> dict[str, str | dict[str, str]]: ...

    def b64_encoding(
        self,
        image_format: Literal[".jpeg", ".jpg", ".png"] = ".jpeg",
        output_format: Literal["url", "openai", "anthropic", "raw"] = "url",
    ) -> str | dict[str, str | dict[str, str]]:
        """
        Get a base64 representation of the image content.

        This method allows you to convert the content into a base64 representation
        based on various different image encodings.
        This is useful, e.g., for prompting LLMs with image content.


        Please see details for formats below.

        Args:
            image_format: Which type of image encoding to use.
            output_format: Different common formats.
                - `raw`: the image content as a raw b64 string
                - `url`: url encoded image content. Compatible with, e.g., `<img src="<the_encoding>" />`
                - `openai`: a dict with `type` and `image_url` keys
                - `anthropic`: a dict with `media_type`, `type`, and `data` keys.

        Returns: a dict or string depending on `output_format`.

        """
        from encord_agents.core.vision import DATA_TYPES, b64_encode_image

        b64_str = b64_encode_image(self.content, image_format)
        if output_format == "raw":
            return b64_str

        media_type = DATA_TYPES.get(image_format, f"image/{image_format.replace('.', '')}")
        image_url = f"data:{media_type};base64,{b64_str}"
        if output_format == "url":
            return image_url
        elif output_format == "openai":
            return {
                "type": "image_url",
                "image_url": {
                    "url": image_url,
                },
            }
        elif output_format == "anthropic":
            return {
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": media_type,
                    "data": b64_str,
                },
            }
content instance-attribute
content: NDArray[uint8]

An [h,w,c] np.array with color channels RGB.

frame instance-attribute
frame: int

The frame number within the video

b64_encoding
b64_encoding(image_format: Base64Formats = '.jpeg', output_format: Literal['raw', 'url'] = 'raw') -> str
b64_encoding(image_format: Literal['.jpeg', '.jpg', '.png'] = '.jpeg', output_format: Literal['openai', 'anthropic'] = 'openai') -> dict[str, str | dict[str, str]]
b64_encoding(image_format: Literal['.jpeg', '.jpg', '.png'] = '.jpeg', output_format: Literal['url', 'openai', 'anthropic', 'raw'] = 'url') -> str | dict[str, str | dict[str, str]]

Get a base64 representation of the image content.

This method converts the content into a base64 representation using one of several image encodings. This is useful, e.g., for prompting LLMs with image content.

Please see details for formats below.

Parameters:

  • image_format (Literal['.jpeg', '.jpg', '.png'], default: '.jpeg' ) –

    Which type of image encoding to use.

  • output_format (Literal['url', 'openai', 'anthropic', 'raw'], default: 'url' ) –

    Different common formats:

      - raw: the image content as a raw b64 string
      - url: URL-encoded image content. Compatible with, e.g., <img src="<the_encoding>" />
      - openai: a dict with type and image_url keys
      - anthropic: a dict with media_type, type, and data keys

Returns: a dict or string depending on output_format.
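A small illustration of the different output formats, assuming frame is a Frame instance; the commented OpenAI-style message layout is an assumption about the consumer, not part of this library:

raw_b64 = frame.b64_encoding(output_format="raw")    # plain base64 string
data_url = frame.b64_encoding(output_format="url")   # "data:image/jpeg;base64,..."
openai_block = frame.b64_encoding(output_format="openai")
# The openai block can be placed inside a vision-style chat message, e.g.:
# {"role": "user", "content": [{"type": "text", "text": "Describe this"}, openai_block]}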

Source code in encord_agents/core/data_model.py
def b64_encoding(
    self,
    image_format: Literal[".jpeg", ".jpg", ".png"] = ".jpeg",
    output_format: Literal["url", "openai", "anthropic", "raw"] = "url",
) -> str | dict[str, str | dict[str, str]]:
    """
    Get a base64 representation of the image content.

    This method allows you to convert the content into a base64 representation
    based on various different image encodings.
    This is useful, e.g., for prompting LLMs with image content.


    Please see details for formats below.

    Args:
        image_format: Which type of image encoding to use.
        output_format: Different common formats.
            - `raw`: the image content as a raw b64 string
            - `url`: url encoded image content. Compatible with, e.g., `<img src="<the_encoding>" />`
            - `openai`: a dict with `type` and `image_url` keys
            - `anthropic`: a dict with `media_type`, `type`, and `data` keys.

    Returns: a dict or string depending on `output_format`.

    """
    from encord_agents.core.vision import DATA_TYPES, b64_encode_image

    b64_str = b64_encode_image(self.content, image_format)
    if output_format == "raw":
        return b64_str

    media_type = DATA_TYPES.get(image_format, f"image/{image_format.replace('.', '')}")
    image_url = f"data:{media_type};base64,{b64_str}"
    if output_format == "url":
        return image_url
    elif output_format == "openai":
        return {
            "type": "image_url",
            "image_url": {
                "url": image_url,
            },
        }
    elif output_format == "anthropic":
        return {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": media_type,
                "data": b64_str,
            },
        }

FrameData

Bases: BaseModel

Holds the data sent from the Encord Label Editor at the time of triggering the agent.

Source code in encord_agents/core/data_model.py
class FrameData(BaseModel):
    """
    Holds the data sent from the Encord Label Editor at the time of triggering the agent.
    """

    project_hash: UUID = Field(alias="projectHash")
    """
    The identifier of the given project.
    """
    data_hash: UUID = Field(alias="dataHash")
    """
    The identifier of the given data asset.
    """
    frame: int = Field(ge=0)
    """
    The frame number. For a single image, it defaults to 0.
    """
    object_hashes: list[str] | None = Field(alias="objectHashes", default=None)
    """
    Object hashes if the request was made on particular objects from the App
    """
data_hash class-attribute instance-attribute
data_hash: UUID = Field(alias='dataHash')

The identifier of the given data asset.

frame class-attribute instance-attribute
frame: int = Field(ge=0)

The frame number. For a single image, it defaults to 0.

object_hashes class-attribute instance-attribute
object_hashes: list[str] | None = Field(alias='objectHashes', default=None)

Object hashes if the request was made on particular objects from the App

project_hash class-attribute instance-attribute
project_hash: UUID = Field(alias='projectHash')

The identifier of the given project.
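A minimal sketch of parsing an editor payload into FrameData by hand; the hashes below are placeholders, and in practice the model is injected automatically. Note that the payload uses the camelCase aliases:

from uuid import uuid4
from encord_agents.core.data_model import FrameData

payload = {
    "projectHash": str(uuid4()),
    "dataHash": str(uuid4()),
    "frame": 10,
    "objectHashes": ["abcDEF12"],
}
frame_data = FrameData.model_validate(payload)
print(frame_data.project_hash, frame_data.frame)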

InstanceCrop dataclass

Bases: Frame

A dataclass to hold the frame content of one object instance in a video or image.

Source code in encord_agents/core/data_model.py
@dataclass(frozen=True)
class InstanceCrop(Frame):
    """
    A dataclass to hold the frame content of one object instance in a video or image.
    """

    instance: ObjectInstance
    r"""
    The [ObjectInstance](https://docs.encord.com/sdk-documentation/sdk-references/ObjectInstance#objectinstance){ target="\_blank", rel="noopener noreferrer" } associated with the crop.
    """
instance instance-attribute
instance: ObjectInstance

The ObjectInstance associated with the crop.

LabelRowInitialiseLabelsArgs

Bases: BaseModel

Arguments used to specify how to initialise labels via the SDK.

The arguments are passed to LabelRowV2.initialise_labels.

Source code in encord_agents/core/data_model.py
class LabelRowInitialiseLabelsArgs(BaseModel):
    """
    Arguments used to specify how to initialise labels via the SDK.

    The arguments are passed to `LabelRowV2.initialise_labels`.
    """

    include_object_feature_hashes: set[str] | None = None
    include_classification_feature_hashes: set[str] | None = None
    include_reviews: bool = False
    overwrite: bool = False
    include_signed_url: bool = False

LabelRowMetadataIncludeArgs

Bases: BaseModel

Warning: including metadata via label rows is suitable for reading metadata, not for writing it.

If you need to write metadata, use the dep_storage_item dependency instead.

Source code in encord_agents/core/data_model.py
class LabelRowMetadataIncludeArgs(BaseModel):
    """
    Warning, including metadata via label rows is good for _reading_ metadata
    **not** for writing to the metadata.

    If you need to write to metadata, use the `dep_storage_item` dependencies instead.
    """

    include_workflow_graph_node: bool = True
    include_client_metadata: bool = False
    include_images_data: bool = False
    include_all_label_branches: bool = False
    branch_name: str | None = None

    @model_validator(mode="after")
    def check_branches_consistent(self) -> Self:
        if self.branch_name and self.include_all_label_branches:
            raise ValueError("Can't request all branches and a specific branch")
        return self
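A minimal sketch of constructing the two argument models above; where they are handed off (e.g., to an agent decorator or task runner) depends on your entry point, so treat that part as an assumption:

from encord_agents.core.data_model import (
    LabelRowInitialiseLabelsArgs,
    LabelRowMetadataIncludeArgs,
)

include_args = LabelRowMetadataIncludeArgs(
    include_client_metadata=True,       # read-only metadata on the label row
    include_all_label_branches=True,    # mutually exclusive with branch_name
)
init_args = LabelRowInitialiseLabelsArgs(
    include_signed_url=True,
    overwrite=False,
)
# These objects are then passed to the component that fetches and
# initialises label rows on your behalf.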

encord_agents.core.dependencies

serverless

This module defines dependencies available for injection within serverless Editor Agents. These dependencies can be used on their own, even when they themselves rely on other dependencies.

Note: The injection mechanism requires type annotations on the following parameters so they can be resolved correctly.

from encord.project import Project
from encord.objects.ontology_labels_impl import LabelRowV2
from encord_agents import FrameData
...
@editor_agent()
def my_agent(
    frame_data: FrameData,
    project: Project,
    label_row: LabelRowV2,
):
    ...
  • FrameData is automatically injected via the API request body.
  • Project is automatically loaded based on the frame data.
  • LabelRowV2 is automatically loaded based on the frame data.
DAssetPath module-attribute
DAssetPath = Annotated[Path, Depends(dep_asset)]

Get a local file path to data asset temporarily stored till end of agent execution.

DEncordClient module-attribute
DEncordClient = Annotated[EncordUserClient, Depends(dep_client)]

Get an authenticated user client.

DObjectCrops module-attribute
DObjectCrops = Annotated[list[InstanceCrop], Depends(dep_object_crops)]

Get all object crops that the agent was triggered on. The instance crop contains the object instance, the frame content (pixel values), and the frame.

DObjectsInstances module-attribute
DObjectsInstances = Annotated[list[ObjectInstance], Depends(dep_objects)]

Get all object instances that the agent was triggered on. No pixels, just the annotation.

DSingleFrame module-attribute
DSingleFrame = Annotated[NDArray[uint8], Depends(dep_single_frame)]

Get the single frame that the agent was triggered on.

DStorageItem module-attribute
DStorageItem = Annotated[StorageItem, Depends(dep_storage_item)]

Get the storage item associated with the underlying agent task to, for example, read/write client metadata or read data properties.

DVideoIterator module-attribute
DVideoIterator = Annotated[Iterator[Frame], Depends(dep_video_iterator)]

Get a video frame iterator for doing things over many frames.
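A minimal sketch using the typed aliases above in an agent signature; the function body is illustrative:

from encord_agents.core.dependencies.serverless import DAssetPath, DObjectCrops
from encord_agents.gcp import editor_agent

@editor_agent()
def my_agent(asset: DAssetPath, crops: DObjectCrops) -> None:
    print(asset.suffix)  # local path to the downloaded asset
    for crop in crops:
        print(crop.frame, crop.content.shape)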

dep_asset
dep_asset(storage_item: StorageItem) -> Generator[Path, None, None]

Returns a local file path to the data asset, temporarily stored for the duration of the agent's execution.

This dependency fetches the underlying data asset using a signed URL.

The asset is temporarily stored on disk for the duration of the task and is automatically removed once the task completes.

Example:

from encord_agents.gcp import editor_agent
from encord_agents.gcp.dependencies import dep_asset
...
runner = Runner(project_hash="<project_hash_a>")

@editor_agent()
def my_agent(
    asset: Annotated[Path, Depends(dep_asset)]
) -> None:
    asset.stat()  # read file stats
    ...

Returns:

  • None –

    The path to the asset.

Raises:

  • ValueError –

    if the underlying assets are not videos, images, or audio.

  • EncordException –

    if the data type is not yet supported by the SDK.

Source code in encord_agents/core/dependencies/serverless.py
def dep_asset(storage_item: StorageItem) -> Generator[Path, None, None]:
    """
    Returns a local file path to the data asset, temporarily stored for the duration of the agent's execution.


    This dependency fetches the underlying data asset using a signed URL.

    The asset is temporarily stored on disk for the duration of the task and is automatically removed once the task
    completes.

    **Example:**

    ```python
    from encord_agents.gcp import editor_agent
    from encord_agents.gcp.dependencies import dep_asset
    ...
    runner = Runner(project_hash="<project_hash_a>")

    @editor_agent()
    def my_agent(
        asset: Annotated[Path, Depends(dep_asset)]
    ) -> None:
        asset.stat()  # read file stats
        ...
    ```

    Returns:
        The path to the asset.

    Raises:
        ValueError: if the underlying assets are not videos, images, or audio.
        EncordException: if data type not supported by SDK yet.
    """
    with download_asset(storage_item) as asset:
        yield asset
dep_client
dep_client() -> EncordUserClient

Dependency to provide an authenticated user client.

Example:

from encord.user_client import EncordUserClient
from encord_agents.gcp import editor_agent
from encord_agents.gcp.dependencies import dep_client
...
@editor_agent()
def my_agent(
    client: Annotated[EncordUserClient, Depends(dep_client)]
):
    # The client will be authenticated and ready to use.
    client.get_dataset("")
Source code in encord_agents/core/dependencies/serverless.py
def dep_client() -> EncordUserClient:
    """
    Dependency to provide an authenticated user client.

    **Example:**

    ```python
    from encord.user_client import EncordUserClient
    from encord_agents.gcp import editor_agent
    from encord_agents.gcp.dependencies import dep_client
    ...
    @editor_agent()
    def my_agent(
        client: Annotated[EncordUserClient, Depends(dep_client)]
    ):
        # The client will be authenticated and ready to use.
        client.get_dataset("")
    ```

    """
    return get_user_client()
dep_data_lookup
dep_data_lookup(lookup: Annotated[DataLookup, Depends(DataLookup.sharable)]) -> DataLookup

Returns a lookup for easily retrieving data rows and storage items associated with the given task.

Deprecated

dep_data_lookup is deprecated and will be removed in version 0.2.10. Use dep_storage_item instead for accessing storage items.

Migration Guide:

# Old way (deprecated)
from encord_agents.core.dependencies.serverless import dep_data_lookup, DataLookup

@editor_agent()
def my_agent(
    frame_data: FrameData,
    lookup: Annotated[DataLookup, Depends(dep_data_lookup)]
):
    storage_item = lookup.get_storage_item(frame_data.data_hash)
    ...

# New way (recommended)
from encord_agents.gcp.dependencies import dep_storage_item
# or from encord_agents.aws.dependencies import dep_storage_item
# or from encord_agents.fastapi.dependencies import dep_storage_item

@editor_agent()
def my_agent(
    frame_data: FrameData,
    storage_item: Annotated[StorageItem, Depends(dep_storage_item)]
):
    # storage_item is directly available
    print(storage_item.client_metadata)
    ...

Parameters:

  • lookup (Annotated[DataLookup, Depends(sharable)]) –

    The object that you can use to lookup data rows and storage items. Automatically injected.

Returns:

  • DataLookup –

    The (shared) lookup object.

Source code in encord_agents/core/dependencies/serverless.py
def dep_data_lookup(lookup: Annotated[DataLookup, Depends(DataLookup.sharable)]) -> DataLookup:
    """
    Returns a lookup for easily retrieving data rows and storage items associated with the given task.

    !!! warning "Deprecated"
        `dep_data_lookup` is deprecated and will be removed in version 0.2.10.
        Use `dep_storage_item` instead for accessing storage items.

    **Migration Guide:**

    ```python
    # Old way (deprecated)
    from encord_agents.core.dependencies.serverless import dep_data_lookup, DataLookup

    @editor_agent()
    def my_agent(
        frame_data: FrameData,
        lookup: Annotated[DataLookup, Depends(dep_data_lookup)]
    ):
        storage_item = lookup.get_storage_item(frame_data.data_hash)
        ...

    # New way (recommended)
    from encord_agents.gcp.dependencies import dep_storage_item
    # or from encord_agents.aws.dependencies import dep_storage_item
    # or from encord_agents.fastapi.dependencies import dep_storage_item

    @editor_agent()
    def my_agent(
        frame_data: FrameData,
        storage_item: Annotated[StorageItem, Depends(dep_storage_item)]
    ):
        # storage_item is directly available
        print(storage_item.client_metadata)
        ...
    ```

    Args:
        lookup: The object that you can use to lookup data rows and storage items. Automatically injected.

    Returns:
        The (shared) lookup object.

    """
    import warnings

    warnings.warn(
        "dep_data_lookup is deprecated and will be removed in version 0.2.10. "
        "Use 'dep_storage_item' instead for accessing storage items. "
        "See the function docstring for migration examples.",
        DeprecationWarning,
        stacklevel=2,
    )
    return lookup
dep_object_crops
dep_object_crops(filter_ontology_objects: list[Object | str] | None = None) -> Callable[[FrameData, LabelRowV2, NDArray[np.uint8]], list[InstanceCrop]]

Returns a list of object instances and frame crops associated with each object.

One example use-case is to run each crop against a model.

Example:

@editor_agent
def my_agent(crops: Annotated[list[InstanceCrop], Depends(dep_object_crops(filter_ontology_objects=["eBw/75bg"]))]):
    for crop in crops:
        crop.content  # <- this is raw numpy rgb values
        crop.frame    # <- this is the frame number in video
        crop.instance # <- this is the object instance from the label row
        crop.b64_encoding()  # <- a base64 encoding of the image content
    ...

Parameters:

  • filter_ontology_objects (list[Object | str] | None, default: None ) –

    Specify a list of ontology objects to include. If provided, only instances of these object types are included. Strings are matched against feature_node_hashes.

Returns: The dependency to be injected into the cloud function.

Source code in encord_agents/core/dependencies/serverless.py
def dep_object_crops(
    filter_ontology_objects: list[Object | str] | None = None,
) -> Callable[[FrameData, LabelRowV2, NDArray[np.uint8]], list[InstanceCrop]]:
    """
    Returns a list of object instances and frame crops associated with each object.

    One example use-case is to run each crop against a model.

    **Example:**

    ```python
    @editor_agent
    def my_agent(crops: Annotated[list[InstanceCrop], Depends(dep_object_crops(filter_ontology_objects=["eBw/75bg"]))]):
        for crop in crops:
            crop.content  # <- this is raw numpy rgb values
            crop.frame    # <- this is the frame number in video
            crop.instance # <- this is the object instance from the label row
            crop.b64_encoding()  # <- a base64 encoding of the image content
        ...
    ```

    Args:
        filter_ontology_objects: Specify a list of ontology objects to include.
            If provided, only instances of these object types are included.
            Strings are matched against `feature_node_hashes`.


    Returns: The dependency to be injected into the cloud function.

    """
    from encord_agents.core.vision import crop_to_object

    legal_feature_hashes = {
        o.feature_node_hash if isinstance(o, Object) else o for o in (filter_ontology_objects or [])
    }

    def _dep_object_crops(
        frame_data: FrameData, lr: LabelRowV2, frame: Annotated[NDArray[np.uint8], Depends(dep_single_frame)]
    ) -> list[InstanceCrop]:
        legal_shapes = {Shape.POLYGON, Shape.BOUNDING_BOX, Shape.ROTATABLE_BOUNDING_BOX, Shape.BITMASK}
        return [
            InstanceCrop(
                frame=frame_data.frame,
                content=crop_to_object(frame, o.get_annotation(frame=frame_data.frame).coordinates),  # type: ignore
                instance=o,
            )
            for o in lr.get_object_instances(filter_frames=frame_data.frame)
            if o.ontology_item.shape in legal_shapes
            and (not legal_feature_hashes or o.feature_hash in legal_feature_hashes)
            and (not frame_data.object_hashes or o.object_hash in frame_data.object_hashes)
        ]

    return _dep_object_crops
dep_single_frame
dep_single_frame(storage_item: StorageItem, frame_data: FrameData) -> NDArray[np.uint8]

Dependency to inject the first frame of the underlying asset.

The downloaded asset will be named lr.data_hash.{suffix}. When the function has finished running, the downloaded file is removed from the file system.

Example:

from encord_agents import FrameData
from encord_agents.gcp import editor_agent
from encord_agents.gcp.dependencies import dep_single_frame
...

@editor_agent()
def my_agent(
    frame: Annotated[NDArray[np.uint8], Depends(dep_single_frame)]
):
    assert frame.ndim == 3, "Will work"

Parameters:

  • storage_item (StorageItem) –

    The Storage item. Automatically injected (see example above).

Returns:

  • NDArray[uint8] –

    Numpy array of shape [h, w, 3] RGB colors.

Source code in encord_agents/core/dependencies/serverless.py
def dep_single_frame(storage_item: StorageItem, frame_data: FrameData) -> NDArray[np.uint8]:
    """
    Dependency to inject the first frame of the underlying asset.

    The downloaded asset will be named `lr.data_hash.{suffix}`.
    When the function has finished running, the downloaded file is removed from the file system.

    **Example:**

    ```python
    from encord_agents import FrameData
    from encord_agents.gcp import editor_agent
    from encord_agents.gcp.dependencies import dep_single_frame
    ...

    @editor_agent()
    def my_agent(
        frame: Annotated[NDArray[np.uint8], Depends(dep_single_frame)]
    ):
        assert frame.ndim == 3, "Will work"
    ```

    Args:
        storage_item: The Storage item. Automatically injected (see example above).

    Returns:
        Numpy array of shape [h, w, 3] RGB colors.

    """

    try:
        import cv2
    except ImportError:
        raise ImportError(
            "Your data agent is depending on computer vision capabilities and `opencv` is not installed. Please install either `opencv-python` or `opencv-python-headless`."
        )

    with download_asset(storage_item, frame=frame_data.frame) as asset:
        img = cv2.cvtColor(cv2.imread(asset.as_posix()), cv2.COLOR_BGR2RGB)

    return np.asarray(img, dtype=np.uint8)
dep_storage_item
dep_storage_item(storage_item: StorageItem) -> StorageItem

Get the storage item associated with the underlying agent task.

The StorageItem is useful for multiple things like

  • Updating client metadata
  • Reading file properties like storage location, fps, duration, DICOM tags, etc.

Example

from typing_extensions import Annotated
from encord.storage import StorageItem
from encord_agents.gcp import editor_agent, Depends
from encord_agents.gcp.dependencies import dep_storage_item


@editor_agent()
def my_agent(storage_item: Annotated[StorageItem, Depends(dep_storage_item)]):
    print("uuid", storage_item.uuid)
    print("client_metadata", storage_item.client_metadata)
    ...
Source code in encord_agents/core/dependencies/serverless.py
def dep_storage_item(storage_item: StorageItem) -> StorageItem:
    r"""
    Get the storage item associated with the underlying agent task.

    The [`StorageItem`](https://docs.encord.com/sdk-documentation/sdk-references/StorageItem){ target="\_blank", rel="noopener noreferrer" }
    is useful for multiple things like

    * Updating client metadata
    * Reading file properties like storage location, fps, duration, DICOM tags, etc.

    **Example**

    ```python
    from typing_extensions import Annotated
    from encord.storage import StorageItem
    from encord_agents.gcp import editor_agent, Depends
    from encord_agents.gcp.dependencies import dep_storage_item


    @editor_agent()
    def my_agent(storage_item: Annotated[StorageItem, Depends(dep_storage_item)]):
        print("uuid", storage_item.uuid)
        print("client_metadata", storage_item.client_metadata)
        ...
    ```

    """
    return storage_item
dep_video_iterator
dep_video_iterator(storage_item: StorageItem) -> Generator[Iterator[Frame], None, None]

Dependency to inject a video frame iterator for performing operations over many frames.

Example:

from encord_agents import FrameData
from encord_agents.gcp import editor_agent
from encord_agents.gcp.dependencies import dep_video_iterator
...

@editor_agent()
def my_agent(
    video_frames: Annotated[Iterator[Frame], Depends(dep_video_iterator)]
):
    for frame in video_frames:
        print(frame.frame, frame.content.shape)

Parameters:

  • storage_item (StorageItem) –

    Automatically injected storage item dependency.

Raises:

  • NotImplementedError –

    Fails for data types other than video.

Yields:

  • Iterator[Frame] –

    An iterator.

Source code in encord_agents/core/dependencies/serverless.py
def dep_video_iterator(storage_item: StorageItem) -> Generator[Iterator[Frame], None, None]:
    """
    Dependency to inject a video frame iterator for performing operations over many frames.

    **Example:**

    ```python
    from encord_agents import FrameData
    from encord_agents.gcp import editor_agent
    from encord_agents.gcp.dependencies import dep_video_iterator
    ...

    @editor_agent()
    def my_agent(
        video_frames: Annotated[Iterator[Frame], Depends(dep_video_iterator)]
    ):
        for frame in video_frames:
            print(frame.frame, frame.content.shape)
    ```

    Args:
        storage_item: Automatically injected storage item dependency.

    Raises:
        NotImplementedError: Fails for data types other than video.

    Yields:
        An iterator.

    """
    from encord_agents.core.video import iter_video

    if not storage_item.item_type == StorageItemType.VIDEO:
        raise NotImplementedError("`dep_video_iterator` only supported for video label rows")

    with download_asset(storage_item, None) as asset:
        yield iter_video(asset)

shares

DataLookup

Deprecated

DataLookup is deprecated and will be removed in version 0.2.10.

Migration Guide:

  • For accessing storage items, use dep_storage_item instead:
    # Old way (deprecated)
    from encord_agents.core.dependencies.shares import DataLookup
    lookup: Annotated[DataLookup, Depends(dep_data_lookup)]
    storage_item = lookup.get_storage_item(data_hash)
    
    # New way (recommended)
    from encord_agents.tasks.dependencies import dep_storage_item
    # or from encord_agents.aws.dependencies import dep_storage_item
    # or from encord_agents.gcp.dependencies import dep_storage_item
    # or from encord_agents.fastapi.dependencies import dep_storage_item
    storage_item: Annotated[StorageItem, Depends(dep_storage_item)]
    
Source code in encord_agents/core/dependencies/shares.py
class DataLookup:
    """
    !!! warning "Deprecated"
        `DataLookup` is deprecated and will be removed in version 0.2.10.

        **Migration Guide:**

        - For accessing storage items, use `dep_storage_item` instead:
          ```python
          # Old way (deprecated)
          from encord_agents.core.dependencies.shares import DataLookup
          lookup: Annotated[DataLookup, Depends(dep_data_lookup)]
          storage_item = lookup.get_storage_item(data_hash)

          # New way (recommended)
          from encord_agents.tasks.dependencies import dep_storage_item
          # or from encord_agents.aws.dependencies import dep_storage_item
          # or from encord_agents.gcp.dependencies import dep_storage_item
          # or from encord_agents.fastapi.dependencies import dep_storage_item
          storage_item: Annotated[StorageItem, Depends(dep_storage_item)]
          ```
    """

    __instances__: dict[UUID, DataLookup] = {}

    def __init__(self, dataset_hashes: list[str | UUID] | None = None) -> None:
        warnings.warn(
            "DataLookup is deprecated and will be removed in version 0.2.10. "
            "Use 'dep_storage_item' dependency instead for accessing storage items, "
            "or use the EncordUserClient directly for more complex data access patterns. "
            "See the class docstring for migration examples.",
            DeprecationWarning,
            stacklevel=2,
        )
        self.user_client = get_user_client()
        self.datasets = {UUID(d): self.user_client.get_dataset(d) for d in map(str, dataset_hashes or [])}
        self.data_rows = {dr.uid: dr for dataset in self.datasets.values() for dr in dataset.data_rows}

    @classmethod
    def sharable(cls, project: Project) -> DataLookup:
        warnings.warn(
            "DataLookup.sharable() is deprecated and will be removed in version 0.2.10. "
            "Use 'dep_storage_item' dependency instead for accessing storage items. "
            "See the DataLookup class docstring for migration examples.",
            DeprecationWarning,
            stacklevel=2,
        )
        ph = UUID(project.project_hash)
        if ph not in cls.__instances__:
            cls.__instances__[ph] = cls([ds.dataset_hash for ds in project.list_datasets()])
        return cls.__instances__[ph]

    def get_data_row(self, data_hash: str | UUID, dataset_hash: str | UUID | None = None) -> DataRow:
        warnings.warn(
            "DataLookup.get_data_row() is deprecated and will be removed in version 0.2.10. "
            "Use the EncordUserClient directly: "
            "project.list_label_rows_v2(data_hashes=[data_hash]) to get label rows, "
            "then access the data row via label_row.data_row if needed.",
            DeprecationWarning,
            stacklevel=2,
        )
        dr = self.data_rows.get(str(data_hash))
        if dr is None:
            if dataset_hash is not None:
                dataset = self.user_client.get_dataset(str(dataset_hash))
                self.datasets[UUID(str(dataset_hash))] = dataset
                self.data_rows.update({dr.uid: dr for dr in dataset.data_rows})
            else:
                raise ValueError(
                    "Cannot identify a data row without knowing the dataset hash. Please provide it to the function call or to the constructor of the `DataLookup`"
                )
            dr = self.data_rows.get(str(data_hash))
        if dr is None:
            raise ValueError("DatasetCache was not able to locate data row from data hash")
        return dr

    @property
    def backing_item_uuids(self) -> list[UUID]:
        """
        Get all backing item uuids for all data rows in the data lookup.

        !!! warning "Deprecated"
            This property is deprecated and will be removed in version 0.2.10.
            Use the EncordUserClient directly to access backing item UUIDs from label rows.
        """
        warnings.warn(
            "DataLookup.backing_item_uuids is deprecated and will be removed in version 0.2.10. "
            "Use the EncordUserClient directly to get backing item UUIDs from label rows: "
            "[lr.backing_item_uuid for lr in project.list_label_rows_v2()]",
            DeprecationWarning,
            stacklevel=2,
        )
        return [dr.backing_item_uuid for dr in self.data_rows.values()]

    def get_storage_item(
        self, data_hash: str | UUID, dataset_hash: str | UUID | None = None, sign_url: bool = False
    ) -> StorageItem:
        """
        !!! warning "Deprecated"
            This method is deprecated and will be removed in version 0.2.10.
            Use `dep_storage_item` dependency instead.

        Args:
            data_hash: Data hash for the asset for which you need the underlying storage item.
            dataset_hash: If you didn't provide the associated dataset hash in the constructor,
                this is your last chance.
            sign_url: If `True`, pre-fetch signed URLs for the items (otherwise the URLs will be signed on demand).

        Raises:
            ValueError: Mainly if underlying data row cannot be found.

        Returns:
            The underlying storage item from which, e.g., client metadata can be updated.

        """
        warnings.warn(
            "DataLookup.get_storage_item() is deprecated and will be removed in version 0.2.10. "
            "Use 'dep_storage_item' dependency instead: "
            "storage_item: Annotated[StorageItem, Depends(dep_storage_item)]",
            DeprecationWarning,
            stacklevel=2,
        )
        try:
            dr = self.get_data_row(data_hash, dataset_hash)
        except ValueError:
            raise ValueError(
                "DatasetCache was not able to locate storage_item because the associated data row could not be identified."
            )

        return self.user_client.get_storage_item(dr.backing_item_uuid, sign_url=sign_url)

    def get_storage_items(
        self, data_hashes: list[str | UUID], dataset_hash: str | UUID | None = None, sign_urls: bool = False
    ) -> list[StorageItem]:
        """
        !!! warning "Deprecated"
            This method is deprecated and will be removed in version 0.2.10.
            Use the EncordUserClient directly for bulk storage item access.

        Args:
            data_hashes: Data hashes for the assets for which you need the underlying storage items.
            dataset_hash: If you didn't provide the associated dataset hash in the constructor,
                this is your last chance.
            sign_urls: If `True`, pre-fetch signed URLs for the items (otherwise the URLs will be signed on demand).

        Raises:
            ValueError: Mainly if underlying data row cannot be found.

        Returns:
            list of underlying storage items from which, e.g., client metadata can be updated.
        """
        warnings.warn(
            "DataLookup.get_storage_items() is deprecated and will be removed in version 0.2.10. "
            "Use the EncordUserClient directly: "
            "client.get_storage_items([lr.backing_item_uuid for lr in label_rows], sign_url=sign_urls)",
            DeprecationWarning,
            stacklevel=2,
        )
        try:
            data_rows = [self.get_data_row(i, dataset_hash) for i in data_hashes]
        except ValueError:
            raise ValueError("Failed to load storage items because one or more data rows could not be obtained")

        return self.user_client.get_storage_items([dr.backing_item_uuid for dr in data_rows], sign_url=sign_urls)
backing_item_uuids property
backing_item_uuids: list[UUID]

Get all backing item uuids for all data rows in the data lookup.

Deprecated

This property is deprecated and will be removed in version 0.2.10. Use the EncordUserClient directly to access backing item UUIDs from label rows.

get_storage_item
get_storage_item(data_hash: str | UUID, dataset_hash: str | UUID | None = None, sign_url: bool = False) -> StorageItem

Deprecated

This method is deprecated and will be removed in version 0.2.10. Use dep_storage_item dependency instead.

Parameters:

  • data_hash (str | UUID) –

    Data hash for the asset for which you need the underlying storage item.

  • dataset_hash (str | UUID | None, default: None ) –

    If you didn't provide the associated dataset hash in the constructor, this is your last chance.

  • sign_url (bool, default: False ) –

    If True, pre-fetch signed URLs for the items (otherwise the URLs will be signed on demand).

Raises:

  • ValueError –

    Mainly if underlying data row cannot be found.

Returns:

  • StorageItem –

    The underlying storage item from which, e.g., client metadata can be updated.

Source code in encord_agents/core/dependencies/shares.py
def get_storage_item(
    self, data_hash: str | UUID, dataset_hash: str | UUID | None = None, sign_url: bool = False
) -> StorageItem:
    """
    !!! warning "Deprecated"
        This method is deprecated and will be removed in version 0.2.10.
        Use `dep_storage_item` dependency instead.

    Args:
        data_hash: Data hash for the asset for which you need the underlying storage item.
        dataset_hash: If you didn't provide the associated dataset hash in the constructor,
            this is your last chance.
        sign_url: If `True`, pre-fetch signed URLs for the items (otherwise the URLs will be signed on demand).

    Raises:
        ValueError: Mainly if underlying data row cannot be found.

    Returns:
        The underlying storage item from which, e.g., client metadata can be updated.

    """
    warnings.warn(
        "DataLookup.get_storage_item() is deprecated and will be removed in version 0.2.10. "
        "Use 'dep_storage_item' dependency instead: "
        "storage_item: Annotated[StorageItem, Depends(dep_storage_item)]",
        DeprecationWarning,
        stacklevel=2,
    )
    try:
        dr = self.get_data_row(data_hash, dataset_hash)
    except ValueError:
        raise ValueError(
            "DatasetCache was not able to locate storage_item because the associated data row could not be identified."
        )

    return self.user_client.get_storage_item(dr.backing_item_uuid, sign_url=sign_url)
get_storage_items
get_storage_items(data_hashes: list[str | UUID], dataset_hash: str | UUID | None = None, sign_urls: bool = False) -> list[StorageItem]

Deprecated

This method is deprecated and will be removed in version 0.2.10. Use the EncordUserClient directly for bulk storage item access.

Parameters:

  • data_hashes (list[str | UUID]) –

    Data hashes for the assets for which you need the underlying storage items.

  • dataset_hash (str | UUID | None, default: None ) –

    If you didn't provide the associated dataset hash in the constructor, this is your last chance.

  • sign_urls (bool, default: False ) –

    If True, pre-fetch signed URLs for the items (otherwise the URLs will be signed on demand).

Raises:

  • ValueError –

    Mainly if underlying data row cannot be found.

Returns:

  • list[StorageItem] –

    list of underlying storage items from which, e.g., client metadata can be updated.

Source code in encord_agents/core/dependencies/shares.py
def get_storage_items(
    self, data_hashes: list[str | UUID], dataset_hash: str | UUID | None = None, sign_urls: bool = False
) -> list[StorageItem]:
    """
    !!! warning "Deprecated"
        This method is deprecated and will be removed in version 0.2.10.
        Use the EncordUserClient directly for bulk storage item access.

    Args:
        data_hashes: Data hashes for the assets for which you need the underlying storage items.
        dataset_hash: If you didn't provide the associated dataset hash in the constructor,
            this is your last chance.
        sign_urls: If `True`, pre-fetch signed URLs for the items (otherwise the URLs will be signed on demand).

    Raises:
        ValueError: Mainly if underlying data row cannot be found.

    Returns:
        list of underlying storage items from which, e.g., client metadata can be updated.
    """
    warnings.warn(
        "DataLookup.get_storage_items() is deprecated and will be removed in version 0.2.10. "
        "Use the EncordUserClient directly: "
        "client.get_storage_items([lr.backing_item_uuid for lr in label_rows], sign_url=sign_urls)",
        DeprecationWarning,
        stacklevel=2,
    )
    try:
        data_rows = [self.get_data_row(i, dataset_hash) for i in data_hashes]
    except ValueError:
        raise ValueError("Failed to load storage items because one or more data rows could not be obtained")

    return self.user_client.get_storage_items([dr.backing_item_uuid for dr in data_rows], sign_url=sign_urls)

encord_agents.core.ontology

FieldType module-attribute

FieldType = Any

Pydantic's Field can be anything, so it is hard to type precisely. This alias indicates that you should use the pydantic.Field function to construct this variable.

GenericFieldModel

Bases: BaseModel

Source code in encord_agents/core/ontology.py
class GenericFieldModel(BaseModel):
    feature_node_hash: str = Field()

    def set_answer(self, instance: ClassificationInstance) -> None:
        """
        This function will be called from the parsing loop to allow the model to set itself as the answer
        on the classification instance.
        """
        ...
set_answer
set_answer(instance: ClassificationInstance) -> None

This function will be called from the parsing loop to allow the model to set itself as the answer on the classification instance.

Source code in encord_agents/core/ontology.py
def set_answer(self, instance: ClassificationInstance) -> None:
    """
    This function will be called from the parsing loop to allow the model to set itself as the answer
    on the classification instance.
    """
    ...
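A hypothetical sketch of a concrete field model; TextFieldModel and its value field are illustrative and not part of the library, and the exact set_answer call may need adapting to the attribute type:

from encord.objects.classification_instance import ClassificationInstance
from pydantic import Field

from encord_agents.core.ontology import GenericFieldModel

class TextFieldModel(GenericFieldModel):
    value: str = Field(description="Free-form text answer")

    def set_answer(self, instance: ClassificationInstance) -> None:
        # Write the parsed value back onto the classification instance.
        instance.set_answer(self.value)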

OntologyDataModel

Bases: Generic[OntologyType]

Class to create a pydantic model equivalent to an arbitrary classification ontology.

The model can be used to form a json schema based on the ontology. This is useful if you are, e.g., trying to get a structured response from an LLM.

Example:

from pydantic import ValidationError

classifications = project.ontology_structure.classifications
objects = project.ontology_structure.objects

data_model = OntologyDataModel([objects])
# or
data_model = OntologyDataModel([classifications])

# Get a json schema for the ontology
print(data_model.model_json_schema_str)

# Parse json following the schema into label instances
json_str = my_favourite_llm(
    f"what is this? pls follow {schema}", img
)
try:
    instances = data_model(json_str)
except ValidationError:
    # invalid json
    ...

for ins in instances:
    label_row.add_classification_instance(ins)

label_row.save()

For a concrete example, please see

Attributes:

  • ontology –
  • DataModel (BaseModel) –
Source code in encord_agents/core/ontology.py
class OntologyDataModel(Generic[OntologyType]):
    """
    Class to create a pydantic model equivalent to an arbitrary classification ontology.

    The model can be used to form a json schema based on the ontology. This is useful if
    you are, e.g., trying to get a structured response from an LLM.

    **Example:**

    ```python
    from pydantic import ValidationError

    classifications = project.ontology_structure.classifications
    objects = project.ontology_structure.objects

    data_model = OntologyDataModel([objects])
    # or
    data_model = OntologyDataModel([classifications])

    # Get a json schema for the ontology
    print(data_model.model_json_schema_str)

    # Parse json following the schema into label instances
    json_str = my_favourite_llm(
        f"what is this? pls follow {schema}", img
    )
    try:
        instances = data_model(json_str)
    except ValidationError:
        # invalid json
        ...

    for ins in instances:
        label_row.add_classification_instance(ins)

    label_row.save()
    ```

    For a concrete example, please see [](TODO)

    Attributes:
        ontology:
        DataModel:
    """

    def __init__(self, root_obj: list[OntologyType] | OntologyType):
        _root_obj: list[OntologyType]
        if isinstance(root_obj, list):
            assert len(root_obj) != 0, "No ontology objects given to transform into a pydantic model"
            first, *rest = root_obj
            assert all(
                (isinstance(r, type(first)) for r in rest)
            ), "You cannot mix classifications and objects in the same model"
            _root_obj = root_obj
        else:
            _root_obj = [root_obj]

        self.ontology_lookup: dict[str, OntologyType] = {
            a.feature_node_hash: r for r in _root_obj for a in r.attributes
        }
        self.DataModel: BaseModel
        if isinstance(_root_obj[0], Object):
            legal_shapes = {Shape.BOUNDING_BOX, Shape.BITMASK, Shape.POLYGON, Shape.ROTATABLE_BOUNDING_BOX}

            illegal_objects = [o for o in _root_obj if o.shape not in legal_shapes]  # type: ignore

            if illegal_objects:
                illegal_names = [f'Object(name="{o.name}", shape={o.shape})' for o in illegal_objects]  # type: ignore
                assert not illegal_objects, f"Illegal shapes in provided ontology objects: `{illegal_names}`"

            self.DataModel = create_objects_model(_root_obj)  # type: ignore

        else:
            # Classifications can be built into one model
            classification_fields = dict([construct_fields(attr) for clf in _root_obj for attr in clf.attributes])
            self.DataModel: BaseModel = create_model("ClassificationModel", **classification_fields)  # type: ignore

    @property
    def model_json_schema(self) -> dict[str, Any]:
        return self.DataModel.model_json_schema()

    @property
    def model_json_schema_str(self) -> str:
        return json.dumps(self.model_json_schema)

    @overload
    def __call__(self: "OntologyDataModel[Classification]", answer: str) -> list[ClassificationInstance]: ...

    @overload
    def __call__(self: "OntologyDataModel[Object]", answer: str) -> ObjectInstance: ...

    def __call__(
        self: "OntologyDataModel[Classification] | OntologyDataModel[Object]", answer: str
    ) -> list[ClassificationInstance] | ObjectInstance:
        """
        Validate a json response in accordance to the pydantic model.

        This function allows you to convert from a json object (e.g., coming from an llm)
        back to the encord "instance format".

        Args:
            answer: The json object as a raw string.

        Returns: a list of classification / object instances that you will then
            have to add to a label row.

        """
        return self.validate_json(answer)

    @overload
    def validate_json(self: "OntologyDataModel[Classification]", answer_str: str) -> list[ClassificationInstance]: ...

    @overload
    def validate_json(self: "OntologyDataModel[Object]", answer_str: str) -> ObjectInstance: ...

    def validate_json(
        self: "OntologyDataModel[Classification] | OntologyDataModel[Object]", answer_str: str
    ) -> list[ClassificationInstance] | ObjectInstance:
        """
        Validate a json response in accordance to the pydantic model.

        This function allows you to convert from a json object (e.g., coming from an llm)
        back to the encord "instance format".

        Args:
            answer_str: The json object as a raw string.

        Returns: a list of classification / object instances that you will then
            have to add to a label row.

        """
        answer = self.DataModel.model_validate_json(answer_str)
        # ^ if classification has a property per top-level classification in the ontology

        if self.DataModel.__name__ == OBJECTS_RADIO_MODEL:  # type: ignore
            ont_obj = answer.get_ontology_object()  # type: ignore
            ins = ont_obj.create_instance()

            if ont_obj.attributes:
                for attr_key, attr_val in vars(answer).items():
                    if attr_key == "feature_node_hash":
                        continue
                    attr_val.set_answer(ins)
            return ins
        else:
            answers = []
            for attr_val in vars(answer).values():
                ont_cls = self.ontology_lookup[attr_val.feature_node_hash]
                ins = ont_cls.create_instance()
                attr_val.set_answer(ins)
                answers.append(ins)

            return answers
__call__
__call__(answer: str) -> list[ClassificationInstance]
__call__(answer: str) -> ObjectInstance
__call__(answer: str) -> list[ClassificationInstance] | ObjectInstance

Validate a json response in accordance with the pydantic model.

This function allows you to convert from a json object (e.g., coming from an llm) back to the encord "instance format".

Parameters:

  • answer (str) –

    The json object as a raw string.

Returns:

  • list[ClassificationInstance] | ObjectInstance –

    A list of classification / object instances that you will then have to add to a label row.

Source code in encord_agents/core/ontology.py
def __call__(
    self: "OntologyDataModel[Classification] | OntologyDataModel[Object]", answer: str
) -> list[ClassificationInstance] | ObjectInstance:
    """
    Validate a json response in accordance to the pydantic model.

    This function allows you to convert from a json object (e.g., coming from an llm)
    back to the encord "instance format".

    Args:
        answer: The json object as a raw string.

    Returns: a list of classification / object instances that you will then
        have to add to a label row.

    """
    return self.validate_json(answer)
validate_json
validate_json(answer_str: str) -> list[ClassificationInstance]
validate_json(answer_str: str) -> ObjectInstance
validate_json(answer_str: str) -> list[ClassificationInstance] | ObjectInstance

Validate a json response in accordance with the pydantic model.

This function allows you to convert from a json object (e.g., coming from an llm) back to the encord "instance format".

Parameters:

  • answer_str (str) –

    The json object as a raw string.

Returns:

  • list[ClassificationInstance] | ObjectInstance –

    A list of classification / object instances that you will then have to add to a label row.

Source code in encord_agents/core/ontology.py
def validate_json(
    self: "OntologyDataModel[Classification] | OntologyDataModel[Object]", answer_str: str
) -> list[ClassificationInstance] | ObjectInstance:
    """
    Validate a json response in accordance to the pydantic model.

    This function allows you to convert from a json object (e.g., coming from an llm)
    back to the encord "instance format".

    Args:
        answer_str: The json object as a raw string.

    Returns: a list of classification / object instances that you will then
        have to add to a label row.

    """
    answer = self.DataModel.model_validate_json(answer_str)
    # ^ if classification has a property per top-level classification in the ontology

    if self.DataModel.__name__ == OBJECTS_RADIO_MODEL:  # type: ignore
        ont_obj = answer.get_ontology_object()  # type: ignore
        ins = ont_obj.create_instance()

        if ont_obj.attributes:
            for attr_key, attr_val in vars(answer).items():
                if attr_key == "feature_node_hash":
                    continue
                attr_val.set_answer(ins)
        return ins
    else:
        answers = []
        for attr_val in vars(answer).values():
            ont_cls = self.ontology_lookup[attr_val.feature_node_hash]
            ins = ont_cls.create_instance()
            attr_val.set_answer(ins)
            answers.append(ins)

        return answers

encord_agents.core.rich_columns

TaskSpeedColumn

Bases: ProgressColumn

Renders human readable transfer speed.

Source code in encord_agents/core/rich_columns.py
class TaskSpeedColumn(ProgressColumn):
    """Renders human readable transfer speed."""

    def __init__(self, unit: str = "tasks") -> None:
        super().__init__()
        self.unit = unit

    def _format_speed(self, speed: float) -> str:
        resolution = "s" if speed > 1 / 60 else "m" if speed > 1 / 3600 else "h"
        if resolution == "m":
            speed /= 60
        elif resolution == "h":
            speed /= 3600
        return f"{speed:.2f} {self.unit}/{resolution}"

    def render(self, task: Task) -> Text:
        """Show data transfer speed."""
        speed = task.finished_speed or task.speed
        if speed is None:
            return Text("?", style="progress.data.speed")

        return Text(self._format_speed(speed), style="progress.data.speed")
render
render(task: Task) -> Text

Show data transfer speed.

Source code in encord_agents/core/rich_columns.py
def render(self, task: Task) -> Text:
    """Show data transfer speed."""
    speed = task.finished_speed or task.speed
    if speed is None:
        return Text("?", style="progress.data.speed")

    return Text(self._format_speed(speed), style="progress.data.speed")
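A minimal sketch wiring the column into a rich progress bar; the work loop is a stand-in:

import time

from rich.progress import BarColumn, Progress, TextColumn

from encord_agents.core.rich_columns import TaskSpeedColumn

with Progress(
    TextColumn("[progress.description]{task.description}"),
    BarColumn(),
    TaskSpeedColumn(unit="tasks"),
) as progress:
    task = progress.add_task("Processing", total=100)
    for _ in range(100):
        time.sleep(0.01)  # stand-in for real work
        progress.advance(task)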

encord_agents.core.settings

Settings used throughout the module.

Note that central settings will be read via environment variables.

Settings

Bases: BaseSettings

Source code in encord_agents/core/settings.py
class Settings(BaseSettings):
    ssh_key_file: Optional[Path] = Field(validation_alias="ENCORD_SSH_KEY_FILE", default=None)
    """
    The path to the private ssh key file to authenticate with Encord.

    Either this or the `ENCORD_SSH_KEY` needs to be set for most use-cases.
    To setup a key with Encord, please see
    [the platform docs](https://docs.encord.com/platform-documentation/Annotate/annotate-api-keys).
    """
    ssh_key_content: Optional[str] = Field(validation_alias="ENCORD_SSH_KEY", default=None)
    """
    The content of the private ssh key file to authenticate with Encord.

    Either this or the `ENCORD_SSH_KEY_FILE` needs to be set for most use-cases.
    To set up a key with Encord, please see
    [the platform docs](https://docs.encord.com/platform-documentation/Annotate/annotate-api-keys).
    """
    domain: Optional[str] = Field(validation_alias="ENCORD_DOMAIN", default=None)

    @field_validator("ssh_key_content")
    @classmethod
    def check_key_content(cls, content: str | None) -> str | None:
        if content is None:
            return content

        if os.path.exists(content):
            raise PrintableError(
                f"The env variable `[blue]ENCORD_SSH_KEY[/blue]` (={content}) is set with a value that looks like a path and not ssh key content. Did you mean to set the `[blue]ENCORD_SSH_KEY_FILE[/blue]` environment variable with the private key file content directly?"
            )

        return content

    @field_validator("ssh_key_file")
    @classmethod
    def check_path_expand_and_exists(cls, path: Path | None) -> Path | None:
        if path is None:
            return path

        path = path.expanduser()

        if not path.is_file():
            raise PrintableError(
                "The env variable `[blue]ENCORD_SSH_KEY_FILE[/blue]` is set with a value that could not be found in the file system. Did you mean to set the `[blue]ENCORD_SSH_KEY[/blue]` environment variable with the private key file content directly?"
            )

        return path

    @model_validator(mode="after")
    def check_key(self: "Settings") -> "Settings":
        if not any(map(bool, [self.ssh_key_content, self.ssh_key_file])):
            raise PrintableError(
                f"Must specify either `[blue]ENCORD_SSH_KEY_FILE[/blue]` or `[blue]ENCORD_SSH_KEY[/blue]` env variables. If you don't have an ssh key, please refer to our docs:{os.linesep}[magenta]https://docs.encord.com/platform-documentation/Annotate/annotate-api-keys#creating-keys-using-terminal-powershell[/magenta]"
            )

        if all(map(bool, [self.ssh_key_file, self.ssh_key_content])):
            import warnings

            warnings.warn(
                "You have configured both the `ENCORD_SSH_KEY` and `ENCORD_SSH_KEY_FILE`. The `ENCORD_SSH_KEY` will take precedence."
            )

        return self

    @property
    def ssh_key(self) -> str:
        if self.ssh_key_content is None:
            if self.ssh_key_file is None:
                raise ValueError("Both ssh key content and ssh key file is None")
            self.ssh_key_content = self.ssh_key_file.read_text()
        return self.ssh_key_content

    def __hash__(self) -> int:
        return hash((self.ssh_key_content, self.ssh_key_file, self.domain))
ssh_key_content class-attribute instance-attribute
ssh_key_content: Optional[str] = Field(validation_alias='ENCORD_SSH_KEY', default=None)

The content of the private ssh key file to authenticate with Encord.

Either this or the ENCORD_SSH_KEY_FILE needs to be set for most use-cases. To set up a key with Encord, please see the platform docs.

ssh_key_file class-attribute instance-attribute
ssh_key_file: Optional[Path] = Field(validation_alias='ENCORD_SSH_KEY_FILE', default=None)

The path to the private ssh key file to authenticate with Encord.

Either this or the ENCORD_SSH_KEY needs to be set for most use-cases. To set up a key with Encord, please see the platform docs.
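
A minimal configuration sketch (the key path below is hypothetical): point the environment variable at your private key before constructing Settings, and the key content is resolved lazily on first access.

import os

from encord_agents.core.settings import Settings

# Hypothetical path; it must point to an existing key file, otherwise validation
# raises a PrintableError. In practice, export ENCORD_SSH_KEY_FILE in your shell
# or deployment configuration instead of setting it in code.
os.environ["ENCORD_SSH_KEY_FILE"] = "/path/to/encord-private-key"

settings = Settings()            # validated against the environment on instantiation
private_key = settings.ssh_key   # reads the file on first access if only the path was set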

encord_agents.core.utils

batch_iterator

batch_iterator(iterator: Iterable[T], batch_size: int) -> Iterable[List[T]]

Yield batches of items from an iterator.

Parameters:

  • iterator (Iterable[T]) –

    The source iterator

  • batch_size (int) –

    Size of each batch > 0

Returns:

  • Iterable[List[T]] –

    Iterable of lists, each containing up to batch_size items

Source code in encord_agents/core/utils.py
def batch_iterator(iterator: Iterable[T], batch_size: int) -> Iterable[List[T]]:
    """Yield batches of items from an iterator.

    Args:
        iterator: The source iterator
        batch_size: Size of each batch > 0

    Returns:
        Iterable of lists, each containing up to batch_size items
    """
    iterator = iter(iterator)  # Ensure we have an iterator
    while True:
        batch = []
        for _ in range(batch_size):
            try:
                batch.append(next(iterator))
            except StopIteration:
                break
        if not batch:
            break
        yield batch
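
For instance, a minimal usage sketch batching a range of integers:

from encord_agents.core.utils import batch_iterator

# Groups items into lists of up to four; the final batch may be shorter.
for batch in batch_iterator(range(10), batch_size=4):
    print(batch)
# [0, 1, 2, 3]
# [4, 5, 6, 7]
# [8, 9]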

download_asset

download_asset(storage_item: StorageItem, frame: int | None = None) -> Generator[Path, None, None]

Download the asset associated to a label row to disk.

This function is a context manager. Data will be cleaned up when the context is left.

Example usage:

with download_asset(storage_item, 10) as asset_path:
    # In here the file exists
    pixel_values = np.asarray(Image.open(asset_path))

# outside, it will be cleaned up

Parameters:

  • storage_item (StorageItem) –

    The Storage item for which you want to download the associated asset.

  • frame (int | None, default: None ) –

    The frame that you need. If `frame` is None for a video, you will get the path to the whole video.

Raises:

  • NotImplementedError –

    If you try to get all frames of an image group.

  • ValueError –

    If you try to download an unsupported data type (e.g., DICOM).

Yields:

  • Path –

    The file path for the requested asset.

Source code in encord_agents/core/utils.py
@contextmanager
def download_asset(storage_item: StorageItem, frame: int | None = None) -> Generator[Path, None, None]:
    """
    Download the asset associated to a label row to disk.

    This function is a context manager. Data will be cleaned up when the context is left.

    Example usage:

        with download_asset(storage_item, 10) as asset_path:
            # In here the file exists
            pixel_values = np.asarray(Image.open(asset_path))

        # outside, it will be cleaned up

    Args:
        storage_item: The Storage item for which you want to download the associated asset.
        frame: The frame that you need. If `frame` is None for a video, you will get the path to the whole video.

    Raises:
        NotImplementedError: If you try to get all frames of an image group.
        ValueError: If you try to download an unsupported data type (e.g., DICOM).


    Yields:
        The file path for the requested asset.

    """
    url = storage_item.get_signed_url()

    if storage_item.item_type == StorageItemType.IMAGE_GROUP:
        if frame is None:
            # Only whole image sequences can be downloaded as one file - for native image groups, a specific frame is required.
            raise NotImplementedError(DOWNLOAD_NATIVE_IMAGE_GROUP_WO_FRAME_ERROR_MESSAGE)

        child_storage_items = list(storage_item.get_child_items(get_signed_urls=True))
        assert len(child_storage_items) > frame, "The requested frame in the Image Group does not exist"
        url = child_storage_items[frame].get_signed_url()

    if url is None:
        raise ValueError("Failed to get a signed url for the asset")

    file_type, suffix = _guess_file_suffix(url, storage_item)
    response = requests.get(url)
    response.raise_for_status()

    with TemporaryDirectory() as dir_name:
        dir_path = Path(dir_name)

        file_path = dir_path / f"{storage_item.uuid}{suffix}"
        with open(file_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=4096):
                if chunk:
                    f.write(chunk)

        if file_type == "video" and frame is not None:  # Get that exact frame
            from .video import get_frame, write_frame

            frame_content = get_frame(file_path, frame)
            frame_file = file_path.with_name(f"{file_path.name}_{frame}").with_suffix(".png")
            write_frame(frame_file, frame_content)
            file_path = frame_file

        yield file_path
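
A complementary sketch for videos (assuming `storage_item` is a video StorageItem you already hold): leaving `frame` as None yields the path to the whole video file rather than a single extracted frame.

from encord_agents.core.utils import download_asset

# `storage_item` is assumed to be a video StorageItem obtained elsewhere.
with download_asset(storage_item, frame=None) as video_path:
    print(video_path.suffix)  # e.g., ".mp4"; the file lives in a temporary directory

# The temporary directory (and the downloaded file) is removed when the context exits.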

get_frame_count

get_frame_count(storage_item: StorageItem) -> int

Get the number of frames in a video.

Source code in encord_agents/core/utils.py
def get_frame_count(storage_item: StorageItem) -> int:
    """
    Get the number of frames in a video.
    """
    if storage_item.item_type != StorageItemType.VIDEO:
        raise ValueError("This function only supports video storage items")
    if storage_item.frame_count is not None:
        return storage_item.frame_count
    if storage_item.duration is not None and storage_item.fps is not None:
        return int(storage_item.duration * storage_item.fps)
    raise ValueError("Frame count is not available for this storage item, missing: frame_count or duration and fps")

get_initialised_label_row

get_initialised_label_row(frame_data: FrameData, include_args: LabelRowMetadataIncludeArgs | None = None, init_args: LabelRowInitialiseLabelsArgs | None = None) -> LabelRowV2

Get an initialised label row from the frame_data information.

Parameters:

  • frame_data (FrameData) –

    The data pointing to the data asset.

  • include_args (LabelRowMetadataIncludeArgs | None, default: None ) –

    Optional arguments passed on to project.list_label_rows_v2 when matching the label row.

  • init_args (LabelRowInitialiseLabelsArgs | None, default: None ) –

    Optional arguments passed on to label_row.initialise_labels.

Raises:

  • Exception –

    If the frame_data cannot be matched to a label row

Returns:

  • LabelRowV2 –

    The initialized label row.

Source code in encord_agents/core/utils.py
def get_initialised_label_row(
    frame_data: FrameData,
    include_args: LabelRowMetadataIncludeArgs | None = None,
    init_args: LabelRowInitialiseLabelsArgs | None = None,
) -> LabelRowV2:
    """
    Get an initialised label row from the frame_data information.

    Args:
        frame_data: The data pointing to the data asset.

    Raises:
        Exception: If the `frame_data` cannot be matched to a label row

    Returns:
        The initialized label row.

    """
    user_client = get_user_client()
    project = user_client.get_project(str(frame_data.project_hash))
    include_args = include_args or LabelRowMetadataIncludeArgs()
    init_args = init_args or LabelRowInitialiseLabelsArgs()
    matched_lrs = project.list_label_rows_v2(data_hashes=[frame_data.data_hash], **include_args.model_dump())
    num_matches = len(matched_lrs)
    if num_matches > 1:
        raise Exception(f"Non unique match: matched {num_matches} label rows!")
    elif num_matches == 0:
        raise Exception("No label rows were matched!")
    lr = matched_lrs.pop()
    lr.initialise_labels(**init_args.model_dump())
    return lr
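
A minimal sketch, assuming `frame_data` is the FrameData payload your editor agent received from the Label Editor:

from encord_agents.core.utils import get_initialised_label_row

# `frame_data` is assumed to be the FrameData your agent received.
label_row = get_initialised_label_row(frame_data)
print(label_row.data_title)  # labels are already initialised, so they can be read or updated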

get_user_client

get_user_client(settings: Settings | None = None) -> EncordUserClient

Generate a user client to access Encord.

Returns:

  • EncordUserClient –

    An EncordUserClient authenticated with the credentials from the encord_agents.core.settings.Settings.

Source code in encord_agents/core/utils.py
def get_user_client(settings: Settings | None = None) -> EncordUserClient:
    """
    Generate a user client to access Encord.

    Returns:
        An EncordUserClient authenticated with the credentials from the encord_agents.core.settings.Settings.

    """
    settings = settings or Settings()
    return get_user_client_from_settings(settings)
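
For example (the project hash below is a placeholder):

from encord_agents.core.utils import get_user_client

user_client = get_user_client()  # authenticates via ENCORD_SSH_KEY / ENCORD_SSH_KEY_FILE
project = user_client.get_project("<project-hash>")  # placeholder hash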

encord_agents.core.video

get_frame

get_frame(video_path: Path, desired_frame: int) -> NDArray[np.uint8]

Extract an exact frame from a video.

Parameters:

  • video_path (Path) –

    The file path to where the video is stored.

  • desired_frame (int) –

    The frame to extract

Raises:

  • Exception –

    If the video cannot be opened properly or the requested frame could not be retrieved from the video.

Returns:

  • NDArray[uint8] –

    Numpy array of shape [h, w, c] where channels are BGR.

Source code in encord_agents/core/video.py
def get_frame(video_path: Path, desired_frame: int) -> NDArray[np.uint8]:
    """
    Extract an exact frame from a video.

    Args:
        video_path: The file path to where the video is stored.
        desired_frame: The frame to extract

    Raises:
        Exception:  If the video cannot be opened properly or the requested
            frame could not be retrieved from the video.

    Returns:
        Numpy array of shape [h, w, c] where channels are BGR.

    """
    cap = cv2.VideoCapture(video_path.as_posix())
    if not cap.isOpened():
        raise Exception("Error opening video file.")

    cap.set(cv2.CAP_PROP_POS_FRAMES, desired_frame)

    ret, frame = cap.read()
    if not ret:
        raise Exception("Error retrieving frame.")

    cap.release()
    return frame.astype(np.uint8)
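
A short sketch (the video path is hypothetical); note that the returned array is BGR, so convert it if you need RGB:

from pathlib import Path

import cv2

from encord_agents.core.video import get_frame

frame_bgr = get_frame(Path("example.mp4"), desired_frame=10)  # hypothetical file
frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)        # convert for RGB consumers
print(frame_rgb.shape)                                        # (height, width, 3)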

iter_video

iter_video(video_path: Path) -> Iterator[Frame]

Iterate video frame by frame.

Parameters:

  • video_path (Path) –

    The file path to the video you wish to iterate.

Raises:

  • Exception –

    If the video file could not be opened properly.

Yields:

  • Frame –

    Frames from the video.

Source code in encord_agents/core/video.py
def iter_video(video_path: Path) -> Iterator[Frame]:
    """
    Iterate video frame by frame.

    Args:
        video_path: The file path to the video you wish to iterate.

    Raises:
        Exception: If the video file could not be opened properly.

    Yields:
        Frames from the video.

    """
    cap = cv2.VideoCapture(video_path.as_posix())
    if not cap.isOpened():
        raise Exception("Error opening video file.")

    frame_num = 0
    ret, frame = cap.read()
    while ret:
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        yield Frame(frame=frame_num, content=rgb_frame.astype(np.uint8))

        ret, frame = cap.read()
        frame_num += 1

    cap.release()
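
A usage sketch (hypothetical path); each yielded Frame carries the frame index and an RGB array:

from pathlib import Path

from encord_agents.core.video import iter_video

for frame in iter_video(Path("example.mp4")):  # hypothetical file
    if frame.frame % 100 == 0:                 # sample every 100th frame
        print(frame.frame, frame.content.shape)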

iter_video_with_indices

iter_video_with_indices(video_path: Path, frame_indices: Iterable[int]) -> Iterator[Frame]

Iterate video frame by frame with specified frame indices.

Parameters:

  • video_path (Path) –

    The file path to the video you wish to iterate.

  • frame_indices (Iterable[int]) –

    The frame indices to iterate over.

Yields:

  • Frame –

    Frames from the video.

Source code in encord_agents/core/video.py
def iter_video_with_indices(video_path: Path, frame_indices: Iterable[int]) -> Iterator[Frame]:
    """
    Iterate video frame by frame with specified frame indices.

    Args:
        video_path: The file path to the video you wish to iterate.
        frame_indices: The frame indices to iterate over.

    Yields:
        Frames from the video.

    """
    if not video_path.exists():
        raise Exception("Video file does not exist.")
    cap = cv2.VideoCapture(video_path.as_posix())
    if not cap.isOpened():
        raise Exception("Error opening video file.")

    for frame_num in frame_indices:
        # Set the frame position before reading
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
        ret, frame = cap.read()
        if not ret:
            break

        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        yield Frame(frame=frame_num, content=rgb_frame.astype(np.uint8))

    cap.release()
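
For example, a sketch sampling every 25th frame of a (hypothetical) video file:

from pathlib import Path

from encord_agents.core.video import iter_video_with_indices

for frame in iter_video_with_indices(Path("example.mp4"), frame_indices=range(0, 500, 25)):
    print(frame.frame, frame.content.shape)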

write_frame

write_frame(frame_path: Path, frame: NDArray[np.uint8]) -> None

Write a frame to a file.

Parameters:

  • frame_path (Path) –

    The file path to write the frame to.

  • frame (NDArray[uint8]) –

    The frame to write.

Source code in encord_agents/core/video.py
def write_frame(frame_path: Path, frame: NDArray[np.uint8]) -> None:
    """
    Write a frame to a file.

    Args:
        frame_path: The file path to write the frame to.
        frame: The frame to write.

    """
    cv2.imwrite(frame_path.as_posix(), frame)
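
A small sketch; because the frame is written with cv2.imwrite, the array is expected in BGR order, so RGB frames (e.g., Frame.content) should be converted first:

from pathlib import Path

import cv2
import numpy as np

from encord_agents.core.video import write_frame

rgb = np.zeros((64, 64, 3), dtype=np.uint8)   # a dummy RGB frame
rgb[..., 0] = 255                             # pure red in RGB
write_frame(Path("red.png"), cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR))  # convert to BGR for imwrite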

encord_agents.core.vision

b64_encode_image

b64_encode_image(img: NDArray[np.uint8], format: Base64Formats = '.jpg') -> str

Encode an image to a base64 string.

Parameters:

  • img (NDArray[uint8]) –

    The image to encode. Expects [RGB] channels

  • format (Base64Formats, default: '.jpg' ) –

    The format of the image.

Returns:

  • str –

    The base64 encoded image.

Source code in encord_agents/core/vision.py
def b64_encode_image(img: NDArray[np.uint8], format: Base64Formats = ".jpg") -> str:
    """
    Encode an image to a base64 string.

    Args:
        img: The image to encode. Expects [RGB] channels
        format: The format of the image.

    Returns:
        The base64 encoded image.
    """
    coerced_color_img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    _, encoded_image = cv2.imencode(format, coerced_color_img)
    return base64.b64encode(encoded_image).decode("utf-8")  # type: ignore
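
A short sketch with a dummy RGB image; the resulting string can be wrapped in a data URL for, e.g., LLM prompting:

import numpy as np

from encord_agents.core.vision import b64_encode_image

rgb = np.random.randint(0, 256, size=(32, 32, 3), dtype=np.uint8)  # dummy RGB image
b64 = b64_encode_image(rgb, format=".png")
data_url = f"data:image/png;base64,{b64}"  # usable as an <img src="..."> value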