A generic directory class representing a directory with files of a specified format. Provides both async and sync interfaces for directory operations. All methods without _sync suffix are async.
Attributes
| Attribute | Type | Description |
|---|---|---|
| path | str | The path to the directory (can be local or remote). |
| name | Optional[str] = None | Optional name for the directory (defaults to basename of path). |
| format | str = "" | A string representing the format of the files contained within the directory. |
| hash | Optional[str] = None | Optional precomputed hash value used for cache key computation when the directory is used as an input to discoverable tasks. |
Constructor
Signature
def Dir(
path: str,
name: Optional[str] = None,
format: str = "",
hash: Optional[str] = None
)
Parameters
| Name | Type | Description |
|---|---|---|
| path | str | The path to the directory, which can be local or remote. |
| name | Optional[str] = None | Optional name for the directory; defaults to the basename of the path if not provided. |
| format | str = "" | The format type of the files contained within the directory. |
| hash | Optional[str] = None | Optional precomputed hash value used for cache key computation. |
Methods
pre_init()
@classmethod
def pre_init(
data: dict
) -> dict
Internal: Pydantic validator to set default name from path. Not intended for direct use.
Parameters
| Name | Type | Description |
|---|---|---|
| data | dict | The raw input data used to initialize the Dir instance |
Returns
| Type | Description |
|---|---|
| dict | The validated data dictionary with the name field populated if it was missing |
is_empty()
@classmethod
def is_empty() -> bool
True when this is a sentinel Dir produced by `EmptyDir` / `Dir.empty()` — i.e. the task didn't actually produce a directory.
Returns
| Type | Description |
|---|---|
| bool | True if the directory path matches the empty sentinel, False otherwise |
empty()
@classmethod
def empty() -> [Dir](dir.md?sid=flyte_io__dir_dir)
Return a sentinel Dir representing 'no directory was produced'.
Returns
| Type | Description |
|---|---|
| [Dir](dir.md?sid=flyte_io__dir_dir) | An EmptyDir sentinel instance used to represent the absence of a directory in Flyte tasks |
lazy_uploader()
@classmethod
def lazy_uploader() -> Callable | None
Gets or sets the asynchronous uploader function used to transfer local files to remote storage when in remote mode.
Returns
| Type | Description |
|---|---|
| `Callable \| None` | The asynchronous uploader function, or None (presumably when no uploader has been set) |
schema_match()
@classmethod
def schema_match(
incoming: dict
) -> bool
Internal: Check if incoming schema matches Dir schema. Not intended for direct use.
Parameters
| Name | Type | Description |
|---|---|---|
| incoming | dict | The schema dictionary to validate against the Dir class schema |
Returns
| Type | Description |
|---|---|
| bool | True if the incoming dictionary matches the required schema structure for a Dir object |
walk()
@classmethod
def walk(
recursive: bool = True,
max_depth: Optional[int] = None
) -> AsyncIterator[[File](../file/file.md?sid=flyte_io__file_file)[T]]
Asynchronously walk through the directory and yield File objects.
Parameters
| Name | Type | Description |
|---|---|---|
| recursive | bool = True | If True, recursively walk subdirectories. If False, only list files in the top-level directory. |
| max_depth | Optional[int] = None | Maximum depth for recursive walking. If None, walk through all subdirectories. |
Returns
| Type | Description |
|---|---|
| AsyncIterator[[File](../file/file.md?sid=flyte_io__file_file)[T]] | An async iterator yielding File objects for each file found in the directory |
walk_sync()
@classmethod
def walk_sync(
recursive: bool = True,
file_pattern: str = "*",
max_depth: Optional[int] = None
) -> Iterator[[File](../file/file.md?sid=flyte_io__file_file)[T]]
Synchronously walk through the directory and yield File objects.
Parameters
| Name | Type | Description |
|---|---|---|
| recursive | bool = True | If True, recursively walk subdirectories. If False, only list files in the top-level directory. |
| file_pattern | str = "*" | Glob pattern to filter files (e.g., "*.txt", "*.csv"). Default is "*" (all files). |
| max_depth | Optional[int] = None | Maximum depth for recursive walking. If None, walk through all subdirectories. |
Returns
| Type | Description |
|---|---|
| Iterator[[File](../file/file.md?sid=flyte_io__file_file)[T]] | A synchronous iterator yielding File objects for each file found in the directory |
list_files()
@classmethod
def list_files() -> List[[File](../file/file.md?sid=flyte_io__file_file)[T]]
Asynchronously get a list of all files in the directory (non-recursive).
Returns
| Type | Description |
|---|---|
| List[[File](../file/file.md?sid=flyte_io__file_file)[T]] | A list of File objects for files in the top-level directory |
list_files_sync()
@classmethod
def list_files_sync() -> List[[File](../file/file.md?sid=flyte_io__file_file)[T]]
Synchronously get a list of all files in the directory (non-recursive).
Returns
| Type | Description |
|---|---|
| List[[File](../file/file.md?sid=flyte_io__file_file)[T]] | A list of File objects for files in the top-level directory |
download()
@classmethod
def download(
local_path: Optional[Union[str, Path]] = None
) -> str
Asynchronously download the entire directory to a local path.
Parameters
| Name | Type | Description |
|---|---|---|
| local_path | Optional[Union[str, Path]] = None | The local path to download the directory to. If None, a temporary directory will be used. |
Returns
| Type | Description |
|---|---|
| str | The absolute path to the downloaded directory on the local filesystem |
download_sync()
@classmethod
def download_sync(
local_path: Optional[Union[str, Path]] = None
) -> str
Synchronously download the entire directory to a local path.
Parameters
| Name | Type | Description |
|---|---|---|
| local_path | Optional[Union[str, Path]] = None | The local path to download the directory to. If None, a temporary directory will be used. |
Returns
| Type | Description |
|---|---|
| str | The absolute path to the downloaded directory on the local filesystem |
from_local()
@classmethod
def from_local(
local_path: Union[str, Path],
remote_destination: Optional[str] = None,
dir_cache_key: Optional[str] = None,
batch_size: Optional[int] = None
) -> [Dir](dir.md?sid=flyte_io__dir_dir)[T]
Asynchronously create a new Dir by uploading a local directory to remote storage.
Parameters
| Name | Type | Description |
|---|---|---|
| local_path | Union[str, Path] | Path to the local directory to be uploaded |
| remote_destination | Optional[str] = None | Optional remote path to store the directory. If None, a path will be automatically generated. |
| dir_cache_key | Optional[str] = None | Optional precomputed hash value to use for cache key computation. |
| batch_size | Optional[int] = None | Optional concurrency limit for uploading files. |
Returns
| Type | Description |
|---|---|
| [Dir](dir.md?sid=flyte_io__dir_dir)[T] | A new Dir instance pointing to the uploaded remote directory |
from_local_sync()
@classmethod
def from_local_sync(
local_path: Union[str, Path],
remote_destination: Optional[str] = None,
dir_cache_key: Optional[str] = None
) -> [Dir](dir.md?sid=flyte_io__dir_dir)[T]
Synchronously create a new Dir by uploading a local directory to remote storage.
Parameters
| Name | Type | Description |
|---|---|---|
| local_path | Union[str, Path] | Path to the local directory to be uploaded |
| remote_destination | Optional[str] = None | Optional remote path to store the directory. If None, a path will be automatically generated. |
| dir_cache_key | Optional[str] = None | Optional precomputed hash value to use for cache key computation. |
Returns
| Type | Description |
|---|---|
| [Dir](dir.md?sid=flyte_io__dir_dir)[T] | A new Dir instance pointing to the uploaded remote directory |
new_remote()
@classmethod
def new_remote(
dir_name: Optional[str] = None,
hash: Optional[str] = None
) -> [Dir](dir.md?sid=flyte_io__dir_dir)[T]
Create a new Dir reference for a remote directory that will be written to.
Parameters
| Name | Type | Description |
|---|---|---|
| dir_name | Optional[str] = None | Optional name for the remote directory. If not set, a generated name will be used. |
| hash | Optional[str] = None | Optional precomputed hash value to use for cache key computation. |
Returns
| Type | Description |
|---|---|
| [Dir](dir.md?sid=flyte_io__dir_dir)[T] | A new Dir instance with a generated remote path for writing data |
from_existing_remote()
@classmethod
def from_existing_remote(
remote_path: str,
dir_cache_key: Optional[str] = None
) -> [Dir](dir.md?sid=flyte_io__dir_dir)[T]
Create a Dir reference from an existing remote directory.
Parameters
| Name | Type | Description |
|---|---|---|
| remote_path | str | The remote path to the existing directory (e.g., s3://bucket/path) |
| dir_cache_key | Optional[str] = None | Optional hash value to use for cache key computation. |
Returns
| Type | Description |
|---|---|
| [Dir](dir.md?sid=flyte_io__dir_dir)[T] | A new Dir instance pointing to the existing remote directory |
exists()
@classmethod
def exists() -> bool
Asynchronously check if the directory exists.
Returns
| Type | Description |
|---|---|
| bool | True if the directory exists in the underlying storage, False otherwise |
exists_sync()
@classmethod
def exists_sync() -> bool
Synchronously check if the directory exists.
Returns
| Type | Description |
|---|---|
| bool | True if the directory exists in the underlying storage, False otherwise |
get_file()
@classmethod
def get_file(
file_name: str
) -> Optional[[File](../file/file.md?sid=flyte_io__file_file)[T]]
Asynchronously get a specific file from the directory by name.
Parameters
| Name | Type | Description |
|---|---|---|
| file_name | str | The name of the file to retrieve from the directory |
Returns
| Type | Description |
|---|---|
| Optional[[File](../file/file.md?sid=flyte_io__file_file)[T]] | A File instance if the file exists at the constructed path, None otherwise |
get_file_sync()
@classmethod
def get_file_sync(
file_name: str
) -> Optional[[File](../file/file.md?sid=flyte_io__file_file)[T]]
Synchronously get a specific file from the directory by name.
Parameters
| Name | Type | Description |
|---|---|---|
| file_name | str | The name of the file to retrieve from the directory |
Returns
| Type | Description |
|---|---|
| Optional[[File](../file/file.md?sid=flyte_io__file_file)[T]] | A File instance if the file exists at the constructed path, None otherwise |