For Publishers#

Local File Source#

class LocalFileSource(path: str | List[str], format: str | dict | FileFormat = None, initial_last_modified: str | datetime = None)[source]#

Bases: Input

Class for managing the configuration of local-file-based data inputs.

format#

The format of the file. If not provided, it will be inferred from the file extension of the data.

Type:

FileFormat

path#

The path where the files can be found. It can be a single path or a list of paths.

Type:

str | List[str]

initial_last_modified#

If not None, only the files modified after this date and time will be considered.

Type:

str | None

to_dict()[source]#

Converts the LocalFileSource object to a dictionary.

property format: FileFormat#

The format of the file or files. If not provided, it will be inferred from the file extension in the path.

Type:

FileFormat

property initial_last_modified: str#

The date and time after which the files were modified.

Type:

str

property path: str | List[str]#

The path or paths to the files to load.

Type:

str | List[str]

S3 Source#

class S3Source(uri: str | List[str], credentials: dict | S3Credentials, format: str | dict | FileFormat = None, initial_last_modified: str | datetime = None, region: str = None)[source]#

Bases: Input

Class for managing the configuration of S3-file-based data inputs.

format#

The format of the file. If not provided, it will be inferred from the file extension of the data.

Type:

FileFormat

uri#

The URI of the files with format: ‘s3://path/to/files’. It can be a single URI or a list of URIs.

Type:

str | List[str]

credentials#

The credentials required to access the S3 bucket.

Type:

S3Credentials

initial_last_modified#

If provided, only the files modified after this date and time will be considered.

Type:

str | datetime.datetime

to_dict()[source]#

Converts the S3Source object to a dictionary.

property credentials: S3Credentials#

The credentials required to access the S3 bucket.

Type:

S3Credentials

property format: FileFormat#

The format of the file. If not provided, it will be inferred from the file extension of the data.

Type:

FileFormat

property initial_last_modified: str#

The date and time after which the files were modified.

Type:

str

property region: str | None#

The region where the S3 bucket is located.

Type:

str | None

property uri: str | List[str]#

The URI of the files with format: ‘s3://path/to/files’.

Type:

str | List[str]

Azure Source#

class AzureSource(uri: str | List[str], credentials: dict | AzureCredentials, format: str | dict | FileFormat = None, initial_last_modified: str | datetime = None)[source]#

Bases: Input

Class for managing the configuration of Azure-file-based data inputs.

format#

The format of the file. If not provided, it will be inferred from the file extension of the data.

Type:

FileFormat

uri#

The URI of the files with format: ‘az://path/to/files’. It can be a single URI or a list of URIs.

Type:

str | List[str]

credentials#

The credentials required to access Azure.

Type:

AzureCredentials

initial_last_modified#

If provided, only the files modified after this date and time will be considered.

Type:

str | datetime.datetime

to_dict()[source]#

Converts the AzureSource object to a dictionary.

property credentials: AzureCredentials#

The credentials required to access Azure.

Type:

AzureCredentials

property format: FileFormat#

The format of the file. If not provided, it will be inferred from the file extension of the data.

Type:

FileFormat

property initial_last_modified: str#

The date and time after which the files were modified.

Type:

str

property uri: str | List[str]#

The URI of the files with format: ‘az://path/to/files’.

Type:

str | List[str]

MySQL Source#

class MySQLSource(uri: str, query: str | List[str], credentials: dict | UserPasswordCredentials | None = None, initial_values: dict | None = None)[source]#

Bases: Input

Class for managing the configuration of MySQL-based data inputs.

credentials#

The credentials required to access the MySQL database.

Type:

UserPasswordCredentials

initial_values#

The initial values for the parameters in the SQL queries.

Type:

dict

query#

The SQL query(s) to execute. If multiple queries are provided, they must be provided as a dictionary, with the parameter name in the registered function as the key and the SQL query as the value.

Type:

str | List[str]

uri#

The URI of the database where the data is located.

Type:

str

to_dict()[source]#

Converts the MySQLSource object to a dictionary.

property credentials: UserPasswordCredentials | None#

The credentials required to access the MySQL database. If no credentials were provided, it will return None.

Type:

UserPasswordCredentials | None

property initial_values: dict#

The initial values for the parameters in the SQL queries.

Type:

dict

property query: str | List[str]#

The SQL query(s) to execute.

Type:

str | List[str]

property uri: str#

The URI of the database where the data is located.

Type:

str

MariaDB Source#

class MariaDBSource(uri: str, query: str | List[str], credentials: dict | UserPasswordCredentials | None = None, initial_values: dict | None = None)[source]#

Bases: Input

Class for managing the configuration of MariaDB-based data inputs.

credentials#

The credentials required to access the MariaDB database.

Type:

UserPasswordCredentials

initial_values#

The initial values for the parameters in the SQL queries.

Type:

dict

query#

The SQL query(s) to execute. If multiple queries are provided, they must be provided as a dictionary, with the parameter name in the registered function as the key and the SQL query as the value.

Type:

str | List[str]

uri#

The URI of the database where the data is located.

Type:

str

to_dict()[source]#

Converts the MariaDBSource object to a dictionary.

property credentials: UserPasswordCredentials | None#

The credentials required to access MariaDB. If no credentials were provided, it will return None.

Type:

UserPasswordCredentials | None

property initial_values: dict#

The initial values for the parameters in the SQL queries.

Type:

dict

property query: str | List[str]#

The SQL query(s) to execute.

Type:

str | List[str]

property uri: str#

The URI of the database where the data is located.

Type:

str

Postgres Source#

class PostgresSource(uri: str, query: str | List[str], credentials: dict | UserPasswordCredentials | None = None, initial_values: dict | None = None)[source]#

Bases: Input

Class for managing the configuration of Postgres-based data inputs.

credentials#

The credentials required to access the Postgres database.

Type:

UserPasswordCredentials

initial_values#

The initial values for the parameters in the SQL queries.

Type:

dict

query#

The SQL query(s) to execute. If multiple queries are provided, they must be provided as a dictionary, with the parameter name in the registered function as the key and the SQL query as the value.

Type:

str | List[str]

uri#

The URI of the database where the data is located.

Type:

str

to_dict()[source]#

Converts the PostgresSource object to a dictionary.

property credentials: UserPasswordCredentials | None#

The credentials required to access the Postgres database. If no credentials were provided, it will return None.

Type:

UserPasswordCredentials | None

property initial_values: dict#

The initial values for the parameters in the SQL queries.

Type:

dict

property query: str | List[str]#

The SQL query(s) to execute.

Type:

str | List[str]

property uri: str#

The URI of the database where the data is located.

Type:

str

Oracle Source#

class OracleSource(uri: str, query: str | List[str], credentials: dict | UserPasswordCredentials | None = None, initial_values: dict | None = None)[source]#

Bases: Input

Class for managing the configuration of Oracle-based data inputs.

credentials#

The credentials required to access the Oracle database.

Type:

UserPasswordCredentials

initial_values#

The initial values for the parameters in the SQL queries.

Type:

dict

query#

The SQL query(s) to execute. If multiple queries are provided, they must be provided as a dictionary, with the parameter name in the registered function as the key and the SQL query as the value.

Type:

str | List[str]

uri#

The URI of the database where the data is located.

Type:

str

to_dict()[source]#

Converts the OracleSource object to a dictionary.

property credentials: UserPasswordCredentials | None#

The credentials required to access Oracle. If no credentials were provided, it will return None.

Type:

UserPasswordCredentials | None

property initial_values: dict#

The initial values for the parameters in the SQL queries.

Type:

dict

property query: str | List[str]#

The SQL query(s) to execute.

Type:

str | List[str]

property uri: str#

The URI of the database where the data is located.

Type:

str

Custom Source (Plugin)#

class SourcePlugin[source]#

Bases: ABC

Abstract class for input plugins.

trigger_input(working_dir: str) -> Union[str, Tuple[str, …], List[str]]

Trigger the import of the data. The method will receive a folder where it must store the data as parquet files, and return a list of the paths of the files created. These files will then be loaded and mapped to the dataset function in positional order, so if you want file.parquet to be the first argument of the dataset function, you must return it first. If you want a parameter to receive multiple files, return a list of the paths. For example, you would give the following return to provide a first argument with a single file and a second argument with two files: return [“file1.parquet”, [“file2.parquet”, “file3.parquet”]]

property initial_values: dict#

Return a dictionary with the initial values to be stored after execution of the plugin. They will be accessible in the next execution of the plugin. The dictionary must have the parameter names as keys and the initial values as values, all of type string.

Returns:

A dictionary with the initial values of the parameters of the plugin.

Return type:

dict

abstract trigger_input(working_dir: str) → str | Tuple[str, ...] | List[str][source]#

Trigger the import of the data. This must be implemented in any class that inherits from this class. The method will receive a folder where it must store the data as parquet files, and return a list of the paths of the files created. These files will then be loaded and mapped to the dataset function in positional order, so if you want file.parquet to be the first argument of the dataset function, you must return it first. If you want a parameter to receive multiple files, return a list of the paths. For example, you would give the following return to provide a first argument with a single file and a second argument with two files: return [“file1.parquet”, [“file2.parquet”, “file3.parquet”]]

Parameters:

working_dir (str) – The folder where the files must be stored

Returns:

The path of the file(s) created, in the order they must be mapped to the dataset function.

Return type:

Union[str, Tuple[str, …], List[str]]

For Subscribers#

Local File Destination#

class LocalFileDestination(path: str | List[str], format: str | dict | FileFormat = None)[source]#

Bases: Output

Class for managing the configuration of local-file-based data outputs.

property format: FileFormat#

The format of the file or files. If not provided, it will be inferred from the file extension in the path.

Type:

FileFormat

property path: str | List[str]#

The path or paths to store the files.

Type:

str | List[str]

S3 Destination#

class S3Destination(uri: str | List[str], credentials: dict | S3Credentials, format: str | dict | FileFormat = None, region: str = None)[source]#

Bases: Output

Class for managing the configuration of S3-file-based data outputs.

format#

The format of the file. If not provided, it will be inferred from the file extension.

Type:

FileFormat

uri#

The URI of the files with format: ‘s3://path/to/files’. It can be a single URI or a list of URIs.

Type:

str | List[str]

credentials#

The credentials required to access the S3 bucket.

Type:

S3Credentials

to_dict()[source]#

Converts the S3Destination object to a dictionary.

property credentials: S3Credentials#

The credentials required to access the S3 bucket.

Type:

S3Credentials

property format: FileFormat#

The format of the file. If not provided, it will be inferred from the file extension.

Type:

FileFormat

property region: str | None#

The region where the S3 bucket is located.

Type:

str | None

property uri: str | List[str]#

The URI of the files with format: ‘s3://path/to/files’.

Type:

str | List[str]

Azure Destination#

class AzureDestination(uri: str | List[str], credentials: dict | AzureCredentials, format: str | dict | FileFormat = None)[source]#

Bases: Output

Class for managing the configuration of Azure-file-based data outputs.

format#

The format of the file to be created. If not provided, it will be inferred from the file extension.

Type:

FileFormat

uri#

The URI of the files with format: ‘az://path/to/files’. It can be a single URI or a list of URIs.

Type:

str | List[str]

credentials#

The credentials required to access Azure.

Type:

AzureCredentials

to_dict()[source]#

Converts the AzureDestination object to a dictionary.

property credentials: AzureCredentials#

The credentials required to access Azure.

Type:

AzureCredentials

property format: FileFormat#

The format of the file. If not provided, it will be inferred from the file extension of the URI.

Type:

FileFormat

property uri: str | List[str]#

The URI of the files with format: ‘az://path/to/files’.

Type:

str | List[str]

MySQL Destination#

class MySQLDestination(uri: str, destination_table: List[str] | str, credentials: dict | UserPasswordCredentials = None, if_table_exists: Literal['append', 'replace'] = 'append')[source]#

Bases: Output

Class for managing the configuration of MySQL-based data outputs.

credentials#

The credentials required to access the MySQL database.

Type:

UserPasswordCredentials

destination_table#

The table(s) to create. If multiple tables are provided, they must be provided as a list.

Type:

str | List[str]

if_table_exists#

The strategy to follow when the table already exists:

  • ‘replace’ will create a new database table, overwriting an existing one.

  • ‘append’ will append to an existing table.

Type:

{‘append’, ‘replace’}

uri#

The URI of the database where the data is going to be stored.

Type:

str

to_dict()[source]#

Converts the MySQLDestination object to a dictionary.

property credentials: UserPasswordCredentials#

The credentials required to access the MySQL database.

Type:

UserPasswordCredentials

property destination_table: str | List[str]#

The table(s) to create. If multiple tables are provided, they must be provided as a list.

Type:

str | List[str]

property if_table_exists: Literal['append', 'replace']#

The strategy to follow when the table already exists.

Type:

str

property uri: str#

The URI of the database where the data is going to be stored.

Type:

str

MariaDB Destination#

class MariaDBDestination(uri: str, destination_table: List[str] | str, credentials: dict | UserPasswordCredentials = None, if_table_exists: Literal['append', 'replace'] = 'append')[source]#

Bases: Output

Class for managing the configuration of MariaDB-based data outputs.

credentials#

The credentials required to access the MariaDB database.

Type:

UserPasswordCredentials

destination_table#

The table(s) to create. If multiple tables are provided, they must be provided as a list.

Type:

str | List[str]

if_table_exists#

The strategy to follow when the table already exists:

  • ‘replace’ will create a new database table, overwriting an existing one.

  • ‘append’ will append to an existing table.

Type:

{‘append’, ‘replace’}

uri#

The URI of the database where the data is going to be stored.

Type:

str

to_dict()[source]#

Converts the MariaDBDestination object to a dictionary.

property credentials: UserPasswordCredentials#

The credentials required to access the MariaDB database.

Type:

UserPasswordCredentials

property destination_table: str | List[str]#

The table(s) to create. If multiple tables are provided, they must be provided as a list.

Type:

str | List[str]

property if_table_exists: Literal['append', 'replace']#

The strategy to follow when the table already exists.

Type:

str

property uri: str#

The URI of the database where the data is going to be stored.

Type:

str

Postgres Destination#

class PostgresDestination(uri: str, destination_table: List[str] | str, credentials: dict | UserPasswordCredentials = None, if_table_exists: Literal['append', 'replace'] = 'append')[source]#

Bases: Output

Class for managing the configuration of Postgres-based data outputs.

credentials#

The credentials required to access the Postgres database.

Type:

UserPasswordCredentials

destination_table#

The table(s) to create. If multiple tables are provided, they must be provided as a list.

Type:

str | List[str]

if_table_exists#

The strategy to follow when the table already exists:

  • ‘replace’ will create a new database table, overwriting an existing one.

  • ‘append’ will append to an existing table.

Type:

{‘append’, ‘replace’}

uri#

The URI of the database where the data is going to be stored.

Type:

str

to_dict()[source]#

Converts the PostgresDestination object to a dictionary.

property credentials: UserPasswordCredentials#

The credentials required to access the Postgres database.

Type:

UserPasswordCredentials

property destination_table: str | List[str]#

The table(s) to create. If multiple tables are provided, they must be provided as a list.

Type:

str | List[str]

property if_table_exists: Literal['append', 'replace']#

The strategy to follow when the table already exists.

Type:

str

property uri: str#

The URI of the database where the data is going to be stored.

Type:

str

Oracle Destination#

class OracleDestination(uri: str, destination_table: List[str] | str, credentials: dict | UserPasswordCredentials = None, if_table_exists: Literal['append', 'replace'] = 'append')[source]#

Bases: Output

Class for managing the configuration of Oracle-based data outputs.

credentials#

The credentials required to access the Oracle database.

Type:

UserPasswordCredentials

destination_table#

The table(s) to create. If multiple tables are provided, they must be provided as a list.

Type:

str | List[str]

if_table_exists#

The strategy to follow when the table already exists:

  • ‘replace’ will create a new database table, overwriting an existing one.

  • ‘append’ will append to an existing table.

Type:

{‘append’, ‘replace’}

uri#

The URI of the database where the data is going to be stored.

Type:

str

to_dict()[source]#

Converts the OracleDestination object to a dictionary.

property credentials: UserPasswordCredentials#

The credentials required to access the Oracle database.

Type:

UserPasswordCredentials

property destination_table: str | List[str]#

The table(s) to create. If multiple tables are provided, they must be provided as a list.

Type:

str | List[str]

property if_table_exists: Literal['append', 'replace']#

The strategy to follow when the table already exists.

Type:

str

property uri: str#

The URI of the database where the data is going to be stored.

Type:

str

Custom Destination (Plugin)#

class DestinationPlugin[source]#

Bases: ABC

Abstract class for output plugins.

trigger_output(*args, **kwargs)[source]#

Trigger the exporting of the data. This function will receive the resulting data from the dataset function and must store it in the desired location.

abstract trigger_output(*args, **kwargs)[source]#

Trigger the exporting of the data. This function will receive the resulting data from the dataset function and must store it in the desired location.

Parameters:
  • *args – The data to be exported

  • **kwargs – Additional parameters to be used in the export

Returns:

None