aztk.spark.models package

class aztk.spark.models.Application(task: aztk.models.task.Task)[source]

Bases: object

class aztk.spark.models.ApplicationConfiguration(name=None, application=None, application_args=None, main_class=None, jars=None, py_files=None, files=None, driver_java_options=None, driver_library_path=None, driver_class_path=None, driver_memory=None, executor_memory=None, driver_cores=None, executor_cores=None, max_retry_count=None)[source]

Bases: object

class aztk.spark.models.ApplicationLog(application_log: aztk.models.application_log.ApplicationLog)[source]

Bases: aztk.models.application_log.ApplicationLog

class aztk.spark.models.ApplicationState[source]

Bases: enum.Enum

An enumeration.

Running = 'running'
Completed = 'completed'
Failed = 'failed'
Preparing = 'preparing'
class aztk.spark.models.Cluster(cluster: aztk.models.cluster.Cluster)[source]

Bases: aztk.models.cluster.Cluster

is_pool_running_spark(pool: azure.batch.models.cloud_pool_py3.CloudPool)[source]
class aztk.spark.models.ClusterConfiguration(*args, **kwargs)[source]

Bases: aztk.models.cluster_configuration.ClusterConfiguration

spark_configuration

Field is another model

Parameters:
  • model (aztk.core.models.Model) – Model object that the field should be an instance of
  • merge_strategy (ModelMergeStrategy) – When merging models, how the nested model should be merged. Default: ModelMergeStrategy.merge
worker_on_master

Model Boolean field

class aztk.spark.models.DockerConfiguration(**kwargs)[source]

Bases: aztk.models.secrets_configuration.DockerConfiguration

class aztk.spark.models.Enum[source]

Bases: object

Generic enumeration.

Derive from this class to define new enumerations.

class aztk.spark.models.File(name: str, payload: _io.StringIO)[source]

Bases: aztk.models.file.File

class aztk.spark.models.FileShare(**kwargs)[source]

Bases: aztk.models.file_share.FileShare

class aztk.spark.models.Job(cloud_job_schedule: azure.batch.models.cloud_job_schedule_py3.CloudJobSchedule, tasks: List[aztk.models.task.Task] = None, pool: azure.batch.models.cloud_pool_py3.CloudPool = None, nodes: azure.batch.models.compute_node_paged.ComputeNodePaged = None)[source]

Bases: object

class aztk.spark.models.JobConfiguration(id=None, applications=None, vm_size=None, spark_configuration=None, toolkit=None, max_dedicated_nodes=0, max_low_pri_nodes=0, subnet_id=None, scheduling_target: aztk.models.scheduling_target.SchedulingTarget = None, worker_on_master=None)[source]

Bases: object

to_cluster_config()[source]
mixed_mode() → bool[source]
get_docker_repo() → str[source]
get_docker_run_options() → str[source]
validate() → bool[source]

Validate the config in its current state. Raises an error if the config is invalid.

class aztk.spark.models.JobState[source]

Bases: enum.Enum

An enumeration.

active = 'active'
completed = 'completed'
disabled = 'disabled'
terminating = 'terminating'
deleting = 'deleting'
class aztk.spark.models.List[source]

Bases: list, typing.MutableSequence

class aztk.spark.models.Model(**kwargs)[source]

Bases: object

Base class for all aztk models

To implement model wide validation implement __validate__ method

__getstate__()[source]

For pickle serialization. This returns the state of the model.

__setstate__(state)[source]

For pickle serialization. This updates the current model with the given state.

validate()[source]

Validate the entire model

merge(other)[source]
classmethod from_dict(val: dict)[source]
to_dict()[source]
class aztk.spark.models.PluginConfiguration(**kwargs)[source]

Bases: aztk.models.plugins.plugin_configuration.PluginConfiguration

class aztk.spark.models.PortForwardingSpecification(**kwargs)[source]

Bases: aztk.models.port_forward_specification.PortForwardingSpecification

class aztk.spark.models.RemoteLogin(remote_login: aztk.models.remote_login.RemoteLogin)[source]

Bases: aztk.models.remote_login.RemoteLogin

class aztk.spark.models.SchedulingTarget[source]

Bases: enum.Enum

Target where task will get scheduled. For spark this is where the driver will live.

Master = 'master'

Only master is allowed to run task

Any = 'any'

Any node (not recommended if using low-priority nodes) (Default)

class aztk.spark.models.SecretsConfiguration(**kwargs)[source]

Bases: aztk.models.secrets_configuration.SecretsConfiguration

class aztk.spark.models.ServicePrincipalConfiguration(**kwargs)[source]

Bases: aztk.models.secrets_configuration.ServicePrincipalConfiguration

class aztk.spark.models.SharedKeyConfiguration(**kwargs)[source]

Bases: aztk.models.secrets_configuration.SharedKeyConfiguration

class aztk.spark.models.SparkConfiguration(*args, **kwargs)[source]

Bases: aztk.core.models.model.Model

spark_defaults_conf

Model String field

spark_env_sh

Model String field

core_site_xml

Model String field

jars

Field that should be a list

class aztk.spark.models.SparkToolkit(version: str, environment: str = None, environment_version: str = None)[source]

Bases: aztk.models.toolkit.Toolkit

class aztk.spark.models.UserConfiguration(**kwargs)[source]

Bases: aztk.models.user_configuration.UserConfiguration

class aztk.spark.models.VmImage(publisher, offer, sku)[source]

Bases: aztk.models.vm_image.VmImage