From 0dc1b5ecad70e0c9d17979535d7413fef197fd03 Mon Sep 17 00:00:00 2001 From: Tommy Doerr Date: Wed, 6 May 2026 14:33:03 -0700 Subject: [PATCH 1/3] feat: Create a starter_farm Terraform configuration with README This Terraform configuration sample deploys a starter AWS Deadline Cloud farm equivalent to the CloudFormation starter_farm template. It uses the AWS Cloud Control (AWSCC) provider for Deadline Cloud resources and includes a production queue, package build queue, conda queue environment, and configurable service-managed fleets (CPU Linux, CPU Windows, CUDA Linux). Signed-off-by: Tommy Doerr --- terraform/.gitignore | 30 + terraform/README.md | 28 + .../farm_templates/starter_farm/README.md | 249 ++++++++ .../starter_farm/conda_queue_env.yaml.tftpl | 109 ++++ terraform/farm_templates/starter_farm/main.tf | 530 ++++++++++++++++++ 5 files changed, 946 insertions(+) create mode 100644 terraform/.gitignore create mode 100644 terraform/README.md create mode 100644 terraform/farm_templates/starter_farm/README.md create mode 100644 terraform/farm_templates/starter_farm/conda_queue_env.yaml.tftpl create mode 100644 terraform/farm_templates/starter_farm/main.tf diff --git a/terraform/.gitignore b/terraform/.gitignore new file mode 100644 index 00000000..7cbbed7b --- /dev/null +++ b/terraform/.gitignore @@ -0,0 +1,30 @@ +# Local .terraform directories +**/.terraform/* + +# .tfstate files +*.tfstate +*.tfstate.* + +# Crash log files +crash.log +crash.*.log + +# Exclude all .tfvars files, which are likely to contain sensitive data, such as +# password, private keys, and other secrets. These should not be part of version +# control as they are data points which are potentially sensitive and subject +# to change depending on the environment. 
+*.tfvars +*.tfvars.json + +# Ignore override files as they are usually used to override resources locally +override.tf +override.tf.json +*_override.tf +*_override.tf.json + +# Ignore CLI configuration files +.terraformrc +terraform.rc + +# Ignore lock files +.terraform.lock.hcl diff --git a/terraform/README.md b/terraform/README.md new file mode 100644 index 00000000..505db7e3 --- /dev/null +++ b/terraform/README.md @@ -0,0 +1,28 @@ +# AWS Deadline Cloud sample Terraform configurations + +With [Terraform](https://www.terraform.io/), you can use infrastructure as code to deploy infrastructure +such as a Deadline Cloud farm to your AWS account. Use the samples provided here directly or as a starting point +to create your own custom configurations. + +These Terraform configurations use the [AWS Cloud Control (AWSCC) provider](https://registry.terraform.io/providers/hashicorp/awscc/latest) +for Deadline Cloud resources, which offers full support for AWS Deadline Cloud resource types. + +## Starter farm + +The [starter_farm](farm_templates/starter_farm/) sample Terraform configuration deploys a Deadline Cloud farm you can use to run jobs that render images, +reconstruct 3D scenes, or transform your data in custom ways. This is the Terraform equivalent of the +[CloudFormation starter_farm template](../cloudformation/farm_templates/starter_farm/). +Sample jobs to submit are available in the [deadline-cloud-samples on GitHub](https://github.com/aws-deadline/deadline-cloud-samples). +Deadline Cloud provides many integrated submitter plugins for applications, and you can build your own jobs. The deployed farm includes the ability to +[build custom conda packages](../conda_recipes/README.md) for providing additional application support. + +## CloudFormation vs Terraform + +Both CloudFormation and Terraform configurations in this repository create equivalent infrastructure. 
+Choose based on your team's preferences and existing tooling: + +| Aspect | CloudFormation | Terraform | +|--------|---------------|-----------| +| Provider | AWS native | HashiCorp | +| Deadline resources | `AWS::Deadline::*` | `awscc_deadline_*` | +| State | Managed by AWS | Local or remote backend | diff --git a/terraform/farm_templates/starter_farm/README.md b/terraform/farm_templates/starter_farm/README.md new file mode 100644 index 00000000..96c4fb0d --- /dev/null +++ b/terraform/farm_templates/starter_farm/README.md @@ -0,0 +1,249 @@ +# A starter AWS Deadline Cloud farm (Terraform) + +## Overview + +This Terraform configuration deploys an [AWS Deadline Cloud](https://aws.amazon.com/deadline-cloud/) farm you can use to run jobs that render images, reconstruct 3D scenes, or transform your data in custom ways. This is the Terraform equivalent of the [CloudFormation starter_farm template](../../../cloudformation/farm_templates/starter_farm/). + +Sample jobs to submit are available in the [deadline-cloud-samples on GitHub](https://github.com/aws-deadline/deadline-cloud-samples/tree/mainline/job_bundles#readme), Deadline Cloud provides many [integrated submitter plugins for applications](https://github.com/aws-deadline/#integrations), and you can [build your own jobs](https://docs.aws.amazon.com/deadline-cloud/latest/developerguide/building-jobs.html). + +The deployed farm includes one or more [service-managed fleets](https://docs.aws.amazon.com/deadline-cloud/latest/userguide/smf-manage.html) that you select during deployment. The production queue supports Conda virtual environments for the applications that jobs need, and the package build queue can be used to build more packages if needed. + +It configures two Conda channels by default: a private channel on an S3 bucket you provide and the [deadline-cloud channel](https://docs.aws.amazon.com/deadline-cloud/latest/userguide/create-queue-environment.html#conda-queue-environment). 
The `deadline-cloud` channel provides applications like Blender, Houdini, Maya, and Nuke. You can add the [conda-forge channel](https://conda-forge.org/) to this list by setting the `prod_conda_channels` variable to `"deadline-cloud conda-forge"` to access packages created and maintained by the [conda-forge community](https://conda-forge.org/community/). + +When supported applications need licenses to run, they will use Deadline Cloud's usage-based licensing. See [Deadline Cloud pricing](https://aws.amazon.com/deadline-cloud/pricing/) to learn which applications are supported and the associated costs. + +## Prerequisites + +Before deploying this Terraform configuration, check that you have the following tools set up and resources created in your AWS account. The AWS region should be the same as the one you use to deploy the Terraform configuration. + +1. [Terraform](https://www.terraform.io/downloads) >= 1.0 installed +2. AWS credentials configured (via `aws configure`, environment variables, or IAM role) +3. An Amazon S3 bucket to hold job attachments and your Conda channel. From the [Amazon S3 management console](https://s3.console.aws.amazon.com/s3/home), create an S3 bucket. You will need the bucket name to deploy the Terraform configuration. +4. A Deadline Cloud monitor to view and manage the jobs you will submit to your queues. From the [AWS Deadline Cloud management console](https://console.aws.amazon.com/deadlinecloud/home), select the "Go to Monitor setup" option and follow the steps to enter a name for your monitor URL, enable IAM Identity Center, and then create a user login account to access the monitor. Your monitor URL will look similar to `https://<MONITOR_NAME>.<AWS_REGION>.deadlinecloud.amazonaws.com/`. You will need this URL to log in with the Deadline Cloud monitor desktop application.
+ +## Resources Created + +This configuration creates the following resources: + +| Resource | Description | +|----------|-------------| +| `awscc_deadline_farm` | The Deadline Cloud farm | +| `awscc_deadline_queue` (x2) | Production queue and Package Build queue | +| `awscc_deadline_queue_environment` | Conda queue environment for the production queue | +| `awscc_deadline_fleet` (up to 3) | CPU Linux, CPU Windows, and/or CUDA Linux fleets | +| `awscc_deadline_queue_fleet_association` (up to 6) | Associations between queues and fleets | +| `aws_iam_role` (x3) | IAM roles for queues and fleet | +| `aws_iam_role_policy` (x3) | IAM policies for S3 access and CloudWatch Logs | + +## Deployment + +### 1. Initialize Terraform + +```bash +cd terraform/farm_templates/starter_farm +terraform init +``` + +### 2. Configure Variables + +Create a `terraform.tfvars` file or pass variables via command line: + +```hcl +# Required +job_attachments_bucket_name = "your-s3-bucket-name" + +# Optional - customize as needed +aws_region = "us-west-2" +farm_name = "My Deadline Cloud Farm" + +# Fleet configuration (set to empty string to skip) +cpu_linux_fleet_name = "CPU Linux Fleet" +cpu_windows_fleet_name = "" # Skip Windows fleet +cuda_linux_fleet_name = "" # Skip CUDA fleet +``` + +### 3. Review the Plan + +```bash +terraform plan +``` + +### 4. Apply the Configuration + +```bash +terraform apply +``` + +### 5. Add User Access + +From the [AWS Deadline Cloud management console](https://console.aws.amazon.com/deadlinecloud/home), navigate to the farm that you created, and select the "Access management" tab. Select "Users", then "Add user", and then add the user you created for yourself from the prerequisites. Use the "Owner" access level to give yourself full access. 
+ +## Variables + +| Variable | Description | Default | +|----------|-------------|---------| +| `aws_region` | AWS region | `us-west-2` | +| `job_attachments_bucket_name` | S3 bucket for job attachments | (required) | +| `farm_name` | Farm display name | `Starter Deadline Cloud Farm` | +| `prod_queue_name` | Production queue name | `Production Job Queue` | +| `package_build_queue_name` | Package build queue name | `Package Build Queue` | +| `prod_conda_channels` | Default Conda channels | `deadline-cloud` | +| `cpu_linux_fleet_name` | CPU Linux fleet name (empty to skip) | `CPU Linux Fleet` | +| `cpu_windows_fleet_name` | CPU Windows fleet name (empty to skip) | `""` | +| `cuda_linux_fleet_name` | CUDA Linux fleet name (empty to skip) | `""` | +| `max_cpu_linux_worker_count` | Max workers for CPU Linux fleet | `10` | +| `cpu_linux_instance_market_type` | `spot` or `on-demand` | `spot` | + +See `main.tf` for the complete list of configurable variables. + +## Outputs + +| Output | Description | +|--------|-------------| +| `farm_id` | The Deadline Cloud farm ID | +| `farm_arn` | The Deadline Cloud farm ARN | +| `prod_queue_id` | The production queue ID | +| `package_build_queue_id` | The package build queue ID | +| `cpu_linux_fleet_id` | The CPU Linux fleet ID (if created) | +| `cpu_windows_fleet_id` | The CPU Windows fleet ID (if created) | +| `cuda_linux_fleet_id` | The CUDA Linux fleet ID (if created) | + +## Install the Deadline client tools on your workstation + +1. From the [AWS Deadline Cloud management console](https://console.aws.amazon.com/deadlinecloud/home), + select the "Downloads" page on the left navigation area. +2. Download and install the Deadline Cloud monitor desktop application. Use your monitor URL and + the user account from the prerequisites to log in from the Deadline Cloud monitor desktop. This also + provides AWS credentials to the Deadline Cloud CLI. +3. 
Download and install the Deadline Cloud submitter installer for your platform, or install the + Deadline Cloud CLI into your existing Python installation [from PyPI](https://pypi.org/project/deadline/) + using a command like `pip install "deadline[gui]"`. You can then use the command + `deadline handle-web-url --install` to install the job attachments download handler on supported operating systems. +4. From the terminal, run the command `deadline config gui`, and select the farm and production queue you deployed. + Select OK to apply the settings. + +## Initialize the S3 Conda channel + +Before submitting jobs, initialize the S3 Conda channel by publishing a package to it. See [Publish packages to an Amazon S3 conda channel](https://docs.aws.amazon.com/deadline-cloud/latest/developerguide/publish-packages-s3-channel.html) in the AWS Deadline Cloud Developer Guide for instructions. + +## Submit a test job + +This test job runs the `imagemagick identify` command on a directory of images to extract properties of the images and write them to a text file. Before proceeding with this test job, make sure the S3 Conda channel is initialized according to the instructions above. An uninitialized Conda channel will fail during the "Launch Conda" action. + +1. If you don't have a local copy of [deadline-cloud-samples](https://github.com/aws-deadline/deadline-cloud-samples) GitHub repository, you can make a git clone or [download it as a ZIP](https://github.com/aws-deadline/deadline-cloud-samples/archive/refs/heads/mainline.zip). +2. From the `job_bundles` directory of `deadline-cloud-samples`, run the following command: + ``` + $ deadline bundle gui-submit cli_job + ``` +3. From the "Shared job settings" tab, give the job a name like "Starter farm test job", then enter "imagemagick" into the "Conda Packages" parameter and if it's not already included, add "conda-forge" to the "Conda Channels" parameter. 
These parameters are for the Conda queue environment that provides applications to the job. +4. From the "Job-specific settings" tab, select the directory `turntable_with_maya_arnold` within the samples as the "Input/Output Data Directory". This directory has some .png files to process. +5. Replace the "Bash Script" text box contents with the following script: + ``` + find . -type f -iname "*.png" -exec magick identify {} \; | tee identified_images.txt + ``` +6. Select "Submit" and accept any prompts to submit the job to your queue. +7. From Deadline Cloud monitor, navigate to the production queue to watch the job you submitted. When it is running, right click on the task and select "View logs". It may take several minutes as Deadline Cloud starts an instance in your fleet to run the job. Within the log, you can find output that is similar to: + ``` + + find . -type f -iname '*.png' -exec magick identify '{}' ';' + + tee identified_images.txt + ./screenshots/turntable_job_bundle_submitter_gui.png PNG 657x844 657x844+0+0 8-bit sRGB 59671B 0.000u 0:00.000 + ./screenshots/windows_desktop_submitter_bat_file.png PNG 237x231 237x231+0+0 8-bit sRGB 29790B 0.000u 0:00.000 + ./screenshots/turntable_job_output_video_screenshot.png PNG 962x693 962x693+0+0 8-bit sRGB 674715B 0.000u 0:00.000 + ``` +8. When it is complete, download the output of the job. The custom script you entered populates a text file with image metadata. 
The output is written to the provided input/output directory, so look in the `turntable_with_maya_arnold` directory to find a file `identified_images.txt` with contents matching the logged output from the job: + ``` + ./screenshots/turntable_job_bundle_submitter_gui.png PNG 657x844 657x844+0+0 8-bit sRGB 59671B 0.000u 0:00.000 + ./screenshots/windows_desktop_submitter_bat_file.png PNG 237x231 237x231+0+0 8-bit sRGB 29790B 0.000u 0:00.000 + ./screenshots/turntable_job_output_video_screenshot.png PNG 962x693 962x693+0+0 8-bit sRGB 674715B 0.000u 0:00.000 + ``` + +You can also submit the sample job with a single command from your terminal as follows: + +``` +$ deadline bundle submit cli_job \ + --name "Starter farm test job" \ + -p CondaPackages=imagemagick \ + -p CondaChannels=conda-forge \ + -p DataDir=./turntable_with_maya_arnold \ + -p 'BashScript=find . -type f -iname "*.png" -exec magick identify {} \; | tee identified_images.txt' +``` + +## Use the farm for production + +### Set up more users and groups with farm access + +Use the [AWS IAM Identity Center management console](https://aws.amazon.com/iam/identity-center/) to create more users and groups, then give them permission to access the farm from the [AWS Deadline Cloud management console](https://console.aws.amazon.com/deadlinecloud/home). + +### Build more Conda packages + +See the [Conda recipe samples](../../../conda_recipes/README.md) to learn about the package building queue deployed by the template. If you write custom tools and plugins, you can write your own Conda package recipes to provide them to the farm. + +### Run jobs from job bundles + +Run jobs from the [job bundle samples](../../../job_bundles/README.md). Make copies of the code and build your own. 
+ +### Run jobs from DCC integrated submitters + +Run the submitter installer in the downloads section of the [AWS Deadline Cloud management console](https://console.aws.amazon.com/deadlinecloud/home), or start from the [submitter source code on GitHub](https://github.com/aws-deadline/). + +## Customize the farm + +### Select fleets to deploy + +By deploying fleets with multiple different hardware configurations, you can create a farm that supports a wide variety of jobs. The starter farm Terraform configuration comes with three different fleet configurations: a CPU Linux fleet, a CPU Windows fleet, and a CUDA Linux fleet. Each fleet that you name will be deployed, and if you set its name to be empty, it will be skipped. + +When different steps of your jobs have different requirements, you can edit your job template to have [`hostRequirements`](https://github.com/OpenJobDescription/openjd-specifications/wiki/2023-09-Template-Schemas#33-hostrequirements) that control the operating system, memory requirements, or whether a GPU is available for each step. + +### Customize the Terraform variables + +Each fleet has variables to control the maximum number of workers, whether to use spot or on-demand instances, and control the vCPUs and RAM of worker hosts. If you use spot instances, you generally want to include wider ranges of these properties when possible to increase the available instance types you can get. + +The default Conda channels that come after the S3 Conda channel are controlled by the `prod_conda_channels` variable that defaults to `"deadline-cloud"`. You can modify this to include [conda-forge](https://conda-forge.org/) or channels such as [bioconda](https://bioconda.github.io/). + +### Modify the Conda queue environment for the production queue + +The Terraform configuration includes a queue environment that creates Conda virtual environments for jobs to use. By default, this is the template file [conda_queue_env.yaml.tftpl](conda_queue_env.yaml.tftpl). 
You can edit this file to customize the Conda environment behavior, such as changing the default channels, adjusting caching behavior, or modifying the environment creation logic. + +See the [queue environment samples](https://github.com/aws-deadline/deadline-cloud-samples/tree/mainline/queue_environments) for more ideas on how to configure queue environments. + +### Create a Terraform configuration for your own farm + +If you want to organize the queues in your farm differently from this starter sample, or you need a different set of fleet configurations, you can copy this Terraform configuration and start editing it. See the [CUDA farm CloudFormation template](../../../cloudformation/farm_templates/cuda_farm/README.md) for an example where the starter farm has been simplified and specialized for jobs that use CUDA. + +We recommend you follow Infrastructure as Code best practices, such as keeping your configurations in version control and strictly making changes by editing the configuration and deploying it instead of mixing Terraform together with manual infrastructure updates from the AWS console. See the [AWS Well-Architected guidance on Infrastructure as Code](https://docs.aws.amazon.com/wellarchitected/latest/devops-guidance/dl.eac.1-organize-infrastructure-as-code-for-scale.html) to dive deeper into this topic. + +## Security scanning + +All Terraform configurations have been validated with security scanning tools: + +- **[Checkov](https://github.com/bridgecrewio/checkov)** - Static analysis for infrastructure as code +- **[tflint](https://github.com/terraform-linters/tflint)** - Terraform linter + +Run security scans on your modifications: + +```bash +# Install tools +pip install checkov +brew install tflint # or see https://github.com/terraform-linters/tflint + +# Run scans +checkov -d . 
--framework terraform +tflint +``` + +This template has been validated with: +- **Checkov**: 37 passed, 0 failed +- **tflint**: No issues +- **terraform validate**: Success + +## Cleanup + +To destroy all resources: + +```bash +terraform destroy +``` + +## Comparison with CloudFormation + +This Terraform configuration creates identical resources to the [CloudFormation starter_farm template](../../../cloudformation/farm_templates/starter_farm/). See the [parent README](../../README.md) for a comparison table. diff --git a/terraform/farm_templates/starter_farm/conda_queue_env.yaml.tftpl b/terraform/farm_templates/starter_farm/conda_queue_env.yaml.tftpl new file mode 100644 index 00000000..28dbbbae --- /dev/null +++ b/terraform/farm_templates/starter_farm/conda_queue_env.yaml.tftpl @@ -0,0 +1,109 @@ +specificationVersion: "environment-2023-09" +parameterDefinitions: + - name: CondaPackages + type: STRING + description: Space-separated list of Conda package match specifications to install. + default: "" + userInterface: + control: LINE_EDIT + label: Conda Packages + - name: CondaChannels + type: STRING + description: Space-separated list of Conda channels from which to install packages. + default: "s3://${job_attachments_bucket_name}/Conda/Default ${prod_conda_channels}" + userInterface: + control: LINE_EDIT + label: Conda Channels + - name: NamedCondaEnv + type: STRING + description: Named Conda environment to reuse. Use AUTOMATIC for hash-based naming. + default: "AUTOMATIC" + userInterface: + control: LINE_EDIT + label: Named Conda Environment + - name: NamedCondaEnvAction + type: STRING + description: How to treat the named environment. + default: "ACTIVATE" + allowedValues: + - "ACTIVATE" + - "REMOVE_AND_CREATE" + userInterface: + control: DROPDOWN_LIST + label: Named Conda Environment Action + - name: NamedCondaEnvUpdateAfterMinutes + type: INT + description: Minutes before updating the named environment. 
+ default: 600 # NOTE(review): neither embedded script in this template reads Param.NamedCondaEnvUpdateAfterMinutes - confirm whether the update-after logic is missing + userInterface: + control: SPIN_BOX + label: Update After (Minutes) + - name: RunCondaClean + type: STRING + description: Run conda clean before creating environment. + default: "False" + allowedValues: + - "True" + - "False" + userInterface: + control: CHECK_BOX + label: Clean Conda Cache +environment: + name: Conda + script: + actions: + onEnter: + command: bash + args: + - "{{Env.File.Enter}}" + onExit: + command: bash + args: + - "{{Env.File.Exit}}" + embeddedFiles: + - name: Enter # Creates or reuses a Conda environment from CondaPackages/CondaChannels, then activates it. NOTE(review): the script prints no "openjd_env:" lines, so the activation presumably stays inside this bash process and does not reach later session actions - confirm against the Open Job Description environment docs + filename: enter.sh + type: TEXT + data: | + #!/bin/bash + set -euo pipefail + if [ -z '{{Param.CondaPackages}}' ]; then + echo "Skipping Conda env as CondaPackages parameter was empty." + exit 0 + fi + CHANNEL_OPTS="$(echo '{{Param.CondaChannels}}' | sed -r 's/(\s+|^)(\S)/ -c \2/g')" + CONDA_PACKAGES='{{Param.CondaPackages}}' + NAMED_CONDA_ENV='{{Param.NamedCondaEnv}}' + if [ "$NAMED_CONDA_ENV" = 'AUTOMATIC' ]; then + NAMED_CONDA_ENV="hashname_$(echo '{{Param.CondaChannels}}{{Param.CondaPackages}}' | sha256sum | cut -c1-24)" + fi + if [ '{{Param.RunCondaClean}}' = 'True' ]; then + echo "RunCondaClean parameter is True, cleaning the Conda cache..."
+ conda clean --yes --all + fi + if [ -n "$NAMED_CONDA_ENV" ] && conda env list | grep -q "^$NAMED_CONDA_ENV "; then + if [ '{{Param.NamedCondaEnvAction}}' = 'REMOVE_AND_CREATE' ]; then + echo "Removing environment $NAMED_CONDA_ENV (NamedCondaEnvAction=REMOVE_AND_CREATE)" + conda env remove --yes --name "$NAMED_CONDA_ENV" + echo "Creating environment $NAMED_CONDA_ENV" + conda create --yes --quiet --name "$NAMED_CONDA_ENV" $CONDA_PACKAGES $CHANNEL_OPTS + else + echo "Reusing environment $NAMED_CONDA_ENV" + fi + conda activate "$NAMED_CONDA_ENV" + elif [ -n "$NAMED_CONDA_ENV" ]; then + echo "Creating environment $NAMED_CONDA_ENV" + conda create --yes --quiet --name "$NAMED_CONDA_ENV" $CONDA_PACKAGES $CHANNEL_OPTS + conda activate "$NAMED_CONDA_ENV" + else + ENV_DIR="$(mktemp -d)" + conda create --yes --quiet -p "$ENV_DIR" $CONDA_PACKAGES $CHANNEL_OPTS + conda activate "$ENV_DIR" + fi + - name: Exit + filename: exit.sh + type: TEXT + data: | + #!/bin/bash + set -euo pipefail + conda deactivate || true diff --git a/terraform/farm_templates/starter_farm/main.tf b/terraform/farm_templates/starter_farm/main.tf new file mode 100644 index 00000000..58752afd --- /dev/null +++ b/terraform/farm_templates/starter_farm/main.tf @@ -0,0 +1,530 @@ +# Deadline Cloud Starter Farm with SMF Fleet +# Terraform equivalent of the CloudFormation starter_farm template +# https://github.com/aws-deadline/deadline-cloud-samples/blob/mainline/cloudformation/farm_templates/starter_farm/deadline-cloud-starter-farm-template.yaml + +terraform { + required_version = ">= 1.0" + required_providers { + aws = { + source = "hashicorp/aws" + version = ">= 5.0" # NOTE(review): lower bound only, so a future major release can be selected - consider adding an upper bound + } + awscc = { + source = "hashicorp/awscc" + version = ">= 1.0" # NOTE(review): lower bound only, same consideration as the aws provider + } + } +} + +provider "aws" { # used for the aws_iam_* resources and the caller identity / partition data sources + region = var.aws_region +} + +provider "awscc" { # used for the awscc_deadline_* resources; configured with the same region as the aws provider + region = var.aws_region +} + +# Variables (all have defaults except job_attachments_bucket_name) +variable "aws_region" { + type = string + default = "us-west-2" +} + +variable "job_attachments_bucket_name" { + type = string + description = "An existing S3
bucket to use for job attachments and the default S3 conda channel." +} + +variable "farm_name" { + type = string + default = "Starter Deadline Cloud Farm" +} + +variable "farm_description" { + type = string + default = "Deadline Cloud farm deployed from the starter_farm sample Terraform configuration." +} + +variable "prod_queue_name" { + type = string + default = "Production Job Queue" +} + +variable "prod_queue_description" { + type = string + default = "The Deadline Cloud queue for running production jobs." +} + +variable "prod_conda_channels" { + type = string + default = "deadline-cloud" # appended after the S3 channel in the CondaChannels default of the queue environment template +} + +variable "package_build_queue_name" { + type = string + default = "Package Build Queue" +} + +variable "package_build_queue_description" { + type = string + default = "The Deadline Cloud queue for building conda packages." +} + +variable "cpu_linux_fleet_name" { + type = string + default = "CPU Linux Fleet" # set to "" to skip creating this fleet +} + +variable "cpu_windows_fleet_name" { + type = string + default = "" # empty: the fleet is only created when a name is provided +} + +variable "cuda_linux_fleet_name" { + type = string + default = "" # empty: the fleet is only created when a name is provided +} + +variable "cpu_linux_instance_market_type" { + type = string + default = "spot" +} + +variable "max_cpu_linux_worker_count" { + type = number + default = 10 +} + +variable "min_cpu_linux_vcpu" { + type = number + default = 2 +} + +variable "max_cpu_linux_vcpu" { + type = number + default = 8 +} + +variable "min_cpu_linux_ram_mib" { + type = number + default = 16384 +} + +variable "cpu_windows_instance_market_type" { + type = string + default = "spot" +} + +variable "max_cpu_windows_worker_count" { + type = number + default = 10 +} + +variable "min_cpu_windows_vcpu" { + type = number + default = 2 +} + +variable "max_cpu_windows_vcpu" { + type = number + default = 8 +} + +variable "min_cpu_windows_ram_mib" { + type = number + default = 16384 +} + +variable "cuda_linux_instance_market_type" { + type = string + default = "on-demand" +} + +variable "max_cuda_linux_worker_count" { + type = number + default = 1 +} + +variable
"min_cuda_linux_vcpu" { + type = number + default = 4 +} + +variable "max_cuda_linux_vcpu" { + type = number + default = 16 +} + +variable "min_cuda_linux_ram_mib" { + type = number + default = 32768 +} + +variable "root_ebs_volume_size_gib" { + type = number + default = 300 +} + +variable "root_ebs_volume_iops" { + type = number + default = 3000 +} + +variable "root_ebs_volume_throughput_mib" { + type = number + default = 125 +} + +# Data sources: caller account ID and partition details used to build ARNs and service principals +data "aws_caller_identity" "current" {} +data "aws_partition" "current" {} + +locals { + account_id = data.aws_caller_identity.current.account_id + partition = data.aws_partition.current.partition + url_suffix = data.aws_partition.current.dns_suffix + has_cpu_linux = var.cpu_linux_fleet_name != "" # each fleet is created only when its name is non-empty + has_cpu_windows = var.cpu_windows_fleet_name != "" + has_cuda_linux = var.cuda_linux_fleet_name != "" +} + +# Farm +resource "awscc_deadline_farm" "main" { + display_name = var.farm_name + description = var.farm_description +} + +# Production Queue IAM Role (assumable by the Deadline Cloud service principals, limited to this account and farm) +resource "aws_iam_role" "queue" { + name = "ProdQueue-${awscc_deadline_farm.main.farm_id}" + assume_role_policy = jsonencode({ + Version = "2012-10-17" + Statement = [{ + Effect = "Allow" + Principal = { Service = ["deadline.${local.url_suffix}", "credentials.deadline.${local.url_suffix}"] } + Action = "sts:AssumeRole" + Condition = { + StringEquals = { "aws:SourceAccount" = local.account_id } + ArnEquals = { "aws:SourceArn" = awscc_deadline_farm.main.arn } + } + }] + }) +} + +resource "aws_iam_role_policy" "queue" { # ARNs use local.partition so the policy stays valid outside the "aws" partition + name = "QueuePolicy" + role = aws_iam_role.queue.id + policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Sid = "JobAttachmentsReadWrite" + Effect = "Allow" + Action = ["s3:GetObject", "s3:PutObject", "s3:ListBucket", "s3:GetBucketLocation"] + Resource = ["arn:${local.partition}:s3:::${var.job_attachments_bucket_name}", "arn:${local.partition}:s3:::${var.job_attachments_bucket_name}/DeadlineCloud/*"] + Condition = { StringEquals = { "aws:ResourceAccount" =
local.account_id } } + }, + { + Sid = "CondaChannelReadOnly" + Effect = "Allow" + Action = ["s3:GetObject", "s3:ListBucket"] + Resource = ["arn:${local.partition}:s3:::${var.job_attachments_bucket_name}", "arn:${local.partition}:s3:::${var.job_attachments_bucket_name}/Conda/*"] + Condition = { StringEquals = { "aws:ResourceAccount" = local.account_id } } + }, + { + Sid = "JobLogsReadOnly" + Effect = "Allow" + Action = ["logs:GetLogEvents"] + Resource = "arn:${local.partition}:logs:${var.aws_region}:${local.account_id}:log-group:/aws/deadline/${awscc_deadline_farm.main.farm_id}/*" + }, + { + Sid = "DeadlineServiceManagedFleetSoftwareAccess" + Effect = "Allow" + Action = ["s3:GetObject", "s3:ListBucket"] + Resource = ["*"] + Condition = { + ArnLike = { "s3:DataAccessPointArn" = "arn:${local.partition}:s3:*:*:accesspoint/deadline-software-*" } + StringEquals = { "s3:AccessPointNetworkOrigin" = "VPC" } + } + } + ] + }) +} + +# Production Queue (job attachments stored under the DeadlineCloud/ prefix of the bucket) +resource "awscc_deadline_queue" "prod" { + display_name = var.prod_queue_name + description = var.prod_queue_description + farm_id = awscc_deadline_farm.main.farm_id + role_arn = aws_iam_role.queue.arn + job_attachment_settings = { + s3_bucket_name = var.job_attachments_bucket_name + root_prefix = "DeadlineCloud" + } +} + +# Conda Queue Environment (rendered from conda_queue_env.yaml.tftpl and attached to the production queue only) +resource "awscc_deadline_queue_environment" "conda" { + farm_id = awscc_deadline_farm.main.farm_id + queue_id = awscc_deadline_queue.prod.queue_id + priority = 1 + template = templatefile("${path.module}/conda_queue_env.yaml.tftpl", { + job_attachments_bucket_name = var.job_attachments_bucket_name + prod_conda_channels = var.prod_conda_channels + }) + template_type = "YAML" +} + +# Package Build Queue IAM Role (same trust policy as the production queue role) +resource "aws_iam_role" "queue_package_build" { + name = "PackageBuildQueue-${awscc_deadline_farm.main.farm_id}" + assume_role_policy = jsonencode({ + Version = "2012-10-17" + Statement = [{ + Effect = "Allow" + Principal = { Service = ["deadline.${local.url_suffix}", "credentials.deadline.${local.url_suffix}"] } + Action =
"sts:AssumeRole" + Condition = { + StringEquals = { "aws:SourceAccount" = local.account_id } + ArnEquals = { "aws:SourceArn" = awscc_deadline_farm.main.arn } + } + }] + }) +} + +resource "aws_iam_role_policy" "queue_package_build" { # unlike the production queue policy, this one may also write and delete objects under Conda/ + name = "QueuePolicy" + role = aws_iam_role.queue_package_build.id + policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Sid = "JobAttachmentsReadWrite" + Effect = "Allow" + Action = ["s3:GetObject", "s3:PutObject", "s3:ListBucket", "s3:GetBucketLocation"] + Resource = ["arn:${local.partition}:s3:::${var.job_attachments_bucket_name}", "arn:${local.partition}:s3:::${var.job_attachments_bucket_name}/DeadlineCloudPkgBld/*"] + Condition = { StringEquals = { "aws:ResourceAccount" = local.account_id } } + }, + { + Sid = "CondaChannelReadWrite" + Effect = "Allow" + Action = ["s3:GetObject", "s3:ListBucket", "s3:PutObject", "s3:DeleteObject"] + Resource = ["arn:${local.partition}:s3:::${var.job_attachments_bucket_name}", "arn:${local.partition}:s3:::${var.job_attachments_bucket_name}/Conda/*"] + Condition = { StringEquals = { "aws:ResourceAccount" = local.account_id } } + }, + { + Sid = "JobLogsReadOnly" + Effect = "Allow" + Action = ["logs:GetLogEvents"] + Resource = "arn:${local.partition}:logs:${var.aws_region}:${local.account_id}:log-group:/aws/deadline/${awscc_deadline_farm.main.farm_id}/*" + }, + { + Sid = "DeadlineServiceManagedFleetSoftwareAccess" + Effect = "Allow" + Action = ["s3:GetObject", "s3:ListBucket"] + Resource = ["*"] + Condition = { + ArnLike = { "s3:DataAccessPointArn" = "arn:${local.partition}:s3:*:*:accesspoint/deadline-software-*" } + StringEquals = { "s3:AccessPointNetworkOrigin" = "VPC" } + } + } + ] + }) +} + +# Package Build Queue (job attachments stored under the DeadlineCloudPkgBld/ prefix of the bucket) +resource "awscc_deadline_queue" "package_build" { + display_name = var.package_build_queue_name + description = var.package_build_queue_description + farm_id = awscc_deadline_farm.main.farm_id + role_arn = aws_iam_role.queue_package_build.arn + job_attachment_settings = { + s3_bucket_name = var.job_attachments_bucket_name + root_prefix = "DeadlineCloudPkgBld"
+  }
+}
+
+# Fleet IAM Role (shared by all fleets)
+# The trust policy lets only the Deadline Cloud credentials service assume the
+# role, and only for requests from this account on behalf of this farm.
+resource "aws_iam_role" "fleet" {
+  name = "Fleet-${awscc_deadline_farm.main.farm_id}"
+  assume_role_policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [{
+      Effect    = "Allow"
+      Principal = { Service = "credentials.deadline.${local.url_suffix}" }
+      Action    = "sts:AssumeRole"
+      Condition = {
+        StringEquals = { "aws:SourceAccount" = local.account_id }
+        ArnEquals    = { "aws:SourceArn" = awscc_deadline_farm.main.arn }
+      }
+    }]
+  })
+}
+
+# Attach the AWS managed fleet worker policy through a dedicated attachment
+# resource; the managed_policy_arns argument of aws_iam_role is deprecated.
+resource "aws_iam_role_policy_attachment" "fleet_worker" {
+  role       = aws_iam_role.fleet.name
+  policy_arn = "arn:${local.partition}:iam::aws:policy/AWSDeadlineCloud-FleetWorker"
+}
+
+# Inline policy letting the fleet role write and read log events under this
+# farm's /aws/deadline/ log resources.
+resource "aws_iam_role_policy" "fleet_logs" {
+  name = "FleetWorkerLogs"
+  role = aws_iam_role.fleet.id
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        Effect   = "Allow"
+        Action   = ["logs:CreateLogStream"]
+        Resource = "arn:${local.partition}:logs:${var.aws_region}:${local.account_id}:*:/aws/deadline/${awscc_deadline_farm.main.farm_id}/*"
+        # Log streams may only be created when the call is made via the
+        # Deadline Cloud service.
+        Condition = { "ForAnyValue:StringEquals" = { "aws:CalledVia" = "deadline.${local.url_suffix}" } }
+      },
+      {
+        Effect   = "Allow"
+        Action   = ["logs:PutLogEvents", "logs:GetLogEvents"]
+        Resource = "arn:${local.partition}:logs:${var.aws_region}:${local.account_id}:*:/aws/deadline/${awscc_deadline_farm.main.farm_id}/*"
+      }
+    ]
+  })
+}
+
+# CPU Linux Fleet
+resource "awscc_deadline_fleet" "cpu_linux" {
+  count = local.has_cpu_linux ?
1 : 0
+  farm_id      = awscc_deadline_farm.main.farm_id
+  display_name = var.cpu_linux_fleet_name
+  role_arn     = aws_iam_role.fleet.arn
+
+  # Worker count floor is fixed at 0; the ceiling is configurable.
+  min_worker_count = 0
+  max_worker_count = var.max_cpu_linux_worker_count
+
+  configuration = {
+    service_managed_ec_2 = {
+      instance_market_options = {
+        type = var.cpu_linux_instance_market_type
+      }
+      instance_capabilities = {
+        os_family             = "LINUX"
+        cpu_architecture_type = "x86_64"
+        v_cpu_count = {
+          min = var.min_cpu_linux_vcpu
+          max = var.max_cpu_linux_vcpu
+        }
+        # Only a lower bound is set for memory.
+        memory_mi_b = {
+          min = var.min_cpu_linux_ram_mib
+        }
+        # Root volume settings come from variables shared by every fleet.
+        root_ebs_volume = {
+          size_gi_b       = var.root_ebs_volume_size_gib
+          iops            = var.root_ebs_volume_iops
+          throughput_mi_b = var.root_ebs_volume_throughput_mib
+        }
+      }
+    }
+  }
+}
+
+# Service-managed CPU fleet running Windows; created only when
+# local.has_cpu_windows is true.
+resource "awscc_deadline_fleet" "cpu_windows" {
+  count = local.has_cpu_windows ? 1 : 0
+
+  farm_id      = awscc_deadline_farm.main.farm_id
+  display_name = var.cpu_windows_fleet_name
+  role_arn     = aws_iam_role.fleet.arn
+
+  # Worker count floor is fixed at 0; the ceiling is configurable.
+  min_worker_count = 0
+  max_worker_count = var.max_cpu_windows_worker_count
+
+  configuration = {
+    service_managed_ec_2 = {
+      instance_market_options = {
+        type = var.cpu_windows_instance_market_type
+      }
+      instance_capabilities = {
+        os_family             = "WINDOWS"
+        cpu_architecture_type = "x86_64"
+        v_cpu_count = {
+          min = var.min_cpu_windows_vcpu
+          max = var.max_cpu_windows_vcpu
+        }
+        # Only a lower bound is set for memory.
+        memory_mi_b = {
+          min = var.min_cpu_windows_ram_mib
+        }
+        # Root volume settings come from variables shared by every fleet.
+        root_ebs_volume = {
+          size_gi_b       = var.root_ebs_volume_size_gib
+          iops            = var.root_ebs_volume_iops
+          throughput_mi_b = var.root_ebs_volume_throughput_mib
+        }
+      }
+    }
+  }
+}
+
+# CUDA Linux Fleet
+resource "awscc_deadline_fleet" "cuda_linux" {
+  count = local.has_cuda_linux ?
1 : 0
+  farm_id      = awscc_deadline_farm.main.farm_id
+  display_name = var.cuda_linux_fleet_name
+  role_arn     = aws_iam_role.fleet.arn
+
+  # Worker count floor is fixed at 0; the ceiling is configurable.
+  min_worker_count = 0
+  max_worker_count = var.max_cuda_linux_worker_count
+
+  configuration = {
+    service_managed_ec_2 = {
+      instance_market_options = {
+        type = var.cuda_linux_instance_market_type
+      }
+      instance_capabilities = {
+        os_family             = "LINUX"
+        cpu_architecture_type = "x86_64"
+        v_cpu_count = {
+          min = var.min_cuda_linux_vcpu
+          max = var.max_cuda_linux_vcpu
+        }
+        # Only a lower bound is set for memory.
+        memory_mi_b = {
+          min = var.min_cuda_linux_ram_mib
+        }
+        # Root volume settings come from variables shared by every fleet.
+        root_ebs_volume = {
+          size_gi_b       = var.root_ebs_volume_size_gib
+          iops            = var.root_ebs_volume_iops
+          throughput_mi_b = var.root_ebs_volume_throughput_mib
+        }
+        # Accelerator count is pinned to 1 (min = max); the accelerator is
+        # selected from the a10g and l4 entries, each with runtime "latest".
+        accelerator_capabilities = {
+          count = {
+            min = 1
+            max = 1
+          }
+          selections = [
+            {
+              name    = "a10g"
+              runtime = "latest"
+            },
+            {
+              name    = "l4"
+              runtime = "latest"
+            }
+          ]
+        }
+      }
+    }
+  }
+}
+
+# Production queue associations: one per fleet, each gated by the same
+# local.has_* flag that controls whether the fleet itself is created.
+resource "awscc_deadline_queue_fleet_association" "prod_cpu_linux" {
+  count = local.has_cpu_linux ? 1 : 0
+
+  fleet_id = awscc_deadline_fleet.cpu_linux[0].fleet_id
+  queue_id = awscc_deadline_queue.prod.queue_id
+  farm_id  = awscc_deadline_farm.main.farm_id
+}
+
+resource "awscc_deadline_queue_fleet_association" "prod_cpu_windows" {
+  count = local.has_cpu_windows ? 1 : 0
+
+  fleet_id = awscc_deadline_fleet.cpu_windows[0].fleet_id
+  queue_id = awscc_deadline_queue.prod.queue_id
+  farm_id  = awscc_deadline_farm.main.farm_id
+}
+
+resource "awscc_deadline_queue_fleet_association" "prod_cuda_linux" {
+  count = local.has_cuda_linux ? 1 : 0
+
+  fleet_id = awscc_deadline_fleet.cuda_linux[0].fleet_id
+  queue_id = awscc_deadline_queue.prod.queue_id
+  farm_id  = awscc_deadline_farm.main.farm_id
+}
+
+# Queue-Fleet Associations - Package Build Queue
+resource "awscc_deadline_queue_fleet_association" "pkg_cpu_linux" {
+  count = local.has_cpu_linux ?
1 : 0
+  farm_id  = awscc_deadline_farm.main.farm_id
+  queue_id = awscc_deadline_queue.package_build.queue_id
+  fleet_id = awscc_deadline_fleet.cpu_linux[0].fleet_id
+}
+
+# Each association exists only when its fleet does, so indexing the fleet
+# with [0] is safe under the matching count guard.
+resource "awscc_deadline_queue_fleet_association" "pkg_cpu_windows" {
+  count    = local.has_cpu_windows ? 1 : 0
+  farm_id  = awscc_deadline_farm.main.farm_id
+  queue_id = awscc_deadline_queue.package_build.queue_id
+  fleet_id = awscc_deadline_fleet.cpu_windows[0].fleet_id
+}
+
+resource "awscc_deadline_queue_fleet_association" "pkg_cuda_linux" {
+  count    = local.has_cuda_linux ? 1 : 0
+  farm_id  = awscc_deadline_farm.main.farm_id
+  queue_id = awscc_deadline_queue.package_build.queue_id
+  fleet_id = awscc_deadline_fleet.cuda_linux[0].fleet_id
+}
+
+# Outputs
+output "farm_id" {
+  description = "ID of the Deadline Cloud farm."
+  value       = awscc_deadline_farm.main.farm_id
+}
+
+output "farm_arn" {
+  description = "ARN of the Deadline Cloud farm."
+  value       = awscc_deadline_farm.main.arn
+}
+
+output "prod_queue_id" {
+  description = "ID of the production queue."
+  value       = awscc_deadline_queue.prod.queue_id
+}
+
+output "package_build_queue_id" {
+  description = "ID of the package build queue."
+  value       = awscc_deadline_queue.package_build.queue_id
+}
+
+# The fleet outputs are null when the corresponding fleet is not created.
+output "cpu_linux_fleet_id" {
+  description = "ID of the CPU Linux fleet, or null when that fleet is not created."
+  value       = local.has_cpu_linux ? awscc_deadline_fleet.cpu_linux[0].fleet_id : null
+}
+
+output "cpu_windows_fleet_id" {
+  description = "ID of the CPU Windows fleet, or null when that fleet is not created."
+  value       = local.has_cpu_windows ? awscc_deadline_fleet.cpu_windows[0].fleet_id : null
+}
+
+output "cuda_linux_fleet_id" {
+  description = "ID of the CUDA Linux fleet, or null when that fleet is not created."
+  value = local.has_cuda_linux ?
awscc_deadline_fleet.cuda_linux[0].fleet_id : null +} From 251a73714e00f87b0f3948254e9bc2634ce2e0bb Mon Sep 17 00:00:00 2001 From: Tommy Doerr Date: Wed, 27 May 2026 09:49:54 -0700 Subject: [PATCH 2/3] fix: Address review comments on starter_farm Terraform template - Fix farm_description to say "Terraform template" instead of "CloudFormation template" - Include S3 channel and deadline-cloud channel in CLI CondaChannels example to match the queue environment defaults Signed-off-by: Tommy Doerr --- terraform/farm_templates/starter_farm/README.md | 2 +- terraform/farm_templates/starter_farm/main.tf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/terraform/farm_templates/starter_farm/README.md b/terraform/farm_templates/starter_farm/README.md index 96c4fb0d..1095999e 100644 --- a/terraform/farm_templates/starter_farm/README.md +++ b/terraform/farm_templates/starter_farm/README.md @@ -163,7 +163,7 @@ You can also submit the sample job with a single command from your terminal as f $ deadline bundle submit cli_job \ --name "Starter farm test job" \ -p CondaPackages=imagemagick \ - -p CondaChannels=conda-forge \ + -p "CondaChannels=s3://your-s3-bucket-name/Conda/Default deadline-cloud conda-forge" \ -p DataDir=./turntable_with_maya_arnold \ -p 'BashScript=find . -type f -iname "*.png" -exec magick identify {} \; | tee identified_images.txt' ``` diff --git a/terraform/farm_templates/starter_farm/main.tf b/terraform/farm_templates/starter_farm/main.tf index 58752afd..dc978fef 100644 --- a/terraform/farm_templates/starter_farm/main.tf +++ b/terraform/farm_templates/starter_farm/main.tf @@ -42,7 +42,7 @@ variable "farm_name" { variable "farm_description" { type = string - default = "Deadline Cloud farm deployed from the starter_farm sample CloudFormation template." + default = "Deadline Cloud farm deployed from the starter_farm sample Terraform template." 
} variable "prod_queue_name" { From 35f8258a2cd63b88cd169a99ff9d08d8ff5f9c94 Mon Sep 17 00:00:00 2001 From: Tommy Doerr Date: Wed, 27 May 2026 11:51:16 -0700 Subject: [PATCH 3/3] fix: Remove unused NamedCondaEnvUpdateAfterMinutes parameter This parameter was defined but never referenced in the queue environment script. It exists in the improved_caching variant where it's actually used, but is dead code in the starter farm. Signed-off-by: Tommy Doerr --- .../farm_templates/starter_farm/conda_queue_env.yaml.tftpl | 7 ------- 1 file changed, 7 deletions(-) diff --git a/terraform/farm_templates/starter_farm/conda_queue_env.yaml.tftpl b/terraform/farm_templates/starter_farm/conda_queue_env.yaml.tftpl index 28dbbbae..c405cfc9 100644 --- a/terraform/farm_templates/starter_farm/conda_queue_env.yaml.tftpl +++ b/terraform/farm_templates/starter_farm/conda_queue_env.yaml.tftpl @@ -31,13 +31,6 @@ parameterDefinitions: userInterface: control: DROPDOWN_LIST label: Named Conda Environment Action - - name: NamedCondaEnvUpdateAfterMinutes - type: INT - description: Minutes before updating the named environment. - default: 600 - userInterface: - control: SPIN_BOX - label: Update After (Minutes) - name: RunCondaClean type: STRING description: Run conda clean before creating environment.