Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
230 changes: 135 additions & 95 deletions crates/tower-package/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,15 @@ impl Package {
// less.
let base_dir = spec.base_dir.canonicalize()?;

// Canonicalize import paths upfront so the resolver can whitelist files within them.
let canonical_import_paths: Vec<PathBuf> = spec
.import_paths
.iter()
.map(|p| base_dir.join(p).canonicalize())
.collect::<Result<Vec<_>, _>>()?;

let resolver = FileResolver::new(base_dir.clone(), canonical_import_paths.clone());

let tmp_dir = TmpDir::new("tower-package").await?;
let package_path = tmp_dir.to_path_buf().join("package.tar");
debug!("building package at: {:?}", package_path);
Expand Down Expand Up @@ -253,7 +262,7 @@ impl Package {

for file_glob in file_globs {
let path = base_dir.join(file_glob);
resolve_glob_path(path, &base_dir, &mut file_paths).await;
resolver.resolve_glob(path, &mut file_paths).await;
}

// App code lives in the app dir
Expand All @@ -277,13 +286,10 @@ impl Package {
let mut import_paths = vec![];

// Now we need to package up all the modules to include in the code base too.
for import_path in &spec.import_paths {
// The import_path should always be relative to the base_path.
let import_path = base_dir.join(import_path).canonicalize()?;
let parent = import_path.parent().unwrap();
for import_path in &canonical_import_paths {

let mut file_paths = HashMap::new();
resolve_path(&import_path, parent, &mut file_paths).await;
resolver.resolve_path(&import_path, &mut file_paths).await;

// The file_name should constitute the logical path
let import_path = import_path.file_name().unwrap();
Expand Down Expand Up @@ -440,73 +446,6 @@ async fn unpack_archive<P: AsRef<Path>>(
Ok(())
}

async fn resolve_glob_path(
path: PathBuf,
base_dir: &PathBuf,
file_paths: &mut HashMap<PathBuf, PathBuf>,
) {
let path_str = extract_glob_path(path);
debug!("resolving glob pattern: {}", path_str);

for entry in glob(&path_str).unwrap() {
resolve_path(&entry.unwrap(), base_dir, file_paths).await;
}
}

async fn resolve_path(path: &PathBuf, base_dir: &Path, file_paths: &mut HashMap<PathBuf, PathBuf>) {
let mut queue = VecDeque::new();
queue.push_back(path.to_path_buf());

while let Some(current_path) = queue.pop_front() {
let canonical_path = current_path.canonicalize();

if canonical_path.is_err() {
debug!(
" - skipping path {}: {}",
current_path.display(),
canonical_path.unwrap_err()
);
continue;
}

// We can safely unwrap this because we understand that it's not going to fail at this
// point.
let physical_path = canonical_path.unwrap();

if physical_path.is_dir() {
let mut entries = tokio::fs::read_dir(&physical_path).await.unwrap();

while let Some(entry) = entries.next_entry().await.unwrap() {
queue.push_back(entry.path());
}
} else {
if !should_ignore_file(&physical_path) {
let cp = physical_path.clone();

match cp.strip_prefix(base_dir) {
Err(err) => {
debug!(
" - skipping file {}: not in base directory {}: {:?}",
physical_path.display(),
base_dir.display(),
err
);
continue;
}
Ok(logical_path) => {
debug!(
" - resolved path {} to logical path {}",
physical_path.display(),
logical_path.display()
);
file_paths.insert(physical_path, logical_path.to_path_buf());
}
}
}
}
}
}

fn is_in_dir(p: &PathBuf, dir: &str) -> bool {
let mut comps = p.components();
comps.any(|comp| {
Expand All @@ -526,37 +465,139 @@ fn is_file(p: &PathBuf, name: &str) -> bool {
}
}

fn should_ignore_file(p: &PathBuf) -> bool {
// Ignore anything that is compiled python
if p.ends_with(".pyc") {
return true;
}
struct FileResolver {
// base_dir is the directory from which logical paths are computed.
base_dir: PathBuf,

if is_file(p, "Towerfile") {
return true;
}
// import_paths are canonicalized paths to imported directories. Files within these directories
// are also allowed, with logical paths computed relative to each import path's parent.
import_paths: Vec<PathBuf>,
}

// Ignore a .gitignore file
if is_file(p, ".gitignore") {
return true;
impl FileResolver {
fn new(base_dir: PathBuf, import_paths: Vec<PathBuf>) -> Self {
Self {
base_dir,
import_paths,
}
}

// Remove anything thats __pycache__
if is_in_dir(p, "__pycache__") {
return true;
fn should_ignore(&self, p: &PathBuf) -> bool {
// Ignore anything that is compiled python
if p.ends_with(".pyc") {
return true;
}

// Only exclude the root Towerfile (base_dir/Towerfile). Since base_dir is already
// canonicalized, we can derive this path directly. Towerfiles in sub-directories are
// legitimate app content and must be preserved.
if p == &self.base_dir.join("Towerfile") {
return true;
}

// Ignore a .gitignore file
if is_file(p, ".gitignore") {
return true;
}

// Remove anything thats __pycache__
if is_in_dir(p, "__pycache__") {
return true;
}

// Ignore anything that lives within a .git directory
if is_in_dir(p, ".git") {
return true;
}

// Ignore anything that's in a virtualenv, too
if is_in_dir(p, ".venv") {
return true;
}

false
}

// Ignore anything that lives within a .git directory
if is_in_dir(p, ".git") {
return true;
fn logical_path<'a>(&self, physical_path: &'a Path) -> Option<&'a Path> {
if let Ok(p) = physical_path.strip_prefix(&self.base_dir) {
return Some(p);
}

// Try each import path's parent as a prefix. This allows files within import paths
// (which may live outside base_dir) to be resolved with logical paths that preserve
// the import directory name (e.g. "shared_lib/foo.py").
for import_path in &self.import_paths {
if let Some(parent) = import_path.parent() {
if let Ok(p) = physical_path.strip_prefix(parent) {
return Some(p);
}
}
}

None
}

// Ignore anything that's in a virtualenv, too
if is_in_dir(p, ".venv") {
return true;
async fn resolve_glob(&self, path: PathBuf, file_paths: &mut HashMap<PathBuf, PathBuf>) {
let path_str = extract_glob_path(path);
debug!("resolving glob pattern: {}", path_str);

for entry in glob(&path_str).unwrap() {
self.resolve_path(&entry.unwrap(), file_paths).await;
}
}

return false;
async fn resolve_path(&self, path: &PathBuf, file_paths: &mut HashMap<PathBuf, PathBuf>) {
let mut queue = VecDeque::new();
queue.push_back(path.to_path_buf());

while let Some(current_path) = queue.pop_front() {
let canonical_path = current_path.canonicalize();

if canonical_path.is_err() {
debug!(
" - skipping path {}: {}",
current_path.display(),
canonical_path.unwrap_err()
);
continue;
}

// We can safely unwrap this because we understand that it's not going to fail at this
// point.
let physical_path = canonical_path.unwrap();

if physical_path.is_dir() {
let mut entries = tokio::fs::read_dir(&physical_path).await.unwrap();

while let Some(entry) = entries.next_entry().await.unwrap() {
queue.push_back(entry.path());
}
} else {
if !self.should_ignore(&physical_path) {
let cp = physical_path.clone();

match self.logical_path(&cp) {
None => {
debug!(
" - skipping file {}: not in base directory {}: ...",
physical_path.display(),
self.base_dir.display(),
);
continue;
}
Some(logical_path) => {
debug!(
" - resolved path {} to logical path {}",
physical_path.display(),
logical_path.display()
);
file_paths.insert(physical_path, logical_path.to_path_buf());
}
}
}
}
}
}
}

// normalize_path converts a Path to a normalized string with forward slashes as separators.
Expand Down Expand Up @@ -650,7 +691,6 @@ pub async fn compute_sha256_file(file_path: &PathBuf) -> Result<String, Error> {
#[cfg(test)]
mod test {
use super::*;
use std::collections::HashMap;
use std::path::PathBuf;

#[tokio::test]
Expand Down
64 changes: 64 additions & 0 deletions crates/tower-package/tests/package_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,70 @@ async fn building_package_spec_from_towerfile() {
assert_eq!(spec.schedule, Some("0 0 * * *".to_string()));
}

#[tokio::test]
async fn it_includes_subapp_towerfiles_but_excludes_root_towerfile() {
// When a project contains sub-apps with their own Towerfiles, only the root Towerfile (the
// one used to build the package) should be excluded. Towerfiles belonging to sub-apps must
// be included so those apps can function correctly.
let tmp_dir = TmpDir::new("subapp-towerfile")
.await
.expect("Failed to create temp dir");

// Root app files
create_test_file(tmp_dir.to_path_buf(), "Towerfile", "[app]\nname = \"root\"").await;
create_test_file(tmp_dir.to_path_buf(), "main.py", "print('Hello, world!')").await;

// Sub-app with its own Towerfile
create_test_file(tmp_dir.to_path_buf(), "subapp/Towerfile", "[app]\nname = \"subapp\"").await;
create_test_file(tmp_dir.to_path_buf(), "subapp/main.py", "print('subapp')").await;

let spec = PackageSpec {
invoke: "main.py".to_string(),
base_dir: tmp_dir.to_path_buf(),
towerfile_path: tmp_dir.to_path_buf().join("Towerfile"),
file_globs: vec![],
parameters: vec![],
schedule: None,
import_paths: vec![],
};

let package = Package::build(spec).await.expect("Failed to build package");
let files = read_package_files(package).await;

// Root Towerfile should NOT be in the app directory (it's added separately as "Towerfile")
assert!(
!files.contains_key("app/Towerfile"),
"files {:?} should not contain the root Towerfile under app/",
files
);

// The root Towerfile is still bundled at the top level for reference
assert!(
files.contains_key("Towerfile"),
"files {:?} should contain the root Towerfile at the top level",
files
);

// Sub-app's Towerfile MUST be included
assert!(
files.contains_key("app/subapp/Towerfile"),
"files {:?} should contain the sub-app Towerfile",
files
);

// Other files should be present
assert!(
files.contains_key("app/main.py"),
"files {:?} was missing main.py",
files
);
assert!(
files.contains_key("app/subapp/main.py"),
"files {:?} was missing subapp/main.py",
files
);
}

#[tokio::test]
async fn it_includes_hidden_parameters_in_manifest() {
let tmp_dir = TmpDir::new("hidden-params")
Expand Down