Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 14 additions & 16 deletions plantcv/parallel/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,26 +65,24 @@ def _read_checkpoint_data(df, config, removed_df):
if "chkpt_start_dir" not in config.__dict__:
config.chkpt_start_dir = config.tmp_dir
# look for any json files in a checkpoint directory (made by run_parallel)
# any that have a "completed" companion file should be appended to list
# to read in as existing results
existing_json = []
for _, _, files in os.walk(os.path.join(config.chkpt_start_dir, "_PCV_PARALLEL_CHECKPOINT_")):
for root, _, files in os.walk(os.path.join(config.chkpt_start_dir, "_PCV_PARALLEL_CHECKPOINT_")):
for file in files:
if file.lower().endswith(".json"):
existing_json.append(file)
# if there are json files in checkpoint then this is a re-run
if file.lower().endswith(".json") and os.path.exists(
os.path.join(root, os.path.splitext(file)[0]+"_complete")):
existing_json.append(os.path.join(root, file))
# if there are completed checkpoint files in checkpoint then this is a re-run with existing data
if any(existing_json) and config.checkpoint:
ran_list = [pd.DataFrame()]
# look through checkpoint directory for json without "completed" companion file
for root, _, files in os.walk(os.path.join(config.chkpt_start_dir, "_PCV_PARALLEL_CHECKPOINT_")):
for file in files:
if file.lower().endswith(".json") and os.path.exists(
os.path.join(root, os.path.splitext(file)[0]+"_complete")
):
with open(os.path.join(root, file), "r") as fp:
j = json.load(fp)["metadata"]
row = {}
for var in j:
row[var] = j[var]["value"]
ran_list.append(pd.DataFrame.from_dict(row))
for file in existing_json:
with open(file, "r") as fp:
j = json.load(fp)["metadata"]
row = {}
for var in j:
row[var] = j[var]["value"]
ran_list.append(pd.DataFrame.from_dict(row))
# bind to metadata dataframe
already_run = pd.concat(ran_list)
already_run = already_run[already_run["filepath"].notna()]
Expand Down
3 changes: 2 additions & 1 deletion plantcv/parallel/run_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ def run_parallel(config):
# if a logs directory is specified then add the tmpdir inside it.
if config.cluster_config["log_directory"] is not None:
config.cluster_config["log_directory"] = os.path.join(
config.cluster_config["log_directory"], os.path.basename(config.tmp_dir)
config.cluster_config["log_directory"],
"log_" + os.path.basename(config.tmp_dir)
)
os.makedirs(config.cluster_config["log_directory"], exist_ok=True)
# Create img_outdir
Expand Down