Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 28 additions & 23 deletions cfa/cloudops/_cloudclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,9 @@ def create_pool(
cache_blobfuse (bool): Whether to enable blobfuse caching for mounted storage.
Improves performance for read-heavy workloads. Default is True.
replace_existing_pool (bool): Whether to replace the existing pool if it already exists. Default is False.
enable_node_monitoring (bool): Whether to enable node profiling.
monitoring_script_url (str): SAS-token blob URL of the profiler script. Required when node monitoring is enabled.
monitoring_interval_seconds (int): Interval, in seconds, at which the monitoring script gathers profiling data on the node.

Raises:
RuntimeError: If the pool creation fails due to Azure Batch service errors,
Expand Down Expand Up @@ -354,30 +357,32 @@ def create_pool(
raise ValueError(
"monitoring_script_url is required when enabling node monitoring"
)
else:
start_task_command = rf"""/bin/bash -c 'set -euo pipefail mkdir
-p /mnt/batch/tasks/startup/wd/node-metrics chmod +x ./start-metrics.sh
nohup ./start-metrics.sh {monitoring_interval_seconds} \

start_task_command = rf"""/bin/bash -c '
set -euo pipefail &&
mkdir -p /mnt/batch/tasks/startup/wd/node-metrics
chmod +x ./start-metrics.sh
nohup ./start-metrics.sh {monitoring_interval_seconds} output \
>/mnt/batch/tasks/startup/wd/node-metrics/collector.out \
2>/mnt/batch/tasks/startup/wd/node-metrics/collector.err &'
"""

pool_config.start_task = models.StartTask(
command_line=start_task_command,
wait_for_success=True,
resource_files=[
models.ResourceFile(
http_url=monitoring_script_url,
file_path="start-metrics.sh",
)
],
user_identity=models.UserIdentity(
auto_user=models.AutoUserSpecification(
scope=models.AutoUserScope.pool,
elevation_level=models.ElevationLevel.admin,
)
),
)
2>/mnt/batch/tasks/startup/wd/node-metrics/collector.err &
'"""

pool_config.start_task = models.StartTask(
command_line=start_task_command,
wait_for_success=True,
resource_files=[
models.ResourceFile(
http_url=monitoring_script_url,
file_path="start-metrics.sh",
)
],
user_identity=models.UserIdentity(
auto_user=models.AutoUserSpecification(
scope=models.AutoUserScope.pool,
elevation_level=models.ElevationLevel.admin,
)
),
)

# Configure scaling settings
if autoscale:
Expand Down
3 changes: 3 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/).
The versioning pattern is `major.minor.patch`.

---
## v0.3.22
- Added/updated node profiling option to `create_pool` function in `_cloudclient.py`

## v0.3.21
- Added metadata lookup methods for Azure Function Apps to the `FunctionAppClient` module

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "cfa.cloudops"
version = "0.3.21"
version = "0.3.22"
description = "Cloud storage, batch, functions, MLOps assistance"
authors = [
{name = "Ryan Raasch", email = "xng3@cdc.gov"}
Expand Down
Loading