Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions balancer/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,72 @@ def _set_enabled(cfg: dict[str, Any], override_cfg: dict[str, Any]):
disk_cfg["rate"][p] = disk_rate_cfg

if modified:
logger.info(f"Configuration updated: {section} - {list(yaml_updates.keys())}")
self._patch_limit_policy_yaml(yaml_updates, self._config_path)

return modified


def update_config_section(self, section: str, updates: dict[str, Any]) -> bool:
    """Update keys inside one top-level config section and persist the changes.

    The section is looked up as an attribute of this object. A missing or
    non-dict attribute is replaced by a fresh dict. For keys that already
    exist with an ``int``, ``float``, ``bool`` or ``str`` value, the incoming
    value is converted to that type; values for new keys are stored as given.
    Keys whose value cannot be converted are logged and skipped.

    Args:
        section: The top-level config section name (e.g., 'weights_top', 'thresholds')
        updates: Dictionary with key-value pairs to update within that section

    Returns:
        True if at least one value actually changed (and was handed to the
        YAML patcher); False if the arguments were invalid or every requested
        value was already in place / could not be converted.
    """
    from utils.logger import logger

    # getattr() raises TypeError for a non-string name, so reject it up front
    # together with empty names and non-dict payloads.
    if not isinstance(section, str) or not section or not isinstance(updates, dict):
        return False

    def _coerce(current: Any, value: Any) -> Any:
        """Convert ``value`` to the scalar type of ``current`` (ValueError/TypeError on failure)."""
        if isinstance(current, bool):
            if isinstance(value, str):
                # bool("false") is True, so textual booleans are parsed explicitly.
                text = value.strip().lower()
                if text in ("true", "1", "yes", "on"):
                    return True
                if text in ("false", "0", "no", "off", ""):
                    return False
                raise ValueError(f"cannot interpret {value!r} as a boolean")
            return bool(value)
        current_type = type(current)
        if current_type in (int, float, str):
            return current_type(value)
        # Containers, None, etc.: keep the incoming value untouched.
        return value

    modified = False
    yaml_updates = {}

    with self._persist_lock:
        # Get the section, or start a fresh dict when it is missing / not a dict.
        section_data = getattr(self, section, None)
        if not isinstance(section_data, dict):
            section_data = {}

        for key, value in updates.items():
            try:
                new_value = _coerce(section_data[key], value) if key in section_data else value
            except (TypeError, ValueError) as e:
                logger.warning(f"Could not update {section}.{key}: {e}")
                continue

            if section_data.get(key) != new_value:
                section_data[key] = new_value
                yaml_updates[(section, key)] = new_value
                modified = True

        # Publish the (possibly newly created) dict as the section attribute.
        setattr(self, section, section_data)

    # NOTE(review): persistence runs after the lock is released so that a
    # lock-taking _patch_limit_policy_yaml cannot deadlock; confirm that helper
    # serialises its own file writes.
    if modified:
        logger.info(f"Configuration updated: {section} - {list(yaml_updates.keys())}")
        self._patch_limit_policy_yaml(yaml_updates, self._config_path)

    return modified
Expand Down
5 changes: 2 additions & 3 deletions balancer/config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,7 @@ controlled_apps:
weights_top:
cpu: 2
memory: 7
io: 1 # reserved; IO is ranked separately via mode='io' and does not contribute to the default score
gpu: 5 # added to the default score as a secondary factor after GPU data is collected
gpu: 5 # added to the default score as a secondary factor after GPU data is collected
blacklist:
- systemd
- kworker
Expand Down Expand Up @@ -133,7 +132,7 @@ network_thresholds:
medium: 0.5
high: 0.7
critical: 0.9
network_interface: "enp9s0" # 请根据实际情况修改为你的网络接口名称
network_interface: "enp2s0" # 请根据实际情况修改为你的网络接口名称
network_bandwidth_kbit: 100000 # 当前设置为100Mbps(小了10倍)便于测试,请根据实际带宽调整,单位为kbit/s
config_network_bw: # kbit/s, 不同优先级对应的带宽范围设置, 为了便于测试都缩小了10倍
system:
Expand Down
3 changes: 3 additions & 0 deletions balancer/dashboard/src/api/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,4 +93,7 @@ export const api = {
post<ResourceLimitProfileData>('/app/resource_limit_profile', payload),
resourceRestore: (payload: Pick<AppIdPayload, 'app_id'>) =>
post<void>('/app/resource_restore', payload),
getWeightsTop: () => get<{ cpu: number; memory: number; gpu: number }>('/monitor/config/weights_top'),
updateWeightsTop: (weights: { cpu?: number; memory?: number; gpu?: number }) =>
post<{ success: boolean; updated_weights: { cpu: number; memory: number; gpu: number } }>('/monitor/config/weights_top', weights),
}
17 changes: 16 additions & 1 deletion balancer/dashboard/src/components/AppResources.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@ import {
Space,
Row,
Col,
Button,
} from 'antd'
import { ReloadOutlined, ThunderboltOutlined } from '@ant-design/icons'
import { ReloadOutlined, ThunderboltOutlined, SettingOutlined } from '@ant-design/icons'
import type { ColumnsType } from 'antd/es/table'
import { COLORS } from '../styles/theme'
import { api } from '../api/client'
import type { AppResourceEntry, AppDiskIoEntry } from '../api/types'
import SettingsModal from './SettingsModal'
import { usePolling } from '../hooks/usePolling'

const { Text, Title } = Typography
Expand Down Expand Up @@ -61,6 +63,7 @@ export default function AppResources({ active }: Props) {
const [loading, setLoading] = useState(true)
const [error, setError] = useState<string | null>(null)
const [lastUpdated, setLastUpdated] = useState<Date | null>(null)
const [settingsVisible, setSettingsVisible] = useState(false)

const fetchData = useCallback(async () => {
try {
Expand Down Expand Up @@ -304,6 +307,13 @@ export default function AppResources({ active }: Props) {
)}

<div style={{ display: 'flex', justifyContent: 'flex-end', alignItems: 'center', marginBottom: 8, gap: 12 }}>
<Button
type="text"
icon={<SettingOutlined style={{ fontSize: 16, color: COLORS.accent }} />}
onClick={() => setSettingsVisible(true)}
style={{ padding: '4px 8px' }}
title="Configure Score Weights"
/>
{lastUpdated && (
<Text style={{ color: COLORS.textMuted, fontSize: 11 }}>
<ReloadOutlined style={{ marginRight: 4 }} />
Expand Down Expand Up @@ -390,6 +400,11 @@ export default function AppResources({ active }: Props) {
</div>
</Col>
</Row>

<SettingsModal
visible={settingsVisible}
onClose={() => setSettingsVisible(false)}
/>
</div>
)
}
116 changes: 116 additions & 0 deletions balancer/monitor/monitor_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -824,3 +824,119 @@ def update_network_pressure_level(self, network_data):
except Exception as e:
logger.error("Failed to update network pressure level: %s", str(e))
return "unknown", "unknown", 0.0, 0.0


@monitor_bp.route('/config/weights_top', methods=['GET'])
def get_weights_top():
    """Return the current ``weights_top`` configuration section.

    The payload is whatever ``b_config.weights_top`` currently holds, or an
    empty object when that attribute is falsy. Any exception raised while
    building the response is logged and reported with
    ``RetCode.EXCEPTION_ERROR`` and an empty payload.

    Response (as documented by the endpoint's author):
    {
        "cpu": int,
        "memory": int,
        "io": int,
        "gpu": int
    }
    """
    try:
        # Imported lazily inside the handler, as in the rest of this module's routes.
        from config.config import b_config

        current = b_config.weights_top
        payload = current if current else {}
        return construct_response(
            retmsg="Successfully retrieved weights_top configuration",
            data=payload,
        )
    except Exception as exc:
        reason = str(exc)
        logger.error(f"get_weights_top failed: {reason}")
        return construct_response(
            retmsg=reason,
            retcode=RetCode.EXCEPTION_ERROR,
            data={},
        )


@monitor_bp.route('/config/weights_top', methods=['POST'])
def update_weights_top():
    """Update weights_top configuration.

    Request body:
    {
        "cpu": int (optional),
        "memory": int (optional),
        "gpu": int (optional)
    }

    Note: I/O weight is not configurable via this API as Disk I/O ranking
    uses pure throughput (MB/s) without weight adjustment.

    Validation (all failures answer with RetCode.PARAM_ERROR):
    - the body must be a JSON object (malformed JSON or a non-JSON
      Content-Type is treated the same as a non-object body);
    - each supplied weight must be an integer (integer-valued numbers and
      numeric strings are accepted; booleans and fractional numbers are not);
    - each supplied weight must be non-negative;
    - at least one of the recognised keys must be present.

    Re-sending the weights that are already configured is reported as a
    success rather than as a failed update.

    Response:
    {
        "success": bool,
        "updated_weights": dict
    }
    """
    try:
        from config.config import b_config

        # silent=True makes unparsable bodies come back as None instead of
        # raising, so they are reported as a parameter error below.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return construct_response(
                data={"success": False},
                retcode=RetCode.PARAM_ERROR,
                retmsg="Request body must be a JSON object"
            )

        # Validate input
        updates = {}
        for key in ('cpu', 'memory', 'gpu'):
            if key not in data:
                continue

            raw = data[key]
            weight = None
            # int() would silently accept True/False and truncate 2.9 to 2;
            # neither is "an integer" from the caller's point of view.
            fractional = isinstance(raw, float) and not raw.is_integer()
            if not isinstance(raw, bool) and not fractional:
                try:
                    weight = int(raw)
                except (TypeError, ValueError):
                    weight = None

            if weight is None:
                return construct_response(
                    data={"success": False},
                    retcode=RetCode.PARAM_ERROR,
                    retmsg=f"Invalid value for {key}, must be an integer"
                )
            if weight < 0:
                return construct_response(
                    data={"success": False},
                    retcode=RetCode.PARAM_ERROR,
                    retmsg=f"Weight for {key} must be non-negative"
                )
            updates[key] = weight

        if not updates:
            return construct_response(
                data={"success": False},
                retcode=RetCode.PARAM_ERROR,
                retmsg="No valid weight updates provided"
            )

        # update_config_section() returns False when nothing changed, which
        # would otherwise be reported as a failure; handle the no-op here.
        current = b_config.weights_top
        if isinstance(current, dict) and all(
            key in current and current[key] == weight
            for key, weight in updates.items()
        ):
            logger.info(f"weights_top already matches requested values: {updates}")
            return construct_response(
                data={
                    "success": True,
                    "updated_weights": current
                },
                retmsg="weights_top configuration already up to date"
            )

        # Update the configuration
        logger.info(f"Updating weights_top configuration: {updates}")
        success = b_config.update_config_section('weights_top', updates)

        if success:
            logger.info(f"Successfully updated weights_top to: {b_config.weights_top}")
            return construct_response(
                data={
                    "success": True,
                    "updated_weights": b_config.weights_top
                },
                retmsg="Successfully updated weights_top configuration"
            )

        logger.error("Failed to update weights_top configuration")
        return construct_response(
            data={"success": False},
            retcode=RetCode.EXCEPTION_ERROR,
            retmsg="Failed to update configuration"
        )

    except Exception as e:
        logger.error(f"update_weights_top failed: {str(e)}")
        return construct_response(
            data={"success": False},
            retcode=RetCode.EXCEPTION_ERROR,
            retmsg=str(e)
        )
Loading
Loading