diff --git a/docs/modules/tutorial/pages/configurationfiles/benchmark/scalability.adoc b/docs/modules/tutorial/pages/configurationfiles/benchmark/scalability.adoc index 2c9960a0..a0b44d4d 100644 --- a/docs/modules/tutorial/pages/configurationfiles/benchmark/scalability.adoc +++ b/docs/modules/tutorial/pages/configurationfiles/benchmark/scalability.adoc @@ -4,7 +4,9 @@ Lists all the files where performance variables can be found. directory [*str*]:: - Common directory where files containing performance variables can be found. + Common directory where files containing performance variables can be found. This field is optional **only if all stages extract from `stdout`**. + If at least one stage extracts from a file, `directory` must be provided. + clean_directory [*bool*] (Optional):: If true, it will delete the contents of inside `directory`. @@ -13,16 +15,16 @@ clean_directory [*bool*] (Optional):: stages [*List[Stage]*]:: Describes the files containing performance variables, and how to extract them. - -name [*str*]::: + -name [*str* (Optional)]::: Name to describe the stage. It is used as prefix to add to the performance variables found in the file. - If no prefix is needed, the name can be "". + If no prefix is needed, the name can be omitted. - -filepath [*str*]::: - Relative filepath of the file containing performance variables, relative to the `directory` field. + -filepath [*str*|"stdout"]::: + Can be either "stdout" or the relative filepath of the file containing performance variables, relative to the `directory` field. -format [*str*]::: Format of the stage file. - Supported values are "csv" and "json". + Supported values are "regex", "csv" and "json". -units [*Dict[str,str]*] (Optional)::: Custom units for certain performance variables. @@ -35,6 +37,16 @@ stages [*List[Stage]*]:: Only valid if format is "json". Defines where, in the JSON hierrarchy, performance variables will be found. Supports the use of one or multiple wildcards (`*`). 
+ -pattern [*str*]::: + Required if format is `regex`. The regular expression applied to each line. Accepts named and arbitrary capture groups. + + -variable_value_group [*str|int*]::: + Required if format is `regex`. + The capture group containing the performance value to extract. Can be named or an integer. + + -variable_name_group [*str|int* (Optional)]::: + The capture group containing the performance variable name to extract. If omitted, variables are named automatically as `match_0`, `match_1`, ... + custom_variables [*List[Dict[str,str]]*] (Optional):: Contains a list of objects describing custom performance variables to create, based on extracted ones (from stages). An aggregation will be performed using provided columns and valid operations. For more information, see the xref:tutorial:advancedConfiguration.adoc[advanced Configuration] @@ -63,6 +75,24 @@ Recursive creation of custom_variables is supported! Deeply nested and complex JSON scalability files are supported, using multiple wildcard syntax! ==== + +== Extracting from standard output + +Stages may extract performance variables directly from the application standard output by setting: + +[source,json] +---- +"filepath": "stdout" +---- + +This works with any supported file format (e.g. printing CSV data to stdout). + +The top-level `directory` field is not required if ALL stages extract from stdout. + +[TIP] +Mixing `stdout` and file-based stages is allowed but requires `directory` to be set. + + == Examples Let's assume our application exports the following files: @@ -199,4 +229,40 @@ If a full path is passed, the variable name corresponds to the key of the leaf [TIP] ==== `variables_path` can be a list. 
-==== \ No newline at end of file +==== + + +=== Extracting performance variables using `regex` + +Assume the application prints the following lines to standard output: + +[source,text] +---- +assembly: 0.012 +solve: 1.42 +postprocess: 0.08 +---- + +A minimal `regex` stage extracting these values from `stdout` is shown below. + +[source,json] +---- +"scalability": { + "stages": [ + { + "name": "timers", + "filepath": "stdout", + "format": "regex", + "pattern": "^(?P<name>[^:\\n]+):\\s*(?P<value>[-+]?[\\d.]+(?:[eE][-+]?\\d+)?)$", + "variable_name_group": "name", + "variable_value_group": "value" + } + ] +} +---- + +This configuration extracts the following performance variables: + +- timers_assembly : 0.012 +- timers_solve : 1.42 +- timers_postprocess : 0.08 diff --git a/examples/fibonacci/benchmark.json b/examples/fibonacci/benchmark.json index ff149709..3256d84e 100644 --- a/examples/fibonacci/benchmark.json +++ b/examples/fibonacci/benchmark.json @@ -15,6 +15,14 @@ "name":"", "filepath":"output.csv", "units":{ "fibonacci_number":"" } + }, + { + "name":"", + "filepath":"stdout", + "format":"regex", + "pattern": "^(?P<name>[^:\\n]+):\\s*(?P<value>[-+]?[\\d.]+(?:[eE][-+]?\\d+)?)$", + "variable_name_group":"name", + "variable_value_group":"value" } ] }, diff --git a/examples/fibonacci/fibonacci.py b/examples/fibonacci/fibonacci.py index 19837927..9d969e6e 100644 --- a/examples/fibonacci/fibonacci.py +++ b/examples/fibonacci/fibonacci.py @@ -45,8 +45,8 @@ def fibonacciIterative(n): os.makedirs(dirpath) with open(args.out,'w') as f: - f.write(f"elapsed,fibonacci_number\n{elapsed_time},{fib_number}") + f.write(f"fibonacci_number\n{fib_number}") + + print(f"elapsed: {elapsed_time}") - print(f"Elapsed time: {elapsed_time}") - print(f"Fibonacci number: {fib_number}") print("Done!") \ No newline at end of file diff --git a/src/feelpp/benchmarking/reframe/regression.py b/src/feelpp/benchmarking/reframe/regression.py index 3ed8d769..28be3c8f 100644 --- a/src/feelpp/benchmarking/reframe/regression.py 
+++ b/src/feelpp/benchmarking/reframe/regression.py @@ -18,7 +18,7 @@ class RegressionTest(ReframeSetup): def initHandlers(self): self.validation_handler = ValidationHandler(self.app_reader.config.sanity) if self.app_reader.config.scalability: - self.scalability_handler = ScalabilityHandler(self.app_reader.config.scalability) + self.scalability_handler = ScalabilityHandler(self.app_reader.config.scalability, os.path.join(self.stagedir,self.stdout.evaluate())) else: self.scalability_handler = None @@ -84,7 +84,7 @@ def setPerfVars(self): @run_before("cleanup") def removeDirectories(self): - if self.app_reader.config.scalability and self.app_reader.config.scalability.clean_directory: + if self.app_reader.config.scalability and self.app_reader.config.scalability.clean_directory and self.app_reader.config.scalability.directory: FileHandler.cleanupDirectory(self.app_reader.config.scalability.directory) if self.machine_reader.config.input_user_dir and self.app_reader.config.input_file_dependencies: DEBUG("REMOVING INPUT FILE DEPENDENCIES...") diff --git a/src/feelpp/benchmarking/reframe/scalability.py b/src/feelpp/benchmarking/reframe/scalability.py index ccb1b7d7..a177106b 100644 --- a/src/feelpp/benchmarking/reframe/scalability.py +++ b/src/feelpp/benchmarking/reframe/scalability.py @@ -9,6 +9,13 @@ def __init__(self,filepath,stage_name, units): self.stage_name = stage_name self.units = units + @staticmethod + def _tryCastFloat(x): + try: + return float(x) + except (ValueError, TypeError): # TypeError: float(None) when the group did not participate in the match + return x + def _getPerfVars(self,columns,vars): perf_variables = {} nb_rows = len(vars.evaluate()) @@ -120,27 +127,60 @@ def _extractVariables(self): return items.keys(),sn.defer([[sn.defer(v) for v in items.values()]]) +class RegexExtractor(Extractor): + def __init__(self, filepath, stage_name, units, pattern, variable_name_group, variable_value_group): + super().__init__(filepath, stage_name, units) + self.pattern = pattern + self.variable_name_group = variable_name_group + 
self.variable_value_group = variable_value_group + + def _extractVariables(self): + if self.variable_name_group not in (None, ""): # group index 0 is falsy but is a valid selection + tags = (self.variable_name_group, self.variable_value_group) + conv = (str,self._tryCastFloat) + else: + tags = self.variable_value_group + conv = self._tryCastFloat + + raw_results = sn.extractall(rf"{self.pattern}", self.filepath, tags, conv=conv) + + if self.variable_name_group not in (None, ""): # must mirror the check used to build tags above + columns = [x[0].strip() for x in raw_results] + matches = [x[1] for x in raw_results] + else: + matches = [x for x in raw_results] + columns = [f"match_{i}" for i in range(len(matches))] + + return columns, sn.defer([matches]) + class ExtractorFactory: """Factory class for extractor strategies""" @staticmethod - def create(stage,directory,index=None): - filepath = os.path.join(directory,stage.filepath) + def create(stage,directory,index=None, stdout = None): + if stage.filepath == "stdout": + filepath = stdout + else: + filepath = os.path.join(directory,stage.filepath) + if stage.format == "csv": return CsvExtractor(filepath=filepath, stage_name = stage.name, units=stage.units) elif stage.format == "tsv": return TsvExtractor(filepath=filepath,stage_name = stage.name,index=index, units=stage.units) elif stage.format == "json": return JsonExtractor(filepath=filepath,stage_name = stage.name, variables_path=stage.variables_path, units=stage.units) + elif stage.format == "regex": + return RegexExtractor(filepath=filepath,stage_name = stage.name, pattern=stage.pattern, units=stage.units, variable_name_group=stage.variable_name_group, variable_value_group=stage.variable_value_group) else: raise NotImplementedError class ScalabilityHandler: """ Class to handle scalability related attributes""" - def __init__(self,scalability_config): + def __init__(self,scalability_config, stdout = None): self.directory = scalability_config.directory self.stages = scalability_config.stages self.custom_variables = scalability_config.custom_variables + self.stdout = stdout def 
getPerformanceVariables(self,index=None): """ Opens and parses the performance variable values depending on the config setup. @@ -150,7 +190,7 @@ def getPerformanceVariables(self,index=None): """ perf_variables = {} for stage in self.stages: - extractor = ExtractorFactory.create(stage,self.directory,index) + extractor = ExtractorFactory.create(stage,self.directory,index, self.stdout) perf_variables.update( extractor.extract() ) return perf_variables diff --git a/src/feelpp/benchmarking/reframe/schemas/scalability.py b/src/feelpp/benchmarking/reframe/schemas/scalability.py index f747be22..4763db20 100644 --- a/src/feelpp/benchmarking/reframe/schemas/scalability.py +++ b/src/feelpp/benchmarking/reframe/schemas/scalability.py @@ -3,12 +3,23 @@ class Stage(BaseModel): - name:str + name:Optional[str] = None filepath:str - format:Optional[Literal["csv","tsv","json"]] = None + format:Optional[Literal["csv","tsv","json","regex"]] = None variables_path:Optional[Union[str,List[str]]] = [] units: Optional[Dict[str,str]] = {} + pattern: Optional[str] = None + variable_value_group: Optional[Union[str,int]] = None + variable_name_group: Optional[Union[str,int]] = None + + @field_validator("name",mode="after") + @classmethod + def defaultName(cls,v): + if v is None: + return "" + return v + @field_validator("units",mode="before") @classmethod def parseUnits(cls,v): @@ -28,7 +39,12 @@ def checkFormatOptions(self): raise ValueError("variables_path must be specified if format == json") if type(self.variables_path) == str: self.variables_path = [self.variables_path] - elif self.format != "json": + elif self.format == "regex": + if not self.pattern: + raise ValueError("regex must be specified if format == regex") + if self.variable_value_group in (None, ""): # 0 is a valid group index (the whole match) + raise ValueError("variable_value_group must be specified if format == regex") + else: if self.variables_path: raise ValueError("variables_path cannot be specified with other format than json") return self @@ -40,7 +56,17 @@ class 
CustomVariable(BaseModel): unit: str class Scalability(BaseModel): - directory: str + directory: Optional[str] = None stages: List[Stage] custom_variables:Optional[List[CustomVariable]] = [] clean_directory: Optional[bool] = False + + @model_validator(mode = "after") + def checkOptionalDirectory(self): + if self.directory is None: + #Directory should be specified if any stage has filename other than stdout + for stage in self.stages: + if stage.filepath != "stdout": + raise ValueError("Directory should be specified for non-stdout output files") + + return self \ No newline at end of file diff --git a/src/feelpp/benchmarking/reframe/setup.py b/src/feelpp/benchmarking/reframe/setup.py index ea779fb0..6334a6f4 100644 --- a/src/feelpp/benchmarking/reframe/setup.py +++ b/src/feelpp/benchmarking/reframe/setup.py @@ -161,7 +161,7 @@ def setResources(self): @run_before('run') def cleanupDirectories(self): - if self.app_reader.config.scalability: + if self.app_reader.config.scalability and self.app_reader.config.scalability.directory: FileHandler.cleanupDirectory(self.app_reader.config.scalability.directory) @run_before('run') diff --git a/tests/scalability/test_scalabilityHandler.py b/tests/scalability/test_scalabilityHandler.py index f5d35e03..33cac8ee 100644 --- a/tests/scalability/test_scalabilityHandler.py +++ b/tests/scalability/test_scalabilityHandler.py @@ -2,7 +2,7 @@ import pytest import tempfile, json -from feelpp.benchmarking.reframe.scalability import ScalabilityHandler, CsvExtractor,TsvExtractor,JsonExtractor,Extractor,ExtractorFactory +from feelpp.benchmarking.reframe.scalability import ScalabilityHandler, CsvExtractor,TsvExtractor,JsonExtractor,RegexExtractor,Extractor,ExtractorFactory import numpy as np class StageMocker: @@ -83,6 +83,33 @@ def test_extractTsv(self): file.close() + + def test_extractRegex(self): + """ Test extracting performance variables using regex from a file """ + + file = tempfile.NamedTemporaryFile(mode="w+") + content = "assembly: 
0.012\nsolve: 1.42\npostprocess: 0.08" + file.write(content) + file.flush() + + pattern = r"^(?P<name>[^:]+):\s*(?P<value>[\d.]+)$" + extractor = RegexExtractor( + filepath=file.name, + stage_name="timers", + pattern=pattern, + variable_name_group="name", + variable_value_group="value", + units={"*":"s"} + ) + perfvars = extractor.extract() + assert perfvars["timers_assembly"].evaluate() == 0.012 + assert perfvars["timers_solve"].evaluate() == 1.42 + assert perfvars["timers_postprocess"].evaluate() == 0.08 + + file.close() + + + def test_extractJson(self): """ Test performance variable extraction for JSON files""" file = tempfile.NamedTemporaryFile() diff --git a/tests/scalability/test_scalabilityValidation.py b/tests/scalability/test_scalabilityValidation.py index 6a3d6caf..fb68afc1 100644 --- a/tests/scalability/test_scalabilityValidation.py +++ b/tests/scalability/test_scalabilityValidation.py @@ -25,5 +25,29 @@ def test_format(self): stage = Stage(**{"name":"test_stage","filepath":"test_filepath","format":"csv"}) assert stage.variables_path == [] + def test_regex_validation(self): + """ Tests mandatory regex fields and named/numbered groups """ + # Missing pattern + with pytest.raises(ValidationError, match="regex must be specified if format == regex"): + Stage(**{"name": "r_stage", "filepath": "file.txt", "format": "regex", "variable_value_group": "value"}) + + # Missing variable_value_group + with pytest.raises(ValidationError, match="variable_value_group must be specified if format == regex"): + Stage(**{"name": "r_stage", "filepath": "file.txt", "format": "regex", "pattern": ".*"}) + + # Valid named capture groups + stage = Stage( + name="r_stage", + filepath="file.txt", + format="regex", + pattern="^(?P<name>[^:]+):\\s*(?P<value>[\\d.]+)$", + variable_name_group="name", + variable_value_group="value" + ) + assert stage.format == "regex" + assert stage.variable_name_group == "name" + assert stage.variable_value_group == "value" + + class TestAppOutput: pass