-
Notifications
You must be signed in to change notification settings - Fork 69
Expand file tree
/
Copy pathtasks.py
More file actions
276 lines (220 loc) · 8.73 KB
/
tasks.py
File metadata and controls
276 lines (220 loc) · 8.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
import inspect
import operator
import os
import shutil
import stat
import sys
from pathlib import Path
import shlex
import tomli
from invoke import task
from packaging.requirements import Requirement
from packaging.version import Version
# Comparison functions keyed by the version-specifier symbol they implement.
COMPARISONS = {
    '>=': operator.ge,
    '>': operator.gt,
    '<': operator.lt,
    '<=': operator.le,
}

# Separator placed between a requirement URL and the package name when building URL pins.
EGG_STRING = '#egg='

# ``inspect.getargspec`` no longer exists on recent Python versions; expose
# ``getfullargspec`` under the old name when it is missing.
# NOTE(review): presumably needed by a library that still calls ``getargspec`` - confirm.
if not hasattr(inspect, 'getargspec'):
    inspect.getargspec = inspect.getfullargspec
@task
def check_dependencies(c):
    """Run ``pip check`` to verify the installed packages are compatible.

    Args:
        c (invoke.Context):
            Context used to run the shell command.
    """
    pip_check_command = 'python -m pip check'
    c.run(pip_check_command)
@task
def unit(c):
    """Run the unit tests with coverage written to ``unit_cov.xml``.

    Args:
        c (invoke.Context):
            Context used to run the shell command.
    """
    pytest_command = 'python -m pytest ./tests/unit --cov=sdgym --cov-report=xml:./unit_cov.xml'
    c.run(pytest_command)
@task
def integration(c):
    """Run the integration tests with coverage written to ``integration_cov.xml``.

    Args:
        c (invoke.Context):
            Context used to run the shell command.
    """
    pytest_command = (
        'python -m pytest ./tests/integration --cov=sdgym --cov-report=xml:./integration_cov.xml'
    )
    c.run(pytest_command)
@task
def readme(c):
    """Execute the code snippets of ``README.md`` inside a scratch directory.

    The README is copied into ``tests/readme_test`` and run there with ``rundoc``.
    The original working directory is restored and the scratch directory is
    removed afterwards, even when the ``rundoc`` command fails.

    Args:
        c (invoke.Context):
            Context used to run the shell command.
    """
    test_path = Path('tests/readme_test')
    # Start from a clean directory in case a previous run left one behind.
    if test_path.is_dir():
        shutil.rmtree(test_path)

    cwd = os.getcwd()
    os.makedirs(test_path, exist_ok=True)
    try:
        shutil.copy('README.md', test_path / 'README.md')
        os.chdir(test_path)
        c.run('rundoc run --single-session python3 -t python3 README.md')
    finally:
        # Always go back first: ``test_path`` is relative to the original directory.
        os.chdir(cwd)
        shutil.rmtree(test_path)
def _get_minimum_versions(dependencies, python_version):
min_versions = {}
for dependency in dependencies:
if '@' in dependency:
name, url = dependency.split(' @ ')
min_versions[name] = f'{url}{EGG_STRING}{name}'
continue
req = Requirement(dependency)
if ';' in dependency:
marker = req.marker
if marker and not marker.evaluate({'python_version': python_version}):
continue # Skip this dependency if the marker does not apply to the current Python version
if req.name not in min_versions:
min_version = next(
(spec.version for spec in req.specifier if spec.operator in ('>=', '==')), None
)
if min_version:
min_versions[req.name] = f'{req.name}=={min_version}'
elif '@' not in min_versions[req.name]:
existing_version = Version(min_versions[req.name].split('==')[1])
new_version = next(
(spec.version for spec in req.specifier if spec.operator in ('>=', '==')),
existing_version,
)
if new_version > existing_version:
min_versions[req.name] = (
f'{req.name}=={new_version}' # Change when a valid newer version is found
)
return min_versions
def _get_extra_dependencies(pyproject_data):
"""Get the dependencies for optional synthesizers.
This function assumes that all external synthesizers we add will have an optional dependency
section defined and that the section will be listed in the '[test]' optional dependency.
Args:
pyproject_data (dict):
Dictionary representation of our pyproject.toml file.
Returns:
list:
A list of dependency strings (ie. numpy>=x.y.z).
"""
optional_dependencies = pyproject_data.get('project', {}).get('optional-dependencies', {})
test_dependencies = optional_dependencies.get('test', [])
extra_dependencies = []
start_token = 'sdgym['
for dep in test_dependencies:
if dep.startswith(start_token):
synthesizer = dep[len(start_token): -1]
extra_dependencies.extend(optional_dependencies.get(synthesizer))
return extra_dependencies
def _get_version_from_requirement(requirement):
requirement = requirement.strip()
equal_index = requirement.find('==')
version_number = requirement[equal_index + 2:]
return Version(version_number)
def _resolve_version_conflicts(dependencies, extra_dependencies):
"""Pick the highest version of two minimums.
Args:
dependencies (dict):
A dictionary mapping dependency names to the version.
extra_dependencies (dict):
A dictionary mapping the optional dependency names to the version.
Returns:
list:
A list of dependency strings (ie. numpy>=x.y.z).
"""
all_dependencies = set(dependencies.keys()).union(set(extra_dependencies.keys()))
selected_versions = []
for dep in all_dependencies:
if dep in dependencies and dep in extra_dependencies:
requirement1 = dependencies.get(dep)
requirement2 = extra_dependencies.get(dep)
if EGG_STRING in requirement1:
selected_versions.append(requirement1)
continue
if EGG_STRING in requirement2:
selected_versions.append(requirement2)
continue
version1 = _get_version_from_requirement(requirement1)
version2 = _get_version_from_requirement(requirement2)
max_version = requirement1 if version1 > version2 else requirement2
selected_versions.append(max_version)
else:
selected_versions.append(dependencies.get(dep, extra_dependencies.get(dep)))
return selected_versions
@task
def install_minimum(c):
    """Install the minimum supported version of the project dependencies.

    The dependencies and the optional synthesizer dependencies are read from
    ``pyproject.toml``, pinned to their minimum versions for the running Python
    version and installed with ``pip``. Nothing is installed when no pin is found.

    Args:
        c (invoke.Context):
            Context used to run the shell command.
    """
    with open('pyproject.toml', 'rb') as pyproject_file:
        pyproject_data = tomli.load(pyproject_file)

    base_requirements = pyproject_data.get('project', {}).get('dependencies', [])
    extra_requirements = _get_extra_dependencies(pyproject_data)

    # Markers are evaluated against ``major.minor`` of the running interpreter.
    python_version = '.'.join(str(part) for part in sys.version_info[:2])
    base_pins = _get_minimum_versions(base_requirements, python_version)
    extra_pins = _get_minimum_versions(extra_requirements, python_version)

    pins = _resolve_version_conflicts(base_pins, extra_pins)
    if not pins:
        return

    install_deps = ' '.join(pins)
    c.run(f'python -m pip install {install_deps}')
@task
def minimum(c):
    """Install the minimum dependency versions and run the test suites against them.

    Args:
        c (invoke.Context):
            Context forwarded to every step.
    """
    # Steps run in order; the tests only make sense once the pins are installed.
    for step in (install_minimum, check_dependencies, unit, integration):
        step(c)
@task
def lint(c):
    """Check the dependencies, then the code style and formatting with ``ruff``.

    Args:
        c (invoke.Context):
            Context used to run the shell commands.
    """
    check_dependencies(c)
    for ruff_command in ('ruff check .', 'ruff format --check --diff .'):
        c.run(ruff_command)
@task
def fix_lint(c):
    """Check the dependencies, then apply the ``ruff`` fixes and formatting.

    Args:
        c (invoke.Context):
            Context used to run the shell commands.
    """
    check_dependencies(c)
    for ruff_command in ('ruff check --fix .', 'ruff format .'):
        c.run(ruff_command)
def remove_readonly(func, path, _):
    """Make ``path`` writable and retry the operation that failed on it.

    The signature matches the error handler expected by ``shutil.rmtree``.

    Args:
        func (callable):
            Function to call again with ``path`` as its only argument.
        path (str):
            Path whose readonly bit is cleared.
        _ (object):
            Error information passed by the caller; unused.
    """
    writable_mode = stat.S_IWRITE
    os.chmod(path, writable_mode)
    func(path)
@task
def rmdir(c, path):
    """Remove the directory tree at ``path``, ignoring permission errors.

    Entries that fail to be removed are made writable and retried through
    ``remove_readonly``.

    Args:
        c (invoke.Context):
            Task context; unused.
        path (str):
            Directory to remove.
    """
    try:
        shutil.rmtree(path, onerror=remove_readonly)
    except PermissionError:
        # Best effort: whatever still cannot be removed is left in place.
        pass
@task
def run_sdgym_benchmark(c, modality='single_table'):
    """Run the SDGym benchmark.

    Args:
        c (invoke.Context):
            Context used to run the shell command.
        modality (str):
            Value passed to the script as ``--modality``. Defaults to ``single_table``.
    """
    benchmark_command = f'python sdgym/run_benchmark/run_benchmark.py --modality {modality}'
    c.run(benchmark_command)
@task
def upload_benchmark_results(c, modality='single_table'):
    """Upload the benchmark results to S3.

    Args:
        c (invoke.Context):
            Context used to run the shell command.
        modality (str):
            Value passed to the script as ``--modality``. Defaults to ``single_table``.
    """
    upload_command = (
        f'python sdgym/run_benchmark/upload_benchmark_results.py --modality {modality}'
    )
    c.run(upload_command)
@task
def notify_sdgym_benchmark_uploaded(c, folder_name, commit_url=None, modality='single_table'):
    """Notify Slack about the SDGym benchmark upload.

    Args:
        c (invoke.Context):
            Task context; unused.
        folder_name (str):
            Forwarded to ``post_benchmark_uploaded_message`` as its first argument.
        commit_url (str or None):
            Forwarded as the second argument. Defaults to ``None``.
        modality (str):
            Forwarded as the third argument. Defaults to ``single_table``.
    """
    # Imported inside the task, so ``sdgym`` is only needed when this task runs.
    from sdgym.run_benchmark.utils import post_benchmark_uploaded_message

    post_benchmark_uploaded_message(folder_name, commit_url, modality)
@task
def launch_benchmark(
    c,
    config_filepath=None,
    modality=None,
    datasets=None,
    synthesizers=None,
    num_instances=None,
    output_destination=None,
    timeout=None,
):
    """Launch the SDGym benchmark through the benchmark launcher script.

    Every argument that is not ``None`` is forwarded as a command line option to
    ``sdgym/_benchmark_launcher/script.py``. List values are passed as several
    values after a single flag; any other value is converted to a string.

    The launcher supports two modes:

    - If ``config_filepath`` is provided, the benchmark configuration is loaded
      from that file.
    - Otherwise, the configuration is built from the remaining arguments. In
      this case, ``modality`` and ``output_destination`` are required.

    When building the configuration from arguments:

    - If ``datasets``, ``synthesizers``, and ``num_instances`` are all omitted,
      the default monthly benchmark configuration for the selected modality is
      used.
    - If ``num_instances`` is omitted, it defaults to ``1``.
    - If ``datasets`` or ``synthesizers`` is omitted, the corresponding
      defaults from the monthly benchmark configuration are used.
    """
    options = {
        '--config-filepath': config_filepath,
        '--modality': modality,
        '--datasets': datasets,
        '--synthesizers': synthesizers,
        '--num-instances': num_instances,
        '--output-destination': output_destination,
        '--timeout': timeout,
    }
    argv = ['python', 'sdgym/_benchmark_launcher/script.py']
    for flag, value in options.items():
        if value is None:
            continue

        # Scalars are treated as one-item lists so both cases share the same path.
        values = value if isinstance(value, list) else [value]
        argv.append(flag)
        argv.extend(str(item) for item in values)

    # Quote each part so values with spaces or shell characters stay single arguments.
    c.run(' '.join(map(shlex.quote, argv)))