-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun.py
More file actions
81 lines (62 loc) · 2.6 KB
/
run.py
File metadata and controls
81 lines (62 loc) · 2.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import sys
import yaml
from box import Box
import pandas as pd
import time
sys.path.insert(0, 'src')
from data.make_dataset import download_data, generate_data, save_data, generate_tweet
from utils.download_models import download_models
from models.train import train
from models.test import test, prediction
import logging
def _log_config(path):
    """Load the YAML config at *path*, log it at INFO level and return it.

    The parsed content is wrapped in a ``Box``. Per the original comments in
    this file, the pipeline modules read their own configuration; it is
    loaded here only so that it shows up in the run log.
    """
    # NOTE(review): yaml.full_load accepts more than the plain-data subset
    # handled by yaml.safe_load; these files are assumed to be trusted
    # project files -- confirm before pointing this at external input.
    with open(path, 'r', encoding='utf-8') as file:
        config = Box(yaml.full_load(file))
    logging.info(config)
    return config


def main(args):
    """
    Run the project pipeline from top to bottom.

    Optional stages are enabled by the presence of their name in ``args``.
    The model download is not optional: it runs on every invocation.

    Parameters:
        args: list of stage names, normally ``sys.argv[1:]``. Recognised
            names are 'generate_data', 'download_models', 'train', 'test',
            'test_run' and 'predict'. 'test' and 'test_run' are mutually
            exclusive; 'test' wins when both are given. Pass a list rather
            than a single string, since ``in`` on a string would match
            substrings (e.g. 'test' inside 'test_run').

    Returns:
        None.
    """
    # Log to 'myapp.log' and mirror every record to stdout.
    logging.basicConfig(filename='myapp.log', level=logging.INFO)
    logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
    logging.info(args)

    if 'generate_data' in args:  # will not be run in testrun for submission
        logging.info('loading data-params...')
        _log_config('config/data-params.yml')
        download_data()
        df = generate_data()
        save_data(df)

    # Models are downloaded on every run (submission requirement). The
    # 'download_models' argument is still accepted, but it no longer triggers
    # a second, redundant config load and download in the same run.
    _log_config('config/model_config.yml')
    download_models()

    if 'train' in args:  # will not be run in testrun for submission
        logging.info('loading training-params...')
        _log_config('config/train-params.yml')
        # perf_counter is monotonic, so the measured duration is not affected
        # by system clock adjustments during a long training run.
        start = time.perf_counter()
        train()
        elapsed = time.perf_counter() - start
        logging.info('training time: %s', elapsed)

    if 'test' in args:  # test on test dataset
        logging.info('test run start...')
        test(test_target='test', test_lines=3)
    elif 'test_run' in args:  # test run for submission
        logging.info('testing start...')
        test(test_target='testing', test_lines=20)

    if 'predict' in args:
        logging.info('fetching and processing twitter data...')
        generate_tweet()
        logging.info('prediction on tweets start...')
        prediction()
if __name__ == '__main__':
    # Script entry point: everything after the script name on the command
    # line is handed to main() as the list of arguments.
    cli_args = sys.argv[1:]
    main(cli_args)