-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMultihead_train_torch_sequential_modifymuchpossible.gin
More file actions
106 lines (89 loc) · 7.79 KB
/
Multihead_train_torch_sequential_modifymuchpossible.gin
File metadata and controls
106 lines (89 loc) · 7.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import multihead_attention_model_torch_sequential_modifymuchpossible
import multihead_attention_model_torch_boost_modifymuchpossible
import multihead_attention_model_torch_early_fusion_modifymuchpossible
# ---------------------------------------------------------------------------
# Sequential model: gin bindings for `myModel1` in
# multihead_attention_model_torch_sequential_modifymuchpossible.
# Each line binds one keyword argument of the configurable; the binding name
# must match the parameter name in the Python signature exactly.
# ---------------------------------------------------------------------------
# Number of output classes (the commented-out class_weights example below
# also lists 7 values).
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.nb_classes = 7
# Attention settings — presumably attention width and number of heads; TODO confirm against the model code.
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.dim_attention = 80
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.headnum = 5
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.pooling_size = 8
# Dropout rates — presumably applied to the input, the CNN output and the flattened features respectively; TODO confirm.
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.drop_input = 0.3
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.drop_cnn = 0.3
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.drop_flat = 0.40
# Regularisation strengths.
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.W1_regularizer = 0.001
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.Att_regularizer_weight = 0.001
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.fc_dim = 100
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.normalizeatt = True
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.attmod = "smooth"
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.sharp_beta = 1
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.activation = "gelu"# other values noted by the author: leaky, relu
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.activation_att = "tanh"
# NOTE(review): input_dim is 5 here but 4 for myModel2 in this file — confirm this difference is intended.
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.input_dim = 5
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.attention = False## NOTE(review): original note read "same time"; meaning unclear — confirm
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.pool_type = "max"
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.cnn_scaler = 1 # tagged "pair" by the author; presumably tied to att_type "pair" — TODO confirm
# NOTE(review): "parnet_dim" looks like a misspelling of "parent_dim", but the
# binding must match the parameter name in myModel1, so do not rename it here
# without changing the model signature as well.
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.parnet_dim = 256
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.hidden = 256 # tagged "pair" by the author; presumably tied to att_type "pair" — TODO confirm
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.att_type = "self_attention" ## values noted by the author: pair, self_attention
# Optimiser is given as a dotted-path string, not a gin reference.
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.optimizer = "torch.optim.Adam"
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.lr = 0.001
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.class_weights = False # disabled; alternative left by the author: torch.FloatTensor([[1,1,7,1,3,5,8]])
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.gradient_clip = False
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.weight_decay = 1e-5
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.pooling_opt = True
# NOTE(review): this is "filter_length1" while myModel2 in this file uses
# "filters_length1" — confirm the spelling matches myModel1's signature.
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.filter_length1 = 3
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.release_layers = 7
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.prediction = False
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.fc_layer = False
multihead_attention_model_torch_sequential_modifymuchpossible.myModel1.mode = "full"
# ---------------------------------------------------------------------------
# Early-fusion model: gin bindings for `myModel2` in
# multihead_attention_model_torch_early_fusion_modifymuchpossible.
# Each line binds one keyword argument of the configurable; the binding name
# must match the parameter name in the Python signature exactly.
# ---------------------------------------------------------------------------
# Number of output classes (the commented-out class_weights example below
# also lists 7 values).
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.nb_classes = 7
# Attention settings — presumably attention width and number of heads; TODO confirm against the model code.
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.dim_attention = 80
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.headnum = 5
# Convolution settings: filter count and three filter lengths (3, 20, 49).
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.nb_filters = 64
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.filters_length1 = 3
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.filters_length2 = 20
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.filters_length3 = 49
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.pooling_size = 8
# Dropout rates — presumably applied to the input, the CNN output and the flattened features respectively; TODO confirm.
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.drop_input = 0.26
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.drop_cnn = 0.25
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.drop_flat = 0.25
# Regularisation strengths.
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.W1_regularizer = 0.001
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.Att_regularizer_weight = 0.001
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.fc_dim = 100
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.normalizeatt = True
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.attmod = "smooth"
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.sharp_beta = 1
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.batchnorm = False
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.predict = True
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.activation = "gelu"# other values noted by the author: leaky, relu
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.activation_att = "tanh"
# NOTE(review): input_dim is 4 here but 5 for myModel1 in this file — confirm this difference is intended.
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.input_dim = 4
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.attention = True## NOTE(review): original note read "same time"; meaning unclear — confirm
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.pool_type = "max"
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.cnn_scaler = 3 # tagged "pair" by the author; presumably tied to att_type "pair" — TODO confirm
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.hidden = 32 # tagged "pair" by the author; presumably tied to att_type "pair" — TODO confirm
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.att_type = "self_attention" ## values noted by the author: pair, self_attention
# Optimiser is given as a dotted-path string, not a gin reference.
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.optimizer = "torch.optim.Adam"
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.lr = 0.001
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.class_weights = False # disabled; alternative left by the author: torch.FloatTensor([[1,1,7,1,3,5,8]])
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.gradient_clip = False
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.pooling = True
# Device is hard-coded to "cuda"; presumably needs changing on CPU-only machines — TODO confirm how the model uses it.
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.device = "cuda"
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.fusion = False
multihead_attention_model_torch_early_fusion_modifymuchpossible.myModel2.mode = "full"
# ---------------------------------------------------------------------------
# Booster / fusion model: gin bindings for `myModel_fusion` in
# multihead_attention_model_torch_boost_modifymuchpossible.
# Each line binds one keyword argument of the configurable; the binding name
# must match the parameter name in the Python signature exactly.
# ---------------------------------------------------------------------------
# Both sub-model slots are bound to None here; presumably the training script
# passes the actual models at call time — TODO confirm against the caller.
multihead_attention_model_torch_boost_modifymuchpossible.myModel_fusion.model1 = None
multihead_attention_model_torch_boost_modifymuchpossible.myModel_fusion.model2 = None
# hidden, fc_dim, nb_classes and drop_flat carry the same values as the
# myModel1 bindings in this file (256, 100, 7, 0.40).
multihead_attention_model_torch_boost_modifymuchpossible.myModel_fusion.hidden = 256
multihead_attention_model_torch_boost_modifymuchpossible.myModel_fusion.fc_dim = 100
multihead_attention_model_torch_boost_modifymuchpossible.myModel_fusion.nb_classes = 7
multihead_attention_model_torch_boost_modifymuchpossible.myModel_fusion.drop_flat = 0.40
# Optimisation settings; the optimiser is given as a dotted-path string.
multihead_attention_model_torch_boost_modifymuchpossible.myModel_fusion.lr = 0.001
multihead_attention_model_torch_boost_modifymuchpossible.myModel_fusion.gradient_clip = False
multihead_attention_model_torch_boost_modifymuchpossible.myModel_fusion.class_weights = False
multihead_attention_model_torch_boost_modifymuchpossible.myModel_fusion.optimizer = "torch.optim.Adam"
multihead_attention_model_torch_boost_modifymuchpossible.myModel_fusion.weight_decay = 1e-5
# cnn_scaler and attention carry the same values as the myModel2 bindings in
# this file (3, True), not the myModel1 ones (1, False).
# NOTE(review): confirm these are meant to follow myModel2.
multihead_attention_model_torch_boost_modifymuchpossible.myModel_fusion.cnn_scaler = 3
multihead_attention_model_torch_boost_modifymuchpossible.myModel_fusion.attention = True
multihead_attention_model_torch_boost_modifymuchpossible.myModel_fusion.att_type = "self_attention"
# The two base models above are bound with mode "full"; this one uses "fusion".
multihead_attention_model_torch_boost_modifymuchpossible.myModel_fusion.mode = "fusion"