-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathqLearningAgent.py
More file actions
171 lines (145 loc) · 4.65 KB
/
Copy pathqLearningAgent.py
File metadata and controls
171 lines (145 loc) · 4.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# William Hartog
# 12/16/18
from generalAgent import generalAgent
import random
HOLD = 0
SELL = -1
BUY = 1
class qLearningAgent(generalAgent):
def __init__(self):
generalAgent.__init__(self)
# 10 states corresponding to percent diff in sma_10 vs current price
self.states = range(10)
# initialized at 0 for each symbol but will change based on past prices
self.currentStates = {}
for symbol in self.symbols:
self.currentStates[symbol] = 0
self.qValues = {}
# initialize all qValues over all symbols to 0
for symbol in self.symbols:
self.qValues[symbol] = {}
for state in self.states:
for action in self.actions:
self.qValues[symbol][state, action] = 0
self.past_actions = {} # dictionary of past actions
# learning rate alpha for q value updates
self.alpha = 1.0
self.discount = 0.99
# epsilon, prob choose random action instead of one w best q - goes down over time
self.epsilon = 1.
# variable for whether or not to shuffle actions before sampling in calculation
# of best action in decide()
self.shuffle = False
# dictionary of past q_values
# self.past_q = {}
# same state assignments/space size as naiveBayes approach
def assign_state(self, percent):
absol = abs(percent)
state = 0
if absol > .4:
if percent < 0:
state = 0
else:
state = 9
elif absol <= .4 and absol > .3:
if percent < 0:
state = 1
else:
state = 8
elif absol <= .3 and absol > .2:
if percent < 0:
state = 2
else:
state = 7
elif absol <= .2 and absol > .1:
if percent < 0:
state = 3
else:
state = 6
else:
if percent < 0:
state = 4
else:
state = 5
return state
def state_from_symbol(self, symbol):
sma = self.simple_moving_average(symbol)
last_price = (self.past_prices[symbol])[0]
percent = 100 * (last_price - sma) / sma
state = self.assign_state(percent)
return state
# same reward as in approxAgent, but condensed a bit
def reward(self, symbol):
# basically, what is price now, what was price before, how much did I gain or lose with what we did
action = self.past_actions[symbol]
diff = float(self.past_prices[symbol][0]) - float(self.past_prices[symbol][1])
# condenses reward calculation from if statements - want positive if BUY,
# negative if SELL, 0 if HOLD
reward = action * diff
return reward
def updateValues(self, feature_dict, past_prices):
# generalAgent.updateValues(feature_dict, past_prices)
self.feature_dict = feature_dict
self.past_prices = past_prices
for symbol in self.symbols:
self.currentStates[symbol] = self.state_from_symbol(symbol)
def compute_Q(self, action, symbol):
state = self.currentStates[symbol]
q_value = self.qValues[symbol][state, action]
return q_value
# this function to produce a shuffled list so for a list of actions decide
# doesn't always take the first if the values are all 0
def shuffle(self, lst):
old = list(lst)
new = []
while len(old) > 0:
i = random.randint(0, len(old) - 1)
new.append(old.pop(i))
return new
def decide(self):
actions = []
# decide an action for each symbol, with prob epsilon of random action
for symbol in self.symbols:
best_action = BUY
if (random.uniform(0,1) > self.epsilon):
best_q = -float('inf')
# either rearrange actions before sampling or always sample in fixed order
if self.shuffle:
actions_to_investigate = self.shuffle(self.actions)
else:
actions_to_investigate = self.actions
for action in actions_to_investigate:
q_val = self.compute_Q(action, symbol)
if q_val > best_q:
best_q = q_val
best_action = action
else:
best_action = random.randint(-1, 1)
actions.append(best_action)
self.past_actions[symbol] = best_action
# update epsilon
self.epsilon *= 0.95
# store past actions so can calculate reward in updateQValues
# for i, symbol in enumerate(self.symbols):
# self.past_actions[symbol] = actions[i]
self.updateQValues()
return actions
def updateQValues(self):
for symbol in self.symbols:
state = self.currentStates[symbol]
best_q = float("-inf")
best_action = BUY
# first, find the max of the q values going from the state we are at now
# still necessary thanks to epsilon-greedy method
for action in self.actions:
q_val = self.compute_Q(action, symbol)
if q_val > best_q:
best_q = q_val
best_action = action
last_action = self.past_actions[symbol]
prev_q = self.compute_Q(last_action, symbol)
sample = self.reward(symbol) + self.discount * best_q
new_q = (1. - self.alpha) * prev_q + self.alpha * sample
self.qValues[symbol][state, last_action] = new_q
# update learning rate
self.alpha *= (1. / 1.1)