Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,6 @@ def forward(
hidden_states = self.embedding.forward(input_ids)
residual = None
for idx, layer in enumerate(self.layers):
# print('layer: ',idx)
hidden_states, residual = layer(
hidden_states,
context=context,
Expand Down Expand Up @@ -295,12 +294,10 @@ def __init__(
factory_kwargs = {"device": device, "dtype": dtype}

super().__init__()
# print(vocab_size)
# if vocab_size % pad_vocab_size_multiple != 0:
# vocab_size += pad_vocab_size_multiple - (
# vocab_size % pad_vocab_size_multiple
# )
# print(vocab_size)

self.backbone = MixerModel(
d_model=d_model,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,6 @@ def forward(
input_ids=context_tokens,
mask=source_attention_mask,
)
# print(source_vec.dtype, source_attention_mask.dtype)
cache = self.allocate_inference_cache(
batch_size=b,
max_seqlen=300 + l + 1, # source + BOS
Expand All @@ -153,10 +152,6 @@ def forward(
# batch, seqlen, dim = self.decoder.backbone.embedding.forward(input_ids).shape
# conv_state, ssm_state = self.decoder.backbone.layers[0].mixer._get_states_from_cache(inference_params, b)
# inference_params = None
# print(conv_state.type(),input_ids.type(), source_vec.type())
# print(source_attention_mask.type(), target_attention_mask.type())
# print(position_ids.type())
# print(num_last_tokens)

out = self.decoder.forward(
input_ids,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,6 @@ def forward(self , src, tgt):
dec_output = decod_embed
for decoder in self.decoder:
dec_output = decoder(dec_output , enc_output , src_mask , tgt_mask )
# print("Decoder output " , dec_output)
out = self.ff(dec_output)

# out = self.ff(dec_output)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@
# break

# sqampls_prefix = []
# print("Loading squared amplitudes")
# for exp in tqdm(sqampl_raw):
# # simplified = sp.factor(exp) # worked best for simplification
# # prefix = sympy_to_prefix(simplified)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,22 +95,17 @@ def _queue_mgr(func_str: str, q_in: mp.Queue, q_out: mp.Queue, timeout: int, pid
proc = mp.Process(target=_lemmiwinks, args=(func_str, (x,), {}, q_worker,))
proc.start()
try:
# print(f'[{pid}]: {positioning}: getting')
res = q_worker.get(timeout=timeout)
# print(f'[{pid}]: {positioning}: got')
q_out.put((positioning, res))
except mpq.Empty:
q_out.put((positioning, sp.sympify(x)))
# print(f'[{pid}]: {positioning}: timed out ({timeout}s)')
with open(timeout_logfile, "a") as f:
f.write("Timed out after "+str(timeout)+" seconds. Argument:" + x + "\n")
finally:
try:
proc.terminate()
# print(f'[{pid}]: {positioning}: terminated')
except:
pass
# print(f'[{pid}]: completed!')


def killer_pmap(func: Callable, iterable: Iterable, cpus: Optional[int] = None, timeout: int = 10*60,
Expand Down Expand Up @@ -140,7 +135,7 @@ def killer_pmap(func: Callable, iterable: Iterable, cpus: Optional[int] = None,
mp.Process(target=_queue_mgr, args=(dill.dumps(func), q_in, q_out, timeout, pid, timeout_logfile))
for pid in range(cpus)
]
# print(f'Started {len(processes)} processes')

for proc in processes:
proc.start()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,6 @@ def get_unique_indices(l):
print("Resuming calculations, reading progress from "+progress_file)
with open(progress_file) as f:
progess_file_contents = [line for line in f.readlines()]
# print(progess_file_contents[-6:])
batch_resume = int(progess_file_contents[-7].split(":")[1]) + 1
index_resume = int(progess_file_contents[-3].split(":")[1])
batch_size_resume = int(progess_file_contents[-2].split(":")[1])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,6 @@ def fix_i_match(matchobj):
"""
match = matchobj.group(0)
return match.replace("i", "I")
# if int(match[1]) % 2 != 0:
# print("asdf")
# exponent = int(match[1]) // 2
# m, m_name = match[0].split("_")
# if exponent == 1:
# return m+"2"+m_name
# else:
# return m+"2"+m_name + "**" + str(exponent)


def combine_m_match(matchobj):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@
# break

# sqampls_prefix = []
# print("Loading squared amplitudes")
# for exp in tqdm(sqampl_raw):
# # simplified = sp.factor(exp) # worked best for simplification
# # prefix = sympy_to_prefix(simplified)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,22 +95,17 @@ def _queue_mgr(func_str: str, q_in: mp.Queue, q_out: mp.Queue, timeout: int, pid
proc = mp.Process(target=_lemmiwinks, args=(func_str, (x,), {}, q_worker,))
proc.start()
try:
# print(f'[{pid}]: {positioning}: getting')
res = q_worker.get(timeout=timeout)
# print(f'[{pid}]: {positioning}: got')
q_out.put((positioning, res))
except mpq.Empty:
q_out.put((positioning, sp.sympify(x)))
# print(f'[{pid}]: {positioning}: timed out ({timeout}s)')
with open(timeout_logfile, "a") as f:
f.write("Timed out after "+str(timeout)+" seconds. Argument:" + x + "\n")
finally:
try:
proc.terminate()
# print(f'[{pid}]: {positioning}: terminated')
except:
pass
# print(f'[{pid}]: completed!')


def killer_pmap(func: Callable, iterable: Iterable, cpus: Optional[int] = None, timeout: int = 10*60,
Expand Down Expand Up @@ -140,7 +135,7 @@ def killer_pmap(func: Callable, iterable: Iterable, cpus: Optional[int] = None,
mp.Process(target=_queue_mgr, args=(dill.dumps(func), q_in, q_out, timeout, pid, timeout_logfile))
for pid in range(cpus)
]
# print(f'Started {len(processes)} processes')

for proc in processes:
proc.start()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,6 @@ def get_unique_indices(l):
print("Resuming calculations, reading progress from "+progress_file)
with open(progress_file) as f:
progess_file_contents = [line for line in f.readlines()]
# print(progess_file_contents[-6:])
batch_resume = int(progess_file_contents[-7].split(":")[1]) + 1
index_resume = int(progess_file_contents[-3].split(":")[1])
batch_size_resume = int(progess_file_contents[-2].split(":")[1])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,6 @@ def fix_i_match(matchobj):
"""
match = matchobj.group(0)
return match.replace("i", "I")
# if int(match[1]) % 2 != 0:
# print("asdf")
# exponent = int(match[1]) // 2
# m, m_name = match[0].split("_")
# if exponent == 1:
# return m+"2"+m_name
# else:
# return m+"2"+m_name + "**" + str(exponent)


def combine_m_match(matchobj):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,6 @@ def fix_i_match(matchobj):
"""
match = matchobj.group(0)
return match.replace("i", "I")
# if int(match[1]) % 2 != 0:
# print("asdf")
# exponent = int(match[1]) // 2
# m, m_name = match[0].split("_")
# if exponent == 1:
# return m+"2"+m_name
# else:
# return m+"2"+m_name + "**" + str(exponent)


def combine_m_match(matchobj):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,6 @@ def fix_i_match(matchobj):
"""
match = matchobj.group(0)
return match.replace("i", "I")
# if int(match[1]) % 2 != 0:
# print("asdf")
# exponent = int(match[1]) // 2
# m, m_name = match[0].split("_")
# if exponent == 1:
# return m+"2"+m_name
# else:
# return m+"2"+m_name + "**" + str(exponent)


def combine_m_match(matchobj):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -591,14 +591,6 @@ def is_index(s: str):


if __name__ == "__main__":
# with open(sqampls_file) as f:
# ampls = f.readlines()
#
# print(ampls[0])
# print(ampls[100])
# print(ampls[-1])
# print(ampls[-20])

with open(ampls_raw_file) as f:
ampls_raw = f.readlines(100000)
ampls_raw = [a[:-1] for a in ampls_raw]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,13 +125,6 @@ def tree_to_sympy(tree, expression=None):
else:
node = tree._label
op = operators_inv[node]
# num_args = operators_nargs[node]
# if num_args != len(tree):
# print("num args not len(tree):")
# ic(num_args)
# ic(len(tree))
# ic(tree)
# assert num_args == len(tree)
return op(*[tree_to_sympy(t) for t in tree])
return 0

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -282,15 +282,12 @@ def decode_float(self,seq):
if x[0] not in ["-", "+", "E", "N"]:
return np.nan
try:
# print(val)
sign = 1 if val[0] == "+" else -1
mant = ""
for x in val[1:-1]:
mant += x[1:]
mant = int(mant)
# print(mant)
exp = int(val[-1][1:])
# print(exp)
value = sign * mant * (10 ** exp)
value = float(value)
except Exception:
Expand Down
3 changes: 0 additions & 3 deletions SYMBA_REG/SYMBREG_DPO+PIGP_Samyak_Jha/DPOFineTuning/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,8 @@ def __init__(self,
self.register_buffer('pos_embedding_1', self.pos_embedding)

def forward(self, token_embedding):
# print(token_embedding.shape)
token_embedding = token_embedding.to('cuda')
self.pos_embedding = self.pos_embedding.to('cuda')
# token_embedding = token_embedding
# self.pos_embedding = self.pos_embedding
return self.dropout(token_embedding + self.pos_embedding[:,:token_embedding.size(1), :])


Expand Down
3 changes: 0 additions & 3 deletions SYMBA_REG/SYMBREG_DPO+PIGP_Samyak_Jha/PIGP/Tokenizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,15 +260,12 @@ def decode_float(self,seq):
if x[0] not in ["-", "+", "E", "N"]:
return np.nan
try:
# print(val)
sign = 1 if val[0] == "+" else -1
mant = ""
for x in val[1:-1]:
mant += x[1:]
mant = int(mant)
# print(mant)
exp = int(val[-1][1:])
# print(exp)
value = sign * mant * (10 ** exp)
value = float(value)
except Exception:
Expand Down
3 changes: 0 additions & 3 deletions SYMBA_REG/SYMBREG_DPO+PIGP_Samyak_Jha/PIGP/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,8 @@ def __init__(self,
self.register_buffer('pos_embedding_1', self.pos_embedding)

def forward(self, token_embedding):
# print(token_embedding.shape)
token_embedding = token_embedding.to('cuda')
self.pos_embedding = self.pos_embedding.to('cuda')
# token_embedding = token_embedding
# self.pos_embedding = self.pos_embedding
return self.dropout(token_embedding + self.pos_embedding[:,:token_embedding.size(1), :])


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -282,15 +282,12 @@ def decode_float(self,seq):
if x[0] not in ["-", "+", "E", "N"]:
return np.nan
try:
# print(val)
sign = 1 if val[0] == "+" else -1
mant = ""
for x in val[1:-1]:
mant += x[1:]
mant = int(mant)
# print(mant)
exp = int(val[-1][1:])
# print(exp)
value = sign * mant * (10 ** exp)
value = float(value)
except Exception:
Expand Down
3 changes: 0 additions & 3 deletions SYMBA_REG/SYMBREG_DPO+PIGP_Samyak_Jha/SymbolicDPO/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,8 @@ def __init__(self,
self.register_buffer('pos_embedding_1', self.pos_embedding)

def forward(self, token_embedding):
# print(token_embedding.shape)
token_embedding = token_embedding.to('cuda')
self.pos_embedding = self.pos_embedding.to('cuda')
# token_embedding = token_embedding
# self.pos_embedding = self.pos_embedding
return self.dropout(token_embedding + self.pos_embedding[:,:token_embedding.size(1), :])


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,20 +27,18 @@ def get_args_parser():
# """
# total = len(list(iterable))
# # Progress Bar Printing Function
# print(total)
# def printProgressBar (iteration):
# percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
# filledLength = int(length * iteration // total)
# bar = fill * filledLength + '-' * (length - filledLength)
# print(f'\rProgress |{bar}| {percent}% Complete', end = printEnd)
# # Initial Call
# printProgressBar(0)
# # Update Progress Bar
# for i, item in enumerate(iterable):
# yield item
# printProgressBar(i + 1)
# # Print New Line on Complete
# print()


def main(args):
df = pd.read_csv(args.dataframe_path)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,6 @@ def get_initial_population(pop_size, candidates, pset, toolbox):
for i in range(pop_size - len(population)):
random_candidate = toolbox.individual()
population.append(random_candidate)
#print(population)
return population

@staticmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ def validate_expressions(self, expressions, num_vars):
for expression in expressions:
try:
expression = expression.type(torch.long)[0].tolist()
# print(f"{expression=}")
expression = self.tokenizer.reverse_tokenize([expression[1:-1]])[0]
#expression = prefix_to_sympy(expressions)
if expression not in valid:
Expand Down Expand Up @@ -98,10 +97,8 @@ def get_candidate_equations_single(self, X, y):
num_vars = X.shape[1]
x, num_array = self.format_data_for_transformer(X, y)
expressions = self.generate_expressions(x, num_array)
#print(f"{expressions[0]=}")

expressions = self.validate_expressions(expressions, num_vars)
#print(f"{expressions[0]=}")
candidates = []
for expression in expressions:
try:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -276,8 +276,6 @@ def test_seq_acc(self):
y_pred = predictor.predict(src[0].unsqueeze(0), num_array[0].unsqueeze(0)) #only one example from each batch
y_preds.append(y_pred.cpu().numpy())
y_true.append(np.trim_zeros(tgt[0]))
# print("pred", y_pred.cpu().tolist())
# print("true", y_true[-1].tolist())

test_accuracy_seq = sequence_accuracy(y_true, y_preds)
f= open(os.path.join(self.logs_dir, "score.txt"),"w+")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ def __init__(self, vocab_size: int, emb_size):

def forward(self, tokens, num_array):
out = self.embedding(tokens.long()) * math.sqrt(self.emb_size)
# print("embeds", out.shape)
out = self.layer_norm(out)
out *= num_array.unsqueeze(-1)
return out
Expand All @@ -68,15 +67,12 @@ def forward(self, tokens, num_array):
#dims = torch.tensor(out.size(1)*out.size(2)*out.size(3))
#mag_norm = 5/torch.sqrt(dims)
#out += torch.zeros_like(out).uniform_(-mag_norm, mag_norm)
#print("embed", out.shape)
#print("num", num_array.shape)
bs, n = out.shape[0], out.shape[1]
out *= num_array.unsqueeze(-1)
out = out.view(bs, n, -1)
out = self.activation(self.fc1(out))
out = self.dropout(out)
out = self.fc2(out)
#print("out", out.shape)
return out

class Model(nn.Module):
Expand Down
Loading