-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathinit.py
More file actions
26 lines (22 loc) · 695 Bytes
/
init.py
File metadata and controls
26 lines (22 loc) · 695 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
from os import listdir
from os.path import isfile, join

import pandas as pd
def main():
    """Build ``data.csv`` from the negative and positive review folders.

    Loads every document under ``txt_sentoken/neg`` and ``txt_sentoken/pos``
    via ``load_docs``, stacks the two frames, shuffles the rows, and writes
    the result to ``data.csv`` in the current working directory.
    """
    dir_neg = "txt_sentoken/neg"
    dir_pos = "txt_sentoken/pos"

    print("Loading data..")
    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # supported way to stack frames and also works on older versions.
    data = pd.concat(
        [load_docs(dir_neg), load_docs(dir_pos)],
        ignore_index=True,
    )
    # frac=1.0 draws every row exactly once, i.e. a full random shuffle;
    # the index is then renumbered so the saved rows count up from 0.
    data = data.sample(frac=1.0).reset_index(drop=True)

    print("Data loaded, saving...")
    data.to_csv("data.csv")
    print("Done")
def load_docs(dir):
    """Read every regular file in *dir* into a two-column DataFrame.

    Args:
        dir: Path (as a string) of a directory of text documents. Its last
            three characters, ignoring trailing path separators, become the
            label, e.g. ``"txt_sentoken/neg"`` -> ``"neg"``. The name
            shadows the builtin but is kept so keyword callers still work.

    Returns:
        A ``pandas.DataFrame`` with a ``"text"`` column holding each file's
        full contents and a ``"class"`` column holding the label, one row
        per regular file, ordered by file name.
    """
    # Strip trailing separators so ".../neg/" is labelled "neg", not "eg/".
    label = dir.rstrip("/\\")[-3:]
    docs = {"text": [], "class": []}
    # listdir order is arbitrary; sort so the row order is reproducible.
    for filename in sorted(listdir(dir)):
        path = join(dir, filename)
        # Sub-directories and other non-files cannot be read as documents.
        if not isfile(path):
            continue
        with open(path) as f:
            docs["text"].append(f.read())
        docs["class"].append(label)
    return pd.DataFrame(docs)
if __name__ == "__main__":
    # Script entry point: run the conversion only when executed directly,
    # not when this module is imported.
    main()