-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDataSet.py
More file actions
58 lines (46 loc) · 1.68 KB
/
DataSet.py
File metadata and controls
58 lines (46 loc) · 1.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import os
import pandas as pd
# Build a labelled dataset CSV from raw tab-separated text recordings.
#
# Every ``<label>.*.txt`` file in RAW_DATA_DIR is read, the integer in the
# second tab-separated column of each line is collected, and the resulting
# stream is cut into non-overlapping segments of SAMPLES_PER_SEGMENT values.
# Each segment becomes one CSV row, followed by a ``label`` column.

# Configuration
RAW_DATA_DIR = 'data_dev'  # folder containing the raw .txt files
OUTPUT_CSV = 'csv_dev/eeg_data.csv'  # path of the CSV file to write
SAMPLES_PER_SEGMENT = 128  # number of consecutive values per sample row
LABELS = ['blink', 'rest', 'clench']  # labels accepted from file names

# Make sure the output directory exists. It is derived from OUTPUT_CSV so
# that changing the output path cannot leave the two settings out of sync.
output_dir = os.path.dirname(OUTPUT_CSV)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

all_samples = []
all_labels = []

# Walk every .txt file. os.listdir returns entries in arbitrary order, so the
# listing is sorted to make the row order of the output reproducible.
for filename in sorted(os.listdir(RAW_DATA_DIR)):
    if not filename.endswith('.txt'):
        continue

    # The label is the part of the file name before the first dot.
    label = filename.split('.')[0].lower()
    if label not in LABELS:
        print(f"跳过未知标签文件: {filename}")
        continue

    file_path = os.path.join(RAW_DATA_DIR, filename)
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    # Drop the header row when present. The emptiness check keeps a
    # zero-length file from raising IndexError on lines[0].
    if lines and 'Raw' in lines[0]:
        lines = lines[1:]

    # Collect the integer found in the second tab-separated column.
    values = []
    for line in lines:
        parts = line.strip().split('\t')
        if len(parts) < 2:
            continue
        try:
            values.append(int(parts[1]))
        except ValueError:
            # Non-numeric cell: skip this line and keep going.
            continue

    # Cut into non-overlapping segments; a trailing remainder shorter than
    # SAMPLES_PER_SEGMENT is discarded.
    for i in range(0, len(values) - SAMPLES_PER_SEGMENT + 1, SAMPLES_PER_SEGMENT):
        segment = values[i:i + SAMPLES_PER_SEGMENT]
        all_samples.append(segment)
        all_labels.append(label)

print(f"总共构建了 {len(all_samples)} 个样本")

# Build the DataFrame: one row per segment, plus the label column.
df = pd.DataFrame(all_samples)
df['label'] = all_labels

# Save as CSV (without the index column).
df.to_csv(OUTPUT_CSV, index=False)
print(f"已保存数据集到: {OUTPUT_CSV}")