-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy path00_dedup.py
More file actions
executable file
·50 lines (37 loc) · 1.38 KB
/
00_dedup.py
File metadata and controls
executable file
·50 lines (37 loc) · 1.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/python3
"""Let's slim down our CSV files.
We'll do it by getting rid of every line whose byte is the same as the byte
at the preceding timestep.
We'll also strip out the weird non-ascii characters from the file. Oh Rigol...
"""
import argparse
import string
def _define_flags():
    """Build the command-line parser for this script.

    Returns:
        An `argparse.ArgumentParser` with one positional argument,
        `csv_file`, which argparse opens for reading in text mode.
    """
    parser = argparse.ArgumentParser(
        description='Slim down CSV files. Output goes to stdout.',
    )
    # The only input is the CSV itself; argparse opens it on our behalf.
    csv_file_options = {
        'type': argparse.FileType('r'),
        'help': 'CSV file to process',
    }
    parser.add_argument('csv_file', **csv_file_options)
    return parser
def main(FLAGS):
    """Print only the CSV rows whose byte differs from the previous row's byte.

    The first two lines of the file are treated as headers and skipped. Each
    remaining line has its non-printable characters removed, and its last two
    comma-separated fields are taken as `timestep` and `byte`. A row is
    written to stdout as `timestep,byte` when its byte differs from the byte
    of the previous parsed row. Lines with fewer than two fields are skipped.

    Args:
        FLAGS: Parsed command-line flags. `FLAGS.csv_file` must be an open
            text file (or any iterator of text lines).
    """
    csv_file = FLAGS.csv_file

    # Skip the two header lines. The `None` default keeps a file with fewer
    # than two lines from raising StopIteration.
    next(csv_file, None)
    next(csv_file, None)

    # Build the lookup once: set membership is O(1), while testing against
    # the `string.printable` str scans it for every character.
    printable = frozenset(string.printable)

    prev_byte = ''
    for line in csv_file:
        # Strip out non-printable characters, since some lines have plenty
        # of $00 bytes.
        line = ''.join(c for c in line if c in printable).rstrip()

        # Take only the last two fields so that a partial line prepended to
        # the current line doesn't wreck things.
        try:
            timestep, byte = line.split(',')[-2:]
        except ValueError:
            # Fewer than two fields (blank or truncated line). Skip it rather
            # than fall through with unbound or stale values.
            continue

        if byte != prev_byte:
            print('{},{}'.format(timestep, byte))
        prev_byte = byte
if __name__ == '__main__':
    # Script entry point: parse the command line, then run the de-duplication.
    FLAGS = _define_flags().parse_args()
    main(FLAGS)