-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdisaggregate_racking.py
More file actions
73 lines (65 loc) · 2.67 KB
/
disaggregate_racking.py
File metadata and controls
73 lines (65 loc) · 2.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# disaggregate_racking.py
#
# Convert tabular racking data with multiple core sections per row
# to a single section per row, preserving the cores' Location and Tube IDs.
#
# Input:
# CSV with Location, Tube/Container ID, Core 1, Core 2... headers in row 1.
# Each data row must have non-empty values for Location and Core 1. Tube/Container ID can be empty.
# Rows may have additional non-empty values in Core 2 and beyond.
#
# Output:
# CSV with Location, Tube ID, Core headers in row 1, followed by corresponding data rows.
import csv, os, sys
def is_valid_row(row, row_number):
    """Report whether a racking data row can be disaggregated.

    A usable row has at least three columns (Location, Tube/Container ID,
    Core 1) with non-empty Location and Core 1 cells. The Tube/Container ID
    cell may be empty. Every rejected row is reported on stdout.

    Args:
        row: List of cell strings for one CSV data row.
        row_number: Row number to show in the "Skipping row" messages.

    Returns:
        True if the row should be processed, False if it should be skipped.

    Raises:
        AssertionError: If Core 1 is empty but a later core cell is not.
    """
    if len(row) < 3:
        print(f"Skipping row {row_number}: must have Location, Tube ID, and 1+ Core columns")
        return False
    if not row[0]:
        print(f"Skipping row {row_number}: Empty location")
        return False
    # row[1] aka Tube/Container ID can be empty
    if not row[2]:
        print(f"Skipping row {row_number}: Empty Core 1")
        # Bail on goofy formatting. Raised explicitly instead of using an
        # `assert` statement so the check still runs under `python -O`.
        if any(row[3:]):
            raise AssertionError(f"Empty Core 1 but valid cores exist beyond: {row[3:]}")
        return False
    return True
def create_single_section_rows(row):
    """Expand one multi-core row into one output row per non-empty core.

    Args:
        row: List of cells laid out as
            [Location, Tube/Container ID, Core 1, Core 2, ...].

    Returns:
        A list of [location, tube_id, core] lists, one for each non-empty
        core cell, in the order the cores appear in the row.
    """
    location, tube_id = row[0], row[1]
    return [
        [location, tube_id, core]
        for core in row[2:]
        if len(core) > 0
    ]
def disaggregate(inputfile, outputfile):
    """Write a one-core-per-row copy of a racking CSV file.

    The first row of the input is treated as a header and skipped. Each
    remaining row is checked with is_valid_row() and, when accepted, expanded
    with create_single_section_rows(). The output file gets a
    Location / Tube ID / Core header followed by the expanded rows, and is
    only written after the whole input has been read.

    Args:
        inputfile: Path of the CSV file to read.
        outputfile: Path of the CSV file to write (UTF-8 with BOM).
    """
    all_rows = []
    # newline='' leaves newline handling to the csv module, as its
    # documentation requires, so line breaks inside quoted cells are read
    # correctly.
    # NOTE(review): the input is decoded with the platform default encoding
    # while the output is written as utf-8-sig -- confirm the input encoding.
    with open(inputfile, 'rt', newline='') as f:
        reader = csv.reader(f)
        # start=1 yields the 1-based row numbers used in skip messages
        for row_number, row in enumerate(reader, start=1):
            if row_number == 1:
                continue  # skip header
            if is_valid_row(row, row_number):
                all_rows.extend(create_single_section_rows(row))

    with open(outputfile, 'wt', newline='', encoding='utf-8-sig') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(['Location', 'Tube ID', 'Core'])
        writer.writerows(all_rows)
def convert_dirs(input_dir, output_dir):
    """Disaggregate every .csv file found directly inside input_dir.

    For each input file `<name>.csv`, the result is written to
    `<name>_one_per_row.csv` inside output_dir.

    Args:
        input_dir: Directory that is listed for files ending in '.csv'.
        output_dir: Directory that receives the output files.
    """
    input_abspath = os.path.abspath(input_dir)
    output_abspath = os.path.abspath(output_dir)
    # Sorted so files are processed (and logged) in a deterministic order.
    csv_files = sorted(f for f in os.listdir(input_dir) if f.endswith('.csv'))
    for cf in csv_files:
        print(f'Disaggregating racking data in {cf}...')
        # splitext drops only the final extension, so names with extra dots
        # (e.g. "rack.1.csv" and "rack.2.csv") keep distinct output names
        # instead of both collapsing to "rack_one_per_row.csv".
        stem = os.path.splitext(cf)[0]
        output_file = f"{stem}_one_per_row.csv"
        disaggregate(os.path.join(input_abspath, cf), os.path.join(output_abspath, output_file))
# Script entry point: expects the input and output directories as the first
# two command-line arguments; prints usage when fewer than two are given.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    if len(cli_args) >= 2:
        convert_dirs(cli_args[0], cli_args[1])
    else:
        print("Usage:\npython disaggregate_racking.py [dir containing input .csv files] [dir to write .csv output files]")