-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathjoin_problems.py
More file actions
executable file
·97 lines (81 loc) · 3.6 KB
/
Copy pathjoin_problems.py
File metadata and controls
executable file
·97 lines (81 loc) · 3.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/env python3
import argparse
import itertools
import logging
import os
import numpy as np
import pandas as pd
import yaml
import vampire_ml.results
def arg_to_columns(arg):
    """Flatten a list of lists into a single flat list.

    Returns None unchanged when `arg` is None; otherwise returns a new list
    holding the items of every inner iterable, in their original order.
    """
    if arg is not None:
        return [column for group in arg for column in group]
    return None
def str_to_column(s):
    """Parse a column identifier written as a YAML string.

    The string is loaded with ``yaml.safe_load``. A YAML list is converted to
    a tuple; any other parsed value is returned as loaded.
    """
    parsed = yaml.safe_load(s)
    return tuple(parsed) if isinstance(parsed, list) else parsed
def str_to_order(s):
    """Return True if `s` is one of the accepted spellings of ascending order.

    Every other value (including differently cased spellings) yields False.
    """
    ascending_spellings = ('a', 'asc', 'ascending', '+', '1', 'True', 'true')
    return s in ascending_spellings
def filter_columns(df, filters):
    """Keep only the rows of `df` that satisfy every filter.

    Args:
        df: Data frame to filter.
        filters: Iterable of ``(column, op, expected)`` triplets. ``column``
            is a string parsed with ``str_to_column``, ``op`` is one of
            ``<=``, ``>=`` or ``==``, and ``expected`` is the value to compare
            the column against.

    Returns:
        The data frame restricted to the rows that pass all filters. The
        filters are applied one after another, each on the result of the
        previous one.

    Raises:
        ValueError: If ``op`` is not one of the supported comparisons.
    """
    # The ordering comparisons convert the expected value with float();
    # equality compares against the value exactly as it was passed in.
    # NOTE(review): a string `expected` will therefore never equal the entries
    # of a numeric column under '==' -- confirm this is intended.
    comparisons = {
        '<=': lambda actual, expected: actual <= float(expected),
        '>=': lambda actual, expected: actual >= float(expected),
        '==': lambda actual, expected: actual == expected,
    }
    for column, op, expected in filters:
        # Explicit check instead of `assert`, which is stripped under -O.
        if op not in comparisons:
            raise ValueError(
                f'unsupported comparison {op!r}; expected one of {sorted(comparisons)}')
        column = str_to_column(column)
        flags = comparisons[op](df[column], expected)
        logging.debug(f'{column} {op} {expected}: {flags.sum()}/{len(flags)}')
        df = df[flags]
    return df
if __name__ == '__main__':
    # Command-line entry point.
    #
    # Reads one or more pickled data frames indexed by 'problem_path', takes
    # the "common" columns from the first file and the "individual" columns
    # from every file (optionally filtered, sorted and truncated), places the
    # per-file frames side by side keyed by file path, joins the common
    # columns onto them and saves the result to the output directory together
    # with a plain-text list of the resulting index values.
    parser = argparse.ArgumentParser()
    parser.add_argument('problems', nargs='+', help='dataframe pickle paths')
    parser.add_argument('--output', required=True, metavar='DIRECTORY', help='output directory')
    parser.add_argument('--columns_common', nargs='+', action='append', type=str_to_column, metavar='COLUMN')
    parser.add_argument('--columns_individual', nargs='+', action='append', type=str_to_column, metavar='COLUMN')
    parser.add_argument('--filters', nargs=3, action='append', metavar=('COLUMN', 'COMPARISON', 'VALUE'),
                        help='triplets "column_name {<=,>=,==} expected_value"')
    parser.add_argument('--sort_columns', type=str_to_column, metavar='COLUMN', nargs='+')
    parser.add_argument('--sort_order', type=str_to_order, nargs='+')
    parser.add_argument('--count', type=int, metavar='N', help='number of top records to take from each input dataset')
    namespace = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG)

    columns_individual = arg_to_columns(namespace.columns_individual)
    columns_common = arg_to_columns(namespace.columns_common)

    # With no columns selected there is nothing to aggregate; fail early with
    # a usage error instead of crashing later on a missing data frame.
    if columns_individual is None and columns_common is None:
        parser.error('at least one of --columns_common and --columns_individual is required')
    if namespace.count is not None and namespace.count < 0:
        parser.error('--count must be non-negative')

    # Omitted --sort_order falls back to ascending (the pandas default); a
    # single value applies to all sort columns. Longer lists are passed on
    # unchanged and must match the number of sort columns.
    sort_order = namespace.sort_order
    if sort_order is None:
        sort_order = True
    elif len(sort_order) == 1:
        sort_order = sort_order[0]

    problems_common = None
    problems = {}
    for i, file_path in enumerate(namespace.problems):
        # NOTE: unpickling can execute arbitrary code; only pass trusted files.
        df = pd.read_pickle(file_path)
        if df.index.name != 'problem_path':
            raise ValueError(
                f"{file_path}: expected index named 'problem_path', got {df.index.name!r}")
        # The common columns are taken from the first input file only.
        if columns_common is not None and i == 0:
            problems_common = df[columns_common]
        if columns_individual is None:
            continue
        if namespace.filters is not None:
            df = filter_columns(df, namespace.filters)
        if namespace.sort_columns is not None:
            # Rebind rather than sort in place: `df` may be a filtered slice.
            df = df.sort_values(namespace.sort_columns, ascending=sort_order)
        if columns_individual:
            df = df[columns_individual]
        if namespace.count is not None:
            df = df.iloc[:namespace.count]
        problems[file_path] = df

    if len(problems) > 1:
        # Side-by-side concatenation; the file path becomes the outer column key.
        problems_aggregated = pd.concat(list(problems.values()), axis=1, keys=list(problems.keys()), sort=True)
    elif len(problems) == 1:
        problems_aggregated = next(iter(problems.values()))
    else:
        problems_aggregated = problems_common
    problems_aggregated.index.name = 'problem_path'
    if problems_common is not None and len(problems) >= 1:
        # Right join: keep exactly the problems selected from the individual frames.
        problems_aggregated = problems_common.join(problems_aggregated, how='right')

    vampire_ml.results.save_df(problems_aggregated, 'problems', output_dir=namespace.output)
    np.savetxt(os.path.join(namespace.output, 'problems.txt'), problems_aggregated.index.values, fmt='%s')