# scoring.py (forked from iliao2345/CompressARC)
import json
from typing import Tuple
import argparse
import sys
def score_submission(submission_file_name, solutions_file_name, include_task_scores=False) -> dict:
    """Score a submission against ground truth solutions.

    Both files are read as JSON. The submission maps each task id to a list
    of test pairs, and each pair is a mapping of attempt name to attempted
    output. The solutions file maps each task id to a list of expected
    outputs, indexed in the same order as the submission's pairs.

    A pair counts as solved when any of its attempts equals the expected
    output. A task's score is the fraction of its pairs that were solved,
    and the total score is the sum of the task scores.

    Args:
        submission_file_name (str): The file name of the submission file.
        solutions_file_name (str): The file name of the ground truth solutions.
        include_task_scores (bool, optional): Whether to include individual
            task scores. Defaults to False.

    Returns:
        dict: A dictionary with ``'total_score'`` (sum of per-task scores),
        ``'total_tasks_scored'`` (number of tasks in the submission) and,
        when ``include_task_scores`` is true, ``'task_scores'`` (task id ->
        score for that task).

    Raises:
        KeyError: If a submitted task id with attempts is missing from the
            solutions.
        IndexError: If a submitted task has more pairs with attempts than
            the solutions provide for that task.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(submission_file_name, "r", encoding="utf-8") as file:
        submission = json.load(file)
    with open(solutions_file_name, "r", encoding="utf-8") as file:
        solutions = json.load(file)

    total_score = 0
    task_scores = {}

    # Grade every task present in the submission.
    for task_id, task_submission in submission.items():
        num_pairs = len(task_submission)

        # A pair is solved as soon as one attempt matches; any() stops at the
        # first match. Most tasks only have a single pair.
        num_correct = sum(
            any(
                attempt == solutions[task_id][pair_index]
                for attempt in pair_attempts.values()
            )
            for pair_index, pair_attempts in enumerate(task_submission)
        )

        # Average over the task's pairs. A task submitted with no pairs has
        # nothing solved, so it scores 0.0 instead of dividing by zero.
        task_score = num_correct / num_pairs if num_pairs else 0.0

        total_score += task_score
        task_scores[task_id] = task_score

    result = {
        'total_score': total_score,
        'total_tasks_scored': len(submission),
    }
    if include_task_scores:
        result['task_scores'] = task_scores
    return result
# Default input locations used when this file is run as a script.
submission_file_name = "./submission.json"
solutions_file_name = "dataset/arc-agi_training_solutions.json"

if __name__ == "__main__":
    # Score only when executed directly, so that importing this module to
    # reuse score_submission neither reads files nor prints anything.
    score = score_submission(submission_file_name, solutions_file_name, include_task_scores=True)
    print(json.dumps(score, indent=2))