Commit cc56da9 (parent 0fddee9)
Showing 1 changed file with 156 additions and 0 deletions.
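The added file is a Python evaluation script. It reads model predictions from ./output.json (one JSON object per line) and reports three metrics: overall accuracy, perception accuracy (the fraction of ground-truth "yes" answers predicted as "yes"), and hallucination severity (the fraction of ground-truth "no" answers predicted as "no"). Each metric is computed for the entire dataset, then per sub_category, and finally per sub_category within each granularity level (object-level and event-level).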
import os
import json


if __name__ == '__main__':
    # Open the prediction output file (JSON Lines: one record per line)
    with open('./output.json', 'r') as f:
        lines = f.readlines()

    ### Overall Accuracy, Perception Accuracy, and Hallucination Severity for the entire dataset
    total = 0
    correct = 0
    total_yes_in_answer = 0
    correct_yes = 0
    total_no_in_answer = 0
    correct_no = 0

    for line in lines:
        sample = json.loads(line)
        answer = sample['answer'].strip().lower()
        prediction = sample['pred'].strip().lower()
        total += 1

        # Check if the prediction is correct (for overall accuracy)
        if answer in prediction[:5]:
            correct += 1

        # Check for "yes" or "no" in the answers
        if 'yes' in answer[:5]:
            total_yes_in_answer += 1
            if 'yes' in prediction[:5]:
                correct_yes += 1
        elif 'no' in answer[:5]:
            total_no_in_answer += 1
            if 'no' in prediction[:5]:
                correct_no += 1

    # Calculate and print overall accuracy, perception accuracy, and hallucination severity for the entire dataset
    overall_accuracy = correct / total if total > 0 else 0
    overall_perception_accuracy = correct_yes / total_yes_in_answer if total_yes_in_answer > 0 else 0
    overall_hallucination_severity = correct_no / total_no_in_answer if total_no_in_answer > 0 else 0

    print(f'Overall Accuracy (entire dataset): {overall_accuracy:.4f}')
    print(f'Overall Perception Accuracy (entire dataset): {overall_perception_accuracy:.4f}')
    print(f'Overall Hallucination Severity (entire dataset): {overall_hallucination_severity:.4f}\n')

    ### Perception Accuracy, Hallucination Severity, and Accuracy for each sub_category
    sub_category_dict = {}

    # First loop to accumulate counts for each sub_category
    for line in lines:
        sample = json.loads(line)
        sub_category = sample['sub_category']
        if sub_category not in sub_category_dict:
            sub_category_dict[sub_category] = {
                'correct_yes': 0, 'total_yes_in_answer': 0,  # For perception accuracy
                'correct_no': 0, 'total_no_in_answer': 0,    # For hallucination severity
                'correct': 0, 'total': 0                     # For overall accuracy
            }

        answer = sample['answer'].strip().lower()
        prediction = sample['pred'].strip().lower()

        # Update total for counting predictions and answers
        sub_category_dict[sub_category]['total'] += 1

        # Check for overall accuracy
        if answer in prediction[:5]:
            sub_category_dict[sub_category]['correct'] += 1

        # Check for "yes" or "no" in the answers
        if 'yes' in answer[:5]:
            sub_category_dict[sub_category]['total_yes_in_answer'] += 1
            if 'yes' in prediction[:5]:
                sub_category_dict[sub_category]['correct_yes'] += 1
        elif 'no' in answer[:5]:
            sub_category_dict[sub_category]['total_no_in_answer'] += 1
            if 'no' in prediction[:5]:
                sub_category_dict[sub_category]['correct_no'] += 1

    # Print the combined result in a table format with alignment
    print(f'{"sub_category":<35}{"perception accuracy":<20}{"hallucination severity":<25}{"accuracy":<10}')
    for sub_category in sub_category_dict:
        correct_yes = sub_category_dict[sub_category]['correct_yes']
        total_yes_in_answer = sub_category_dict[sub_category]['total_yes_in_answer']
        correct_no = sub_category_dict[sub_category]['correct_no']
        total_no_in_answer = sub_category_dict[sub_category]['total_no_in_answer']
        correct = sub_category_dict[sub_category]['correct']
        total = sub_category_dict[sub_category]['total']

        # Calculate perception accuracy, hallucination severity, and accuracy
        perception_accuracy = correct_yes / total_yes_in_answer if total_yes_in_answer > 0 else 0
        hallucination_severity = correct_no / total_no_in_answer if total_no_in_answer > 0 else 0
        accuracy = correct / total if total > 0 else 0

        print(f'{sub_category:<35}{perception_accuracy:<20.4f}{hallucination_severity:<25.4f}{accuracy:<10.4f}')

    ### Perception Accuracy, Hallucination Severity, and Accuracy for object-level and event-level within each sub_category
    granularity_dict = {
        'object-level': {},
        'event-level': {}
    }

    # First loop to accumulate counts for both levels
    for line in lines:
        sample = json.loads(line)
        sub_category = sample['sub_category']
        granularity = sample['granularity']  # Determine the level: object-level or event-level

        if granularity not in granularity_dict:
            continue  # Skip if the granularity is not recognized

        if sub_category not in granularity_dict[granularity]:
            granularity_dict[granularity][sub_category] = {
                'correct_yes': 0, 'total_yes_in_answer': 0,  # For perception accuracy
                'correct_no': 0, 'total_no_in_answer': 0,    # For hallucination severity
                'correct': 0, 'total': 0                     # For overall accuracy
            }

        answer = sample['answer'].strip().lower()
        prediction = sample['pred'].strip().lower()

        # Update total for counting predictions and answers
        granularity_dict[granularity][sub_category]['total'] += 1

        # Check for overall accuracy
        if answer in prediction[:5]:
            granularity_dict[granularity][sub_category]['correct'] += 1

        # Check for "yes" or "no" in the answers
        if 'yes' in answer[:5]:
            granularity_dict[granularity][sub_category]['total_yes_in_answer'] += 1
            if 'yes' in prediction[:5]:
                granularity_dict[granularity][sub_category]['correct_yes'] += 1
        elif 'no' in answer[:5]:
            granularity_dict[granularity][sub_category]['total_no_in_answer'] += 1
            if 'no' in prediction[:5]:
                granularity_dict[granularity][sub_category]['correct_no'] += 1

    # Print the combined result for both levels in a table format with alignment
    print(f'\n{"Granularity":<15}{"sub_category":<35}{"perception accuracy":<20}{"hallucination severity":<25}{"accuracy":<10}')
    for granularity in granularity_dict:
        print(f'{granularity.upper()} RESULTS:')
        for sub_category in granularity_dict[granularity]:
            correct_yes = granularity_dict[granularity][sub_category]['correct_yes']
            total_yes_in_answer = granularity_dict[granularity][sub_category]['total_yes_in_answer']
            correct_no = granularity_dict[granularity][sub_category]['correct_no']
            total_no_in_answer = granularity_dict[granularity][sub_category]['total_no_in_answer']
            correct = granularity_dict[granularity][sub_category]['correct']
            total = granularity_dict[granularity][sub_category]['total']

            # Calculate perception accuracy, hallucination severity, and accuracy
            perception_accuracy = correct_yes / total_yes_in_answer if total_yes_in_answer > 0 else 0
            hallucination_severity = correct_no / total_no_in_answer if total_no_in_answer > 0 else 0
            accuracy = correct / total if total > 0 else 0

            print(f'{granularity:<15}{sub_category:<35}{perception_accuracy:<20.4f}{hallucination_severity:<25.4f}{accuracy:<10.4f}')
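For reference, the script assumes each line of ./output.json is a self-contained JSON record whose answer and pred fields begin with "Yes" or "No" (only the first five characters of each are compared). A hypothetical input record, with field values invented purely for illustration, might look like:

{"answer": "Yes", "pred": "Yes, the person is holding a cup.", "sub_category": "action", "granularity": "object-level"}

Records whose granularity is neither "object-level" nor "event-level" still count toward the overall and per-sub_category metrics but are skipped in the per-granularity breakdown.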