-
Notifications
You must be signed in to change notification settings - Fork 0
/
analysis.py
109 lines (73 loc) · 3.39 KB
/
analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import re
from datetime import datetime
from mongo import *
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
import tools
def avgPartial(arr, include=None, exclude=None, every_nth=None, onlyIfChance=False, minRank=None):
    """Average the ``"accuracy"`` of games selected by their position in ``arr``.

    Positions are counted from 1: a game is "on the beat" when its 1-based
    position is a multiple of ``every_nth``. Games whose ``"accuracy"`` is
    ``None`` are always skipped.

    Args:
        arr: Sequence of dict-like games. Each game must have an
            ``"accuracy"`` key; with ``onlyIfChance`` the game preceding each
            on-beat game must also have a ``"possible_rank"`` key.
        include: If truthy, average the on-beat games.
        exclude: If truthy, average every game that is NOT on the beat.
        every_nth: Non-zero integer step that defines the beat.
        onlyIfChance: Only honoured together with ``include``. Keep an
            on-beat game only when the game immediately before it has
            ``possible_rank < minRank``.
        minRank: Threshold used by ``onlyIfChance``.

    Returns:
        A ``(mean, count)`` tuple. When no game is selected the result is
        ``(nan, 0)`` instead of a division-by-zero crash.

    Raises:
        ValueError: If both ``include`` and ``exclude`` are given, if neither
            is truthy, if ``every_nth`` is missing or zero, or if
            ``onlyIfChance`` is requested without ``minRank``.
    """
    if exclude is not None and include is not None:
        raise ValueError(
            "Only one of included or excluded elements should be specified")
    if not include and not exclude:
        # Previously this path fell through and crashed on an unbound local.
        raise ValueError("One of include or exclude must be specified")
    if not every_nth:
        raise ValueError("every_nth must be a non-zero integer")
    if include and onlyIfChance and minRank is None:
        raise ValueError("minRank is required when onlyIfChance is set")

    def is_selected(index):
        on_beat = (index + 1) % every_nth == 0
        if exclude:
            return not on_beat
        if not on_beat:
            return False
        if onlyIfChance:
            # The first game has no predecessor; without this guard
            # arr[index - 1] would silently wrap around to the LAST game.
            return index > 0 and arr[index - 1]["possible_rank"] < minRank
        return True

    values = [
        game["accuracy"]
        for index, game in enumerate(arr)
        if is_selected(index) and game["accuracy"] is not None
    ]
    if not values:
        return (float("nan"), 0)
    return (sum(values) / len(values), len(values))
def analyze(username="magnuscarlsen"):
    """Run the accuracy analysis pipeline for one player.

    Loads the player's games, derives several accuracy/score samples from
    them, prints the sizes of the two main samples and hands the samples to
    the ``tools`` helpers (statistics, tests, plots, models).

    Assumption (inferred from the ``% 11`` final-round selection, confirm
    against the data loader): ``player["games"]`` is a flat list in which
    every 11 consecutive entries form one tournament, so the game at 0-based
    index ``i`` belongs to round ``i % 11 + 1``.

    Args:
        username: Name of the player to look up via ``getPlayer``.
    """
    rounds_per_tournament = 11
    regular_rounds = rounds_per_tournament - 1
    # A final-round game counts as "important" only when the player's
    # possible_rank after the previous round was better than this.
    contender_rank = 5

    # Fetch the player record (``players`` presumably comes from the mongo
    # module's star import).
    player = getPlayer(players, username)
    games = player["games"]

    # Mean accuracy of each non-final round across all tournaments.
    # Slicing with a stride of 11 picks round r of every tournament; the
    # earlier `every_nth=r+1` call picked "every k-th game" instead, which
    # mixes rounds together.
    avgs_not_last = np.array([
        avgPartial(games[round_idx::rounds_per_tournament],
                   include=True, every_nth=1)[0]
        for round_idx in range(regular_rounds)
    ])

    # Accuracy of every game that has one.
    accuracies = [game['accuracy']
                  for game in games if game['accuracy'] is not None]

    # Accuracy of final-round games where the player still had winning
    # chances going into the round.
    accuracies_final = [
        game['accuracy']
        for i, game in enumerate(games)
        if (i + 1) % rounds_per_tournament == 0
        and game['accuracy'] is not None
        and games[i - 1]["possible_rank"] < contender_rank
    ]

    # Paired (score, accuracy) observations. Both lists come from the same
    # games so they have equal length and matching order; previously
    # `scores` was filled with accuracies by mistake.
    scored_games = [game for game in games
                    if game['score'] is not None and game['accuracy'] is not None]
    scores = [game['score'] for game in scored_games]
    scored_accuracies = [game['accuracy'] for game in scored_games]

    print("avgs general")
    print(len(accuracies))
    print("avgs important")
    print(len(accuracies_final))

    # Descriptive statistics
    tools.descriptive_stats(games)
    # Draw histogram of mean accuracies in rounds 1-10
    tools.accuracyHistogram(avgs_not_last)
    # Visually evaluate the accuracy distribution in all games versus the
    # important games (player has winning chances in the last round)
    tools.accuracyDistribution(accuracies, accuracies_final, username)
    # Kernel density estimation
    tools.kde_estimation(accuracies)
    # Kolmogorov-Smirnov test
    tools.kolmogorov_smirnov(accuracies, accuracies_final)
    # Spearman correlation between game scores and accuracy in the game
    tools.spearman_correlation(scores, scored_accuracies)
    # Calculate correlation matrix
    tools.correlation_matrix(games)
    # Maximum likelihood estimation to evaluate whether accuracies follow a
    # normal distribution
    tools.maximum_likelihood_estimation(accuracies)
    # K-Means clustering
    tools.k_means(accuracies)
    # K-nearest neighbors
    tools.knn(accuracies)
    # Linear regression of score against accuracy on the paired sample
    tools.linear_regression(scores, scored_accuracies)
# Script entry point: runs the full analysis for the default player.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so this also
# runs whenever the module is imported.
analyze()