Commit
Showing 25 changed files with 1,887 additions and 0 deletions.
@@ -0,0 +1,8 @@
data/
archive/
reports/
stats/
.idea
.DS_Store
__pycache__/
*.pyc
@@ -0,0 +1,26 @@
# Affect detection

## Dataset
1. Download the 'data' folder from the OSF repository of the study: https://osf.io/5m3yu/
2. Place the 'data' folder inside the 'affect_detection' folder.

## Instructions
```
1. Clone the repository: git clone git@github.com:aepinilla/affect_detection.git
2. Go to src/feature_selection/lme_models
3. Open the 3 R files located in the 'lme_models' folder.
4. Edit line 15 of each of those files so that it points to your working directory.
5. From the root folder, run: python main.py
```

## Preprocessing
The 'data' folder contains data that has already been preprocessed. To replicate the preprocessing, follow these steps:
1. Install MATLAB.
2. Install EEGLAB following these instructions: https://eeglab.org/tutorials/01_Install/Install.html
3. Clone this repository into the MATLAB folder.
4. Transform the XDF files to CSV for faster processing (a sketch of this step is shown after this list):
```
python xdf_to_csv.py
```
5. Open EEGLAB in MATLAB and run the preprocessing script located in src/preprocessing.m
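The xdf_to_csv.py script itself is not shown in this commit view. As a rough illustration only, the conversion step could look something like the sketch below. It assumes pyxdf and pandas (both pinned in requirements.txt) and hypothetical data/xdf/ and data/csv/ folders; it is not the repository's actual script.

```python
# Illustrative sketch only -- NOT the repository's xdf_to_csv.py.
# Assumes pyxdf and pandas (pinned in requirements.txt) and hypothetical
# folders data/xdf/ (input recordings) and data/csv/ (output files).
import glob
import os

import pandas as pd
import pyxdf


def xdf_to_csv(xdf_path: str, out_dir: str) -> None:
    """Write one CSV per stream found in an XDF recording."""
    streams, _header = pyxdf.load_xdf(xdf_path)
    base = os.path.splitext(os.path.basename(xdf_path))[0]
    for i, stream in enumerate(streams):
        # Stream name from the XDF metadata; fall back to an index if missing.
        name = stream["info"]["name"][0] or f"stream_{i}"
        df = pd.DataFrame(stream["time_series"])
        df.insert(0, "timestamp", stream["time_stamps"])
        df.to_csv(os.path.join(out_dir, f"{base}_{name}.csv"), index=False)


if __name__ == "__main__":
    os.makedirs("data/csv", exist_ok=True)
    for path in glob.glob("data/xdf/*.xdf"):
        xdf_to_csv(path, "data/csv")
```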
@@ -0,0 +1,34 @@
"""
Author: Andres Pinilla Palacios
Institution: Quality and Usability Lab, TU Berlin & UTS Games Studio, University of Technology Sydney
"""

from src.analyse_features import analyse_features
from src.build_classifiers import build_classifiers
from src.compare_methods import compare_methods
from src.extract_features import extract_features
from src.lme_structure import lme_structure
from src.participants_age import participants_age
from src.random_indices import random_indices
from src.settings import participants_codes


def main():
    # Generate random indices, extract features, and adjust extracted features to the LME format.
    for p in participants_codes:
        random_indices(p)
        extract_features(p)
        lme_structure(p)
    # Analyse features using LME and RFE.
    analyse_features()
    # Build classification models with the selected features.
    for p in participants_codes:
        build_classifiers(p)
    # Compare the accuracy of classifiers built with features selected by each feature selection method.
    compare_methods()
    # Calculate participants' age for reporting in the manuscript.
    participants_age()


if __name__ == "__main__":
    main()
@@ -0,0 +1,86 @@
antropy==0.1.4
appnope==0.1.3
asttokens==2.0.5
backcall==0.2.0
Bottleneck==1.3.5
certifi==2022.9.14
cffi==1.15.1
charset-normalizer==2.1.1
cycler==0.11.0
debugpy==1.6.0
decorator==5.1.1
entrypoints==0.4
executing==0.8.3
flatten-dict==0.4.2
fonttools==4.32.0
idna==3.4
ipykernel==6.13.0
ipyparallel==6.3.0
ipython==8.2.0
ipython-genutils==0.2.0
jedi==0.18.1
Jinja2==3.1.2
joblib==1.1.0
jupyter-client==7.1.2
jupyter-core==4.10.0
kiwisolver==1.4.2
lazy_loader==0.1rc2
littleutils==0.2.2
llvmlite==0.39.1
MarkupSafe==2.1.1
matplotlib==3.5.1
matplotlib-inline==0.1.3
metakernel==0.29.0
mkl-fft==1.3.1
mkl-random==1.2.2
mkl-service==2.4.0
munkres==1.1.4
nest-asyncio==1.5.5
numba==0.56.2
numexpr==2.8.3
numpy==1.22.3
outdated==0.2.1
packaging==21.3
pandas==1.4.4
pandas-flavor==0.3.0
parso==0.8.3
patsy==0.5.2
pexpect==4.8.0
pickleshare==0.7.5
Pillow==9.1.0
pingouin==0.5.2
pip==22.1.2
portalocker==2.3.0
prompt-toolkit==3.0.29
psutil==5.9.0
ptyprocess==0.7.0
pure-eval==0.2.2
pycparser==2.21
Pygments==2.11.2
pyparsing==3.0.8
python-dateutil==2.8.2
pytz==2022.1
pytz-deprecation-shim==0.1.0.post0
pyxdf==1.16.3
pyzmq==22.3.0
requests==2.28.1
rpy2==3.5.4
scikit-learn==1.0.2
scipy==1.8.0
seaborn==0.11.2
setuptools==59.8.0
six==1.16.0
stack-data==0.2.0
statsmodels==0.13.2
stochastic==0.7.0
tabulate==0.8.10
threadpoolctl==3.1.0
tornado==6.1
traitlets==5.1.1
tzdata==2022.4
tzlocal==4.2
unicodedata2==14.0.0
urllib3==1.26.12
wcwidth==0.2.5
wheel==0.37.1
xarray==2022.6.0
Empty file.
@@ -0,0 +1,15 @@
"""
Author: Andres Pinilla Palacios
Institution: Quality and Usability Lab, TU Berlin & UTS Games Studio, University of Technology Sydney
"""

from src.feature_selection.conduct_lme import conduct_lme
from src.feature_selection.conduct_rfe import conduct_rfe


def analyse_features():
    conduct_lme()
    conduct_rfe()


if __name__ == "__main__":
    analyse_features()
@@ -0,0 +1,70 @@
"""
Author: Andres Pinilla Palacios
Institution: Quality and Usability Lab, TU Berlin & UTS Games Studio, University of Technology Sydney
"""

from collections import defaultdict
import pandas as pd
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from src.feature_selection.select_features_rfe import select_features_rfe
from src.feature_selection.select_features_lme import select_features_lme
from src.settings import d, dimensions, feature_selection_approaches, participants_codes, random_states_list


def get_metrics(approach, p, rs):
    # Load the dataset with the features selected by the requested approach
    if approach == 'lme':
        participant_data = select_features_lme(p, rs)
    if approach == 'rfe':
        participant_data = select_features_rfe(p, rs)

    nested_dict = lambda: defaultdict(nested_dict)
    participant_metrics = nested_dict()
    for dim in dimensions:
        print('Building classifier for ' + dim)
        features = participant_data[dim]['features']
        labels = participant_data[dim]['labels']

        # Split the dataset into training and test sets
        X_train, X_test, y_train, y_test = train_test_split(features, labels, train_size=0.3, random_state=rs)

        # Create a random forest classifier
        clf = RandomForestClassifier()
        # Train the model on the training set
        clf.fit(X_train, y_train)
        # Predict the response for the test set
        y_pred = clf.predict(X_test)

        accuracy = metrics.accuracy_score(y_test, y_pred)
        precision = metrics.precision_score(y_test, y_pred)
        recall = metrics.recall_score(y_test, y_pred)
        f1_score = metrics.f1_score(y_test, y_pred)

        participant_metrics[dim]['accuracy'] = accuracy
        participant_metrics[dim]['precision'] = precision
        participant_metrics[dim]['recall'] = recall
        participant_metrics[dim]['f1_score'] = f1_score

    return participant_metrics


def build_classifiers(p):
    # Build classifiers with each feature selection approach
    print('Building classifiers for participant ' + p)
    for approach in feature_selection_approaches:
        print('Using features obtained with ' + approach + ' analysis')
        # Collect metrics for each of the 10 random states
        participant_metrics_dict = {}
        for rs in random_states_list:
            print('Using random state ' + str(rs))
            participant_metrics_dict[rs] = pd.DataFrame.from_dict(get_metrics(approach, p, rs))

        participant_metrics_df = pd.concat(participant_metrics_dict)
        participant_metrics_df.to_csv(d + '/reports/metrics/%s/' % (approach) + p + '.csv')


if __name__ == "__main__":
    # build_classifiers() requires a participant code, so run it for every participant.
    for p in participants_codes:
        build_classifiers(p)
@@ -0,0 +1,100 @@
"""
Author: Andres Pinilla Palacios
Institution: Quality and Usability Lab, TU Berlin & UTS Games Studio, University of Technology Sydney
"""

from collections import defaultdict
import matplotlib.pyplot as plt
import pandas as pd
import pingouin as pg
from pingouin import ttest
import seaborn as sns
from scipy import stats

from src.helper import conduct_iqr
from src.settings import d, dimensions, feature_selection_approaches, participants_codes


def compare_methods():
    all_participant_metrics = []
    for p in participants_codes:
        for fsa in feature_selection_approaches:
            file_path = (d + '/reports/metrics/%s/' % (fsa)) + p + '.csv'
            participant_metrics = pd.read_csv(file_path)
            participant_metrics = participant_metrics.reset_index(drop=True)
            participant_metrics = participant_metrics.rename(columns={'Unnamed: 0': 'random_state', 'Unnamed: 1': 'metric'})
            participant_metrics['participant'] = p
            participant_metrics['approach'] = fsa
            all_participant_metrics.append(participant_metrics)

    all_metrics_df = pd.concat(all_participant_metrics)
    # Precision, recall and F1-score
    all_means = all_metrics_df.groupby(['participant', 'approach', 'metric']).mean()
    all_means = all_means.drop(['random_state'], axis=1)
    all_means_of_means = all_means.groupby(['approach', 'metric']).mean()
    all_std_of_means = all_means.groupby(['approach', 'metric']).std()
    print(round(all_means_of_means * 100, 4))
    print(round(all_std_of_means * 100, 3))
    # Subset accuracy
    accuracy = all_metrics_df.loc[all_metrics_df['metric'] == 'accuracy']
    # Participant means
    means_pp = accuracy.groupby(['participant', 'approach']).mean()
    means_pp = means_pp.reset_index().drop(['random_state'], axis=1)
    # Reshape data
    reshaped_data = means_pp.melt(id_vars=['participant', 'approach'], var_name='dimension', value_name='mean_accuracy')
    # Remove outliers
    outliers = list(conduct_iqr(reshaped_data))
    no_outliers = reshaped_data[~reshaped_data['participant'].isin(outliers)]
    no_outliers['mean_accuracy'] = no_outliers['mean_accuracy'] * 100
    no_outliers['approach'] = no_outliers['approach'].str.upper()
    # Assumptions check
    # Shapiro-Wilk test of normal distribution
    results_shapiro = stats.shapiro(no_outliers['mean_accuracy'])
    round(results_shapiro[0], 3)
    round(results_shapiro[1], 3)
    # Sphericity
    # Mauchly's test of sphericity
    result_mauchly = pg.sphericity(no_outliers, dv='mean_accuracy', subject='participant', within=['approach', 'dimension'])
    round(result_mauchly[2], 3)
    round(result_mauchly[4], 3)
    # ANOVA
    # Perform a two-way repeated measures ANOVA
    two_way_aov = pg.rm_anova(dv='mean_accuracy', within=['approach', 'dimension'], subject='participant', data=no_outliers)
    print(two_way_aov)
    # Main effect of dimension
    main_effect_dimension = pg.anova(dv='mean_accuracy', between='dimension', data=no_outliers, detailed=True)
    print(main_effect_dimension)
    # Main effect of feature selection method
    main_effect_approach = pg.anova(dv='mean_accuracy', between='approach', data=no_outliers, detailed=True)
    print(main_effect_approach)

    # Paired samples t-tests
    nested_dict = lambda: defaultdict(nested_dict)
    ttest_dict = nested_dict()
    for dim in dimensions:
        dim_data = no_outliers.loc[no_outliers.dimension == dim]
        dim_rfe = dim_data.loc[dim_data.approach == 'RFE'][['mean_accuracy']].values.flatten()
        dim_lme = dim_data.loc[dim_data.approach == 'LME'][['mean_accuracy']].values.flatten()
        res_dim_ttest = ttest(dim_rfe, dim_lme, paired=True).round(3)
        ttest_dict[dim]['ttest_results'] = res_dim_ttest
        ttest_dict[dim]['means']['lme'] = dim_lme.mean().round(3)
        ttest_dict[dim]['means']['rfe'] = dim_rfe.mean().round(3)
        ttest_dict[dim]['std']['lme'] = dim_lme.std().round(3)
        ttest_dict[dim]['std']['rfe'] = dim_rfe.std().round(3)

    # Plot
    sns.set_palette("Paired")
    sns.set_style("whitegrid")
    g = sns.barplot(data=no_outliers, x="dimension", y="mean_accuracy", hue='approach')
    g.set(xlabel='Affective dimension', ylabel='Mean accuracy of classification models')
    g.set_xticklabels(['Negativity', 'Positivity', 'Net Predisposition'])
    g.legend(title='Feature selection method')
    sns.move_legend(g, "lower left")
    plt.savefig('../reports/figures/anova_results.png', dpi=300)
    plt.show()

    return outliers


if __name__ == "__main__":
    compare_methods()