Commit

updated eval_recall.py to use Youden index for optimal sensitivity and specificity values.
Axel Montout committed Jul 9, 2024
1 parent 84e724d commit dfec59f
Showing 4 changed files with 119 additions and 55 deletions.
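Note: the eval_recall.py change named in the commit message is not among the four diffs shown below. For context, the Youden index is J = sensitivity + specificity - 1 = TPR - FPR, and the "optimal" operating point is the ROC threshold at which J peaks. Below is a minimal sketch of that selection, assuming scikit-learn is available; the function and variable names are illustrative, not the repository's actual code.

# Hedged sketch (not the repository's code): pick the ROC threshold that
# maximises Youden's J = sensitivity + specificity - 1 = TPR - FPR.
import numpy as np
from sklearn.metrics import roc_curve

def youden_optimal_point(y_true, y_score):
    fpr, tpr, thresholds = roc_curve(y_true, y_score)
    j = tpr - fpr                    # Youden's J at each candidate threshold
    best = int(np.argmax(j))         # index of the maximum J
    # returns (threshold, sensitivity, specificity) at the optimal point
    return thresholds[best], tpr[best], 1.0 - fpr[best]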
28 changes: 25 additions & 3 deletions boot_roc_curve.py
@@ -19,7 +19,9 @@
# Set matplotlib to use Times New Roman
rcParams['font.family'] = 'serif'
rcParams['font.serif'] = ['Times New Roman']

import scipy
+from scipy.stats import ttest_rel
+import scipy.stats as stats

class AnyObjectHandler(HandlerBase):
def create_artists(
@@ -245,15 +247,15 @@ def main(path=None, n_bootstrap=100, n_job=6):
    )

    try:
-        xaxis_train_ = random.sample(xaxis_train, 100)
+        xaxis_train_ = random.sample(xaxis_train, 10)
    except ValueError as e:
        print(e)
        xaxis_train_ = xaxis_train

    for fpr, tpr in xaxis_train_:
        ax_roc_merge.plot(fpr, tpr, color="tab:purple", alpha=0.3, linewidth=1)

-    xaxis_test_ = random.sample(xaxis_test, 100)
+    xaxis_test_ = random.sample(xaxis_test, 10)
    for fpr, tpr in xaxis_test_:
        ax_roc_merge.plot(fpr, tpr, color="tab:blue", alpha=0.3, linewidth=1)

@@ -533,6 +535,8 @@ def boostrap_auc_peak(results, out_dir):
    n_job = int(sys.argv[3])
else:
    res_folder = Path("E:/Cats/paper_debug_regularisation_8/")
+    n_bootstrap = 20
+    n_job = 2

results = []
folders = [
@@ -541,6 +545,9 @@ def boostrap_auc_peak(results, out_dir):
if x.is_dir()
]
for i, item in enumerate(folders):
+
+    if i > 4:
+        break
    print(f"{i}/{len(folders)}...")
    print(item)
    res = main(item, n_bootstrap=n_bootstrap, n_job=n_job)
@@ -583,3 +590,18 @@ def boostrap_auc_peak(results, out_dir):
df_ = df_.head(20)
print(df_.to_latex(index=False))
df.to_csv("cat_result_table.csv", index=False)
+
+df_noproc = df[df["Pre-processing"] == '']
+df_noproc = df_noproc.sort_values("N peaks")
+df_noproc_auc = df_noproc["median_auc_test"].values
+
+df_l1 = df[df["Pre-processing"] == 'L1']
+df_l1 = df_l1.sort_values("N peaks")
+df_l1_auc = df_l1["median_auc_test"].values
+
+# First, conduct the Wilcoxon signed-rank test
+wilcoxon_p_value = stats.wilcoxon(df_l1_auc, df_noproc_auc, alternative='less').pvalue
+print(f"Wilcoxon Signed-Rank Test: p-value = {wilcoxon_p_value}")
+
+t_stat, p_value = ttest_rel(df_l1_auc, df_noproc_auc)
+print(f"Paired T-Test: t-statistic = {t_stat}, p-value = {p_value}")
123 changes: 71 additions & 52 deletions build_dataset.py
@@ -15,7 +15,7 @@
from plotnine import ggplot, aes, geom_jitter, stat_summary, theme, element_text
from utils._anscombe import anscombe
from utils.utils import time_of_day_
-import pytest
+

from matplotlib import rcParams
# Set matplotlib to use Times New Roman
@@ -445,22 +445,37 @@ def get_cat_data(data_dir, bin, subset=None):
    # files = new

    dfs = []
+    gender = ""
    for i, file in enumerate(files):
-        print(f"progress[{i}/{len(files)}]...")
-        print(f"reading file: {file}")
-
-        cat_id = int(file.stem.split("_")[0])
-        cat_name = file.stem.split("_")[1]
-        individual_to_ignore = ["MrDudley", "Oliver_F", "Lucy"]
-        if cat_name in individual_to_ignore:
-            continue
-        cat_meta = get_cat_meta(data_dir, cat_id, individual_to_ignore=individual_to_ignore)
-        df = pd.read_csv(file, sep=",", skiprows=range(0, 23), header=None)
-        df = format_raw_data(df, bin)
-        df["health"] = cat_meta["health"]
-        df["age"] = cat_meta["age"]
-        df["cat_id"] = cat_id
-        dfs.append(df)
+        try:
+            print(f"progress[{i}/{len(files)}]...")
+            print(f"reading file: {file}")
+            cat_id = int(file.stem.split("_")[0])
+            cat_name = file.stem.split("_")[1]
+
+            if "maisie" not in str(file).lower():
+                continue
+
+            individual_to_ignore = ["MrDudley", "Oliver_F", "Lucy"]
+            if cat_name in individual_to_ignore:
+                continue
+            cat_meta = get_cat_meta(data_dir, cat_id, individual_to_ignore=individual_to_ignore)
+            df = pd.read_csv(file, sep=",", nrows=1, header=None)
+
+            df_ = pd.read_csv(file, sep=",", nrows=23, header=1, error_bad_lines=False)
+            gender = df_[df_["Filename:"] == "Gender:"].values[0][1]
+            weight = df_[df_["Filename:"] == "Weight:"].values[0][1]
+            #df = format_raw_data(df, bin)
+            df["health"] = cat_meta["health"]
+            df["age"] = cat_meta["age"]
+            df["cat_id"] = cat_id
+            df["mob_score"] = cat_meta['mobility_score']
+            df["gender"] = gender
+            df["weight"] = weight
+            df = df[["cat_id", "age", "gender", "mob_score", "health", "weight"]]
+            dfs.append(df)
+        except Exception as e:
+            print(e)
    return dfs
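The two-stage read above implies that each raw activity CSV starts with a roughly 23-row "key:,value" preamble (including Gender: and Weight: fields) ahead of the data. A hedged, self-contained sketch of reading such a preamble directly, with the layout inferred from the calls above rather than documented:

# Hedged sketch: parse a "Key:,value" CSV preamble of the kind the reads
# above appear to assume (layout inferred, not confirmed by the repo).
def read_preamble(path, n_lines=23):
    meta = {}
    with open(path, newline="") as fh:
        for _, line in zip(range(n_lines), fh):
            parts = [p.strip() for p in line.split(",")]
            if len(parts) >= 2 and parts[0].endswith(":"):
                meta[parts[0].rstrip(":")] = parts[1]
    return meta

# e.g. meta = read_preamble(file); meta.get("Gender"), meta.get("Weight")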


@@ -471,7 +486,7 @@ def run(
    out_dir: Path = typer.Option(
        ..., exists=False, file_okay=False, dir_okay=True, resolve_path=True
    ),
-    dataset_path: Path = Path("dataset.csv"),
+    dataset_path: Path = Path("dataset_test8.csv"),
    bin: str = "S",
    w_size: List[int] = [15],
    threshs: List[int] = [10],
@@ -509,8 +524,11 @@ def run(
    cat_data = get_cat_data(data_dir, bin)
    #dataset_path = f"{dataset_path.name}_{bin}.csv"
    print(f"saving {dataset_path}...")
-    pd.concat(cat_data).to_csv(dataset_path, index=True)
-    #print("done.")
+    df_meta = pd.concat(cat_data)
+    df_meta.to_csv(dataset_path, index=True)
+    df_meta = df_meta.drop("weight", axis=1)
+    print(df_meta.to_latex())
+    print("done.")

    datasets = []
    for t in threshs:
@@ -541,40 +559,41 @@ def run(
    return datasets


-def test():
-
-    n_peak = 7
-    rois = []
-    for i in range(10):
-        rois.append([f"sample {i+1}"])
-    rois = np.array(rois)
-
-    idxs_peaks = np.arange(len(rois))
-    combinat = list(combinations(idxs_peaks, n_peak))
-    try:
-        rois_idxs = random.sample(combinat, k=100)
-    except ValueError as e:
-        print(e)
-        print(f"There are less samples than max_sample={100}")
-        rois_idxs = combinat
-
-    #build augmented sample by concatenating permutations of peaks
-    n_peak_samples = []
-    for idxs in rois_idxs:
-        new_samples = []
-        for i in idxs:
-            sample = rois[i]
-            new_samples.append(sample)
-        activity = np.concatenate(new_samples)
-        s = activity.tolist()
-        n_peak_samples.append(s)
-    n_peak_samples = np.array(n_peak_samples)
-    print(f"{len(rois)} samples before combination")
-    #print(rois)
-    print(f"{len(n_peak_samples)} samples after combination")
-    #print(n_peak_samples)
+# def test():
+#
+#     n_peak = 7
+#     rois = []
+#     for i in range(10):
+#         rois.append([f"sample {i+1}"])
+#     rois = np.array(rois)
+#
+#     idxs_peaks = np.arange(len(rois))
+#     combinat = list(combinations(idxs_peaks, n_peak))
+#     try:
+#         rois_idxs = random.sample(combinat, k=100)
+#     except ValueError as e:
+#         print(e)
+#         print(f"There are less samples than max_sample={100}")
+#         rois_idxs = combinat
+#
+#     #build augmented sample by concatenating permutations of peaks
+#     n_peak_samples = []
+#     for idxs in rois_idxs:
+#         new_samples = []
+#         for i in idxs:
+#             sample = rois[i]
+#             new_samples.append(sample)
+#         activity = np.concatenate(new_samples)
+#         s = activity.tolist()
+#         n_peak_samples.append(s)
+#     n_peak_samples = np.array(n_peak_samples)
+#     print(f"{len(rois)} samples before combination")
+#     #print(rois)
+#     print(f"{len(n_peak_samples)} samples after combination")
+#     #print(n_peak_samples)


if __name__ == "__main__":
-    typer.run(run)
+    run(Path("E:\Cats"), Path("E:\Cats"))
+    #typer.run(run)

4 changes: 4 additions & 0 deletions crepuscular.py
@@ -87,7 +87,11 @@ def ml(samples_dir, n_bootstrap=100, n_job=5):
    cat_meta = get_cat_meta(data_dir, None)

    #Get data from raw csv
+    dataset_path = "meta_data.csv"
    cat_data = get_cat_data(data_dir, "S")
+    print(f"saving {dataset_path}...")
+    pd.concat(cat_data).to_csv(dataset_path, index=True)
+
    num_ticks = 6
    p = 0.95
    w_size = 30
19 changes: 19 additions & 0 deletions utils/del_pkl.py
@@ -0,0 +1,19 @@
+import os
+import glob
+
+# Define the directory path
+directory = r"E:\Cats\paper_debug_regularisation_36"
+
+# Construct the search pattern
+pattern = os.path.join(directory, "*.pkl")
+
+# Get the list of all .pkl files in the directory
+pkl_files = glob.glob(pattern)
+
+# Delete each .pkl file
+for file_path in pkl_files:
+    try:
+        os.remove(file_path)
+        print(f"Deleted: {file_path}")
+    except Exception as e:
+        print(f"Error deleting {file_path}: {e}")
