Skip to content

Commit

Permalink
pres start
Browse files Browse the repository at this point in the history
  • Loading branch information
maxhumber committed Oct 27, 2019
1 parent 3048a17 commit 4f5fc58
Show file tree
Hide file tree
Showing 17 changed files with 17,597 additions and 150,661 deletions.
149,487 changes: 17,235 additions & 132,252 deletions data/candy.csv

Large diffs are not rendered by default.

17,235 changes: 0 additions & 17,235 deletions data/candy_edit.csv

This file was deleted.

143 changes: 0 additions & 143 deletions data/manual_candy.csv

This file was deleted.

69 changes: 69 additions & 0 deletions distances.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import dill
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.neighbors import NearestNeighbors

# toying around

# Exploration: load the reviews and keep only the rows whose user appears in
# a random draw of 10 entries from the 'user' column (the draw is unseeded,
# so the subset differs from run to run).
df = pd.read_csv('data/candy.csv')
sampled_users = df['user'].sample(10)
df = df[df['user'].isin(sampled_users)]

# Long -> wide: one row per item, one column per user, cells hold the review.
df = df.pivot(index='item', columns='user', values='review').reset_index()
df.head(1)  # bare expression: only displays something in an interactive session

# Wide -> long again, discarding the item/user pairs that have no review.
df = (
    df.melt(id_vars='item', var_name='user', value_name='review')
    .dropna()
    .reset_index(drop=True)
)
df  # bare expression: only displays something in an interactive session

# data shaping

# Small hand-written 5 x 6 binary matrix used to try out pairwise distances.
# NOTE(review): presumably rows are users and columns are items - confirm.
toy_rows = [
    [0, 1, 1, 0, 0, 0],
    [0, 1, 1, 1, 0, 0],
    [1, 0, 0, 1, 0, 0],
    [0, 1, 1, 0, 0, 1],
    [0, 0, 0, 1, 1, 1],
]
df = pd.DataFrame(toy_rows)

# Row-by-row distance matrix; the result is not stored, so this line is only
# useful when run interactively.
euclidean_distances(df)

##


# Build one comma-joined string of "repo" values per user; the result is a
# single-column frame indexed by user.
# NOTE(review): the exploration code earlier in this file reads the columns
# item / user / review from this same CSV - confirm a "repo" column exists.
df = pd.read_csv("data/candy.csv")

per_user = df.groupby(["user"])["repo"].apply(",".join)
df = per_user.to_frame()

class NNRecommender:
    """Recommend items by pooling what a user's nearest neighbours have.

    Every user is described by one delimited string of item names (a
    comma-separated list with the default tokenizer). The strings are turned
    into token-count vectors with ``CountVectorizer`` and indexed with
    ``NearestNeighbors``. A prediction is the pooled item list of the
    neighbours, minus the items already present in the query string.
    """

    def __init__(
        self, n_neighbors=10, max_features=1000, tokenizer=lambda x: x.split(",")
    ):
        """Create the (unfitted) vectorizer and neighbour index.

        Args:
            n_neighbors: passed to ``NearestNeighbors(n_neighbors=...)``.
            max_features: passed to ``CountVectorizer(max_features=...)``.
            tokenizer: callable splitting one user string into item tokens;
                passed to ``CountVectorizer`` and reused by ``predict``.
        """
        # Kept so predict() splits strings the same way the vectorizer does
        # instead of assuming a comma delimiter.
        self.tokenizer = tokenizer
        self.cv = CountVectorizer(tokenizer=tokenizer, max_features=max_features)
        self.nn = NearestNeighbors(n_neighbors=n_neighbors)

    def fit(self, X):
        """Vectorize the user strings and build the neighbour index.

        Args:
            X: the training user strings. The object is stored as ``self.X``
                and later read with ``.iloc``, so it must support positional
                ``.iloc`` access (e.g. a pandas Series).

        Returns:
            self, so calls can be chained.
        """
        self.X = X
        self.nn.fit(self.cv.fit_transform(X))
        return self

    def predict(self, X):
        """Return, for each query string, the neighbours' items it lacks.

        Args:
            X: iterable of user strings in the same format used for ``fit``.

        Returns:
            A list with one list of item names per query. Items are unique
            and appear in the order they are first met while walking the
            neighbours in the order ``kneighbors`` returns them.
        """
        queries = list(X)
        if not queries:
            # Nothing to look up; also avoids querying the index with 0 rows.
            return []

        # One vectorizer call and one neighbour query for the whole batch.
        _, neighbors = self.nn.kneighbors(self.cv.transform(queries))

        tokenize = self.tokenizer
        recommendations = []
        for query, neighbor_rows in zip(queries, neighbors):
            # Seeding `seen` with the query's own items both filters them out
            # and de-duplicates the pooled items in a single pass.
            seen = set(tokenize(query))
            picks = []
            for n in neighbor_rows:
                # Neighbour indices are positions into the training data.
                for item in tokenize(self.X.iloc[int(n)]):
                    if item not in seen:
                        seen.add(item)
                        picks.append(item)
            recommendations.append(picks)
        return recommendations

# Hyper-parameters for the recommender.
n_neighbors = 10
max_features = 1000

# fit() returns the estimator itself, so construction and training chain.
model = NNRecommender(
    n_neighbors=n_neighbors, max_features=max_features
).fit(df["repo"])

# Serialise the fitted model to disk with dill.
with open("model/model.pkl", "wb") as model_file:
    dill.dump(model, model_file)
44 changes: 0 additions & 44 deletions edit_candy.py

This file was deleted.

Binary file added images/candy.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/cat_and_mouse.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/halloween.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/influenster.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added images/movielens.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit 4f5fc58

Please sign in to comment.