Codebase cleanup

fchollet committed Jul 22, 2015
1 parent f392a78 commit ec8f7f0
Showing 37 changed files with 706 additions and 650 deletions.
1 change: 1 addition & 0 deletions docs/sources/index.md
@@ -125,6 +125,7 @@ Keras welcomes all contributions from the community.
- New features should be documented. Make sure you update the documentation along with your Pull Request.
- The documentation for every new feature should include a usage example in the form of a code snippet.
- All changes should be tested. Make sure any new feature you add has a corresponding unit test.
+ - Please no Pull Requests about coding style.
- Even if you don't contribute to the Keras source code, if you have an application of Keras that is concise and powerful, please consider adding it to our collection of [examples](https://github.com/fchollet/keras/tree/master/examples).


18 changes: 5 additions & 13 deletions examples/cifar10_cnn.py
@@ -19,7 +19,7 @@
(it's still underfitting at that point, though).
Note: the data was pickled with Python 2, and some encoding issues might prevent you
- from loading it in Python 3. You might have to load it in Python 2,
+ from loading it in Python 3. You might have to load it in Python 2,
save it in a different format, load it in Python 3 and repickle it.
'''
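
A side note on the pickling issue described in the docstring above: instead of the Python 2 round trip it suggests, a common workaround is to pass encoding='latin1' to pickle.load under Python 3. A minimal sketch, with placeholder file names:

import pickle

# Read a Python-2 pickle under Python 3 by decoding byte strings as latin-1.
# 'cifar_batch.pkl' is a placeholder path, not a file shipped with Keras.
with open('cifar_batch.pkl', 'rb') as f:
    data = pickle.load(f, encoding='latin1')

# Re-save with protocol 2 so the file stays readable from both Python 2 and 3.
with open('cifar_batch_py3.pkl', 'wb') as f:
    pickle.dump(data, f, protocol=2)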

@@ -40,16 +40,16 @@

model = Sequential()

- model.add(Convolution2D(32, 3, 3, 3, border_mode='full'))
+ model.add(Convolution2D(32, 3, 3, 3, border_mode='full'))
model.add(Activation('relu'))
model.add(Convolution2D(32, 32, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(poolsize=(2, 2)))
model.add(Dropout(0.25))

- model.add(Convolution2D(64, 32, 3, 3, border_mode='full'))
+ model.add(Convolution2D(64, 32, 3, 3, border_mode='full'))
model.add(Activation('relu'))
- model.add(Convolution2D(64, 64, 3, 3))
+ model.add(Convolution2D(64, 64, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(poolsize=(2, 2)))
model.add(Dropout(0.25))
@@ -93,7 +93,7 @@
horizontal_flip=True, # randomly flip images
vertical_flip=False) # randomly flip images

- # compute quantities required for featurewise normalization
+ # compute quantities required for featurewise normalization
# (std, mean, and principal components if ZCA whitening is applied)
datagen.fit(X_train)

@@ -114,11 +114,3 @@
for X_batch, Y_batch in datagen.flow(X_test, Y_test):
score = model.test_on_batch(X_batch, Y_batch)
progbar.add(X_batch.shape[0], values=[("test loss", score)])








12 changes: 6 additions & 6 deletions examples/imdb_lstm.py
@@ -15,17 +15,17 @@
'''
Train a LSTM on the IMDB sentiment classification task.
- The dataset is actually too small for LSTM to be of any advantage
+ The dataset is actually too small for LSTM to be of any advantage
compared to simpler, much faster methods such as TF-IDF+LogReg.
- Notes:
+ Notes:
- - RNNs are tricky. Choice of batch size is important,
- choice of loss and optimizer is critical, etc.
+ - RNNs are tricky. Choice of batch size is important,
+ choice of loss and optimizer is critical, etc.
Some configurations won't converge.
- - LSTM loss decrease patterns during training can be quite different
- from what you see with CNNs/MLPs/etc.
+ - LSTM loss decrease patterns during training can be quite different
+ from what you see with CNNs/MLPs/etc.
GPU command:
THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python imdb_lstm.py
4 changes: 4 additions & 0 deletions examples/kaggle_otto_nn.py
@@ -36,6 +36,7 @@
Get the data from Kaggle: https://www.kaggle.com/c/otto-group-product-classification-challenge/data
'''


def load_data(path, train=True):
df = pd.read_csv(path)
X = df.values.copy()
@@ -47,13 +48,15 @@ def load_data(path, train=True):
X, ids = X[:, 1:].astype(np.float32), X[:, 0].astype(str)
return X, ids


def preprocess_data(X, scaler=None):
if not scaler:
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)
return X, scaler


def preprocess_labels(labels, encoder=None, categorical=True):
if not encoder:
encoder = LabelEncoder()
@@ -63,6 +66,7 @@ def preprocess_labels(labels, encoder=None, categorical=True):
y = np_utils.to_categorical(y)
return y, encoder


def make_submission(y_prob, ids, encoder, fname):
with open(fname, 'w') as f:
f.write('id,')
2 changes: 1 addition & 1 deletion examples/mnist_cnn.py
@@ -41,7 +41,7 @@

model = Sequential()

- model.add(Convolution2D(32, 1, 3, 3, border_mode='full'))
+ model.add(Convolution2D(32, 1, 3, 3, border_mode='full'))
model.add(Activation('relu'))
model.add(Convolution2D(32, 32, 3, 3))
model.add(Activation('relu'))
2 changes: 0 additions & 2 deletions examples/mnist_mlp.py
@@ -20,8 +20,6 @@
nb_classes = 10
nb_epoch = 20



# the data, shuffled and split between tran and test sets
(X_train, y_train), (X_test, y_test) = mnist.load_data()

69 changes: 37 additions & 32 deletions examples/skipgram_word_embeddings.py
@@ -1,30 +1,30 @@

'''
- We loop over words in a dataset, and for each word, we look at a context window around the word.
+ We loop over words in a dataset, and for each word, we look at a context window around the word.
We generate pairs of (pivot_word, other_word_from_same_context) with label 1,
and pairs of (pivot_word, random_word) with label 0 (skip-gram method).
We use the layer WordContextProduct to learn embeddings for the word couples,
and compute a proximity score between the embeddings (= p(context|word)),
trained with our positive and negative labels.
- We then use the weights computed by WordContextProduct to encode words
- and demonstrate that the geometry of the embedding space
+ We then use the weights computed by WordContextProduct to encode words
+ and demonstrate that the geometry of the embedding space
captures certain useful semantic properties.
- Read more about skip-gram in this particularly gnomic paper by Mikolov et al.:
+ Read more about skip-gram in this particularly gnomic paper by Mikolov et al.:
http://arxiv.org/pdf/1301.3781v3.pdf
- Note: you should run this on GPU, otherwise training will be quite slow.
+ Note: you should run this on GPU, otherwise training will be quite slow.
On a EC2 GPU instance, expect 3 hours per 10e6 comments (~10e8 words) per epoch with dim_proj=256.
Should be much faster on a modern GPU.
GPU command:
THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python skipgram_word_embeddings.py
- Dataset: 5,845,908 Hacker News comments.
- Obtain the dataset at:
- https://mega.co.nz/#F!YohlwD7R!wec0yNO86SeaNGIYQBOR0A
+ Dataset: 5,845,908 Hacker News comments.
+ Obtain the dataset at:
+ https://mega.co.nz/#F!YohlwD7R!wec0yNO86SeaNGIYQBOR0A
(HNCommentsAll.1perline.json.bz2)
'''
from __future__ import absolute_import
@@ -66,6 +66,7 @@
to_replace = [('&#x27;', "'")]
hex_tags = re.compile(r'&.*?;')


def clean_comment(comment):
c = str(comment.encode("utf-8"))
c = html_tags.sub(' ', c)
@@ -74,6 +75,7 @@ def clean_comment(comment):
c = hex_tags.sub(' ', c)
return c


def text_generator(path=data_path):
f = open(path)
for i, l in enumerate(f):
@@ -120,7 +122,7 @@ def text_generator(path=data_path):
progbar = generic_utils.Progbar(tokenizer.document_count)
samples_seen = 0
losses = []

for i, seq in enumerate(tokenizer.texts_to_sequences_generator(text_generator())):
# get skipgram couples for one text in the dataset
couples, labels = sequence.skipgrams(seq, max_features, window_size=4, negative_samples=1., sampling_table=sampling_table)
@@ -158,26 +160,29 @@ def text_generator(path=data_path):
reverse_word_index = dict([(v, k) for k, v in list(word_index.items())])
word_index = tokenizer.word_index


def embed_word(w):
i = word_index.get(w)
- if (not i) or (i<skip_top) or (i>=max_features):
+ if (not i) or (i < skip_top) or (i >= max_features):
return None
return norm_weights[i]


def closest_to_point(point, nb_closest=10):
proximities = np.dot(norm_weights, point)
tups = list(zip(list(range(len(proximities))), proximities))
tups.sort(key=lambda x: x[1], reverse=True)
- return [(reverse_word_index.get(t[0]), t[1]) for t in tups[:nb_closest]]
+ return [(reverse_word_index.get(t[0]), t[1]) for t in tups[:nb_closest]]


def closest_to_word(w, nb_closest=10):
i = word_index.get(w)
- if (not i) or (i<skip_top) or (i>=max_features):
+ if (not i) or (i < skip_top) or (i >= max_features):
return []
return closest_to_point(norm_weights[i].T, nb_closest)


- ''' the resuls in comments below were for:
+ ''' the resuls in comments below were for:
5.8M HN comments
dim_proj = 256
nb_epoch = 2
@@ -187,31 +192,31 @@ def closest_to_word(w, nb_closest=10):
skip_top = 100
negative_samples = 1.
window_size = 4
- and frequency subsampling of factor 10e-5.
+ and frequency subsampling of factor 10e-5.
'''

words = ["article", # post, story, hn, read, comments
"3", # 6, 4, 5, 2
"two", # three, few, several, each
"great", # love, nice, working, looking
"data", # information, memory, database
"money", # company, pay, customers, spend
"years", # ago, year, months, hours, week, days
"android", # ios, release, os, mobile, beta
"javascript", # js, css, compiler, library, jquery, ruby
"look", # looks, looking
"business", # industry, professional, customers
"company", # companies, startup, founders, startups
"after", # before, once, until
"own", # personal, our, having
"us", # united, country, american, tech, diversity, usa, china, sv
"using", # javascript, js, tools (lol)
"here", # hn, post, comments
+ words = [
+ "article", # post, story, hn, read, comments
+ "3", # 6, 4, 5, 2
+ "two", # three, few, several, each
+ "great", # love, nice, working, looking
+ "data", # information, memory, database
+ "money", # company, pay, customers, spend
+ "years", # ago, year, months, hours, week, days
+ "android", # ios, release, os, mobile, beta
+ "javascript", # js, css, compiler, library, jquery, ruby
+ "look", # looks, looking
+ "business", # industry, professional, customers
+ "company", # companies, startup, founders, startups
+ "after", # before, once, until
+ "own", # personal, our, having
+ "us", # united, country, american, tech, diversity, usa, china, sv
+ "using", # javascript, js, tools (lol)
+ "here", # hn, post, comments
+ ]

for w in words:
res = closest_to_word(w)
print('====', w)
for r in res:
print(r)
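
The (pivot_word, context_word) = 1 / (pivot_word, random_word) = 0 couples described in the skip-gram docstring above are produced by keras.preprocessing.sequence.skipgrams, which the script calls on each tokenized text. A minimal sketch on a toy index sequence (the vocabulary size and window are arbitrary here, and the exact pairs vary because negatives are sampled at random):

from keras.preprocessing import sequence

# Toy sequence of word indices; index 0 is reserved by the Keras tokenizer.
seq = [1, 2, 3, 4, 5]

# couples: (pivot, other) index pairs; labels: 1 for true context pairs, 0 for random negatives.
couples, labels = sequence.skipgrams(seq, 6, window_size=2, negative_samples=1.)
for (pivot, other), label in zip(couples, labels):
    print(pivot, other, label)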

9 changes: 9 additions & 0 deletions keras/activations.py
@@ -1,35 +1,44 @@
from __future__ import absolute_import
import theano.tensor as T


def softmax(x):
return T.nnet.softmax(x.reshape((-1, x.shape[-1]))).reshape(x.shape)


def time_distributed_softmax(x):
import warnings
warnings.warn("time_distributed_softmax is deprecated. Just use softmax!", DeprecationWarning)
return softmax(x)


def softplus(x):
return T.nnet.softplus(x)


def relu(x):
return (x + abs(x)) / 2.0


def tanh(x):
return T.tanh(x)


def sigmoid(x):
return T.nnet.sigmoid(x)


def hard_sigmoid(x):
return T.nnet.hard_sigmoid(x)


def linear(x):
'''
The function returns the variable that is passed in, so all types work
'''
return x


from .utils.generic_utils import get_from_module
def get(identifier):
return get_from_module(identifier, globals(), 'activation function')
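
The get() function at the end of activations.py is what resolves a string identifier such as 'relu' into the corresponding function. A minimal usage sketch against the module as shown, assuming the Theano backend this version of Keras runs on (see the import at the top of the file):

import theano
import theano.tensor as T
from keras import activations

act = activations.get('relu')      # looks up relu() in this module via get_from_module
x = T.vector('x')
f = theano.function([x], act(x))   # symbolic (x + abs(x)) / 2
print(f([-1.0, 0.0, 2.0]))         # prints something like [ 0.  0.  2.]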
