Skip to content

Commit

Permalink
add complete files
Browse files Browse the repository at this point in the history
  • Loading branch information
YuanchengFang committed Sep 29, 2022
1 parent 25012a5 commit b17fd8c
Show file tree
Hide file tree
Showing 19 changed files with 2,105 additions and 23 deletions.
142 changes: 139 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,140 @@
**/__pycache__/
**/.pytest_cache/
# Do not submit .ipynb
*.ipynb

.DS_Store
.ipynb_checkpoints/
env/
tests/__pycache__
.idea/


# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so
.vscode/
*~

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
3 changes: 3 additions & 0 deletions hw0/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# NOTE: on MacOS you need to add an additional flag: -undefined dynamic_lookup
default:
c++ -O3 -Wall -shared -std=c++11 -fPIC $$(python3 -m pybind11 --includes) src/simple_ml_ext.cpp -o src/simple_ml_ext.so
Binary file added hw0/data/t10k-images-idx3-ubyte.gz
Binary file not shown.
Binary file added hw0/data/t10k-labels-idx1-ubyte.gz
Binary file not shown.
Binary file added hw0/data/train-images-idx3-ubyte.gz
Binary file not shown.
Binary file added hw0/data/train-labels-idx1-ubyte.gz
Binary file not shown.
37 changes: 19 additions & 18 deletions hw0/simple_ml.py → hw0/src/simple_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def add(x, y):
### END YOUR CODE


def parse_mnist(image_filesname, label_filename):
def parse_mnist(image_filename, label_filename):
""" Read an images and labels file in MNIST format. See this page:
http://yann.lecun.com/exdb/mnist/ for a description of the file format.
Expand All @@ -34,20 +34,21 @@ def parse_mnist(image_filesname, label_filename):
Returns:
Tuple (X,y):
X (numpy.ndarray[np.float32]): 2D numpy array containing the loaded
data. The dimensionality of the data should be
(num_examples x input_dim) where 'input_dim' is the full
dimension of the data, e.g., since MNIST images are 28x28, it
will be 784. Values should be of type np.float32, and the data
should be normalized to have a minimum value of 0.0 and a
maximum value of 1.0.
y (numpy.ndarray[dypte=np.uint8]): 1D numpy array containing the
X (numpy.ndarray[np.float32]): 2D numpy array containing the loaded
data. The dimensionality of the data should be
(num_examples x input_dim) where 'input_dim' is the full
dimension of the data, e.g., since MNIST images are 28x28, it
will be 784. Values should be of type np.float32, and the data
should be normalized to have a minimum value of 0.0 and a
maximum value of 1.0. The normalization should be applied uniformly
across the whole dataset, _not_ individual images.
y (numpy.ndarray[dtype=np.uint8]): 1D numpy array containing the
labels of the examples. Values should be of type np.uint8 and
for MNIST will contain the values 0-9.
"""
### BEGIN YOUR CODE
with gzip.open(image_filesname, 'rb') as f:
with gzip.open(image_filename, 'rb') as f:
file_content = f.read()
# use big-endian!
num = struct.unpack('>I', file_content[4:8])[0]
Expand Down Expand Up @@ -83,9 +84,9 @@ def softmax_loss(Z, y):
"""
### BEGIN YOUR CODE
# SIMPLIFIED VERSION OF SOFTMAX + CROSS ENTROPY LOSS
Z_y = [z[ind] for z, ind in zip(Z, y)]
Z = np.log(np.exp(Z).sum(axis=1))
return np.mean(Z - Z_y)
Z_y = Z[np.arange(Z.shape[0]), y]
Z_sum = np.log(np.exp(Z).sum(axis=1))
return np.mean(Z_sum - Z_y)
### END YOUR CODE


Expand All @@ -100,7 +101,7 @@ def softmax_regression_epoch(X, y, theta, lr = 0.1, batch=100):
(num_examples x input_dim).
y (np.ndarray[np.uint8]): 1D class label array of size (num_examples,)
theta (np.ndarray[np.float32]): 2D array of softmax regression
parameter, of shape (input_dim, num_classes)
parameters, of shape (input_dim, num_classes)
lr (float): step size (learning rate) for SGD
batch (int): size of SGD minibatch
Expand Down Expand Up @@ -130,7 +131,7 @@ def softmax_regression_epoch(X, y, theta, lr = 0.1, batch=100):
def nn_epoch(X, y, W1, W2, lr = 0.1, batch=100):
""" Run a single epoch of SGD for a two-layer neural network defined by the
weights W1 and W2 (with no bias terms):
logits = ReLU(X * W1) * W1
logits = ReLU(X * W1) * W2
The function should use the step size lr, and the specified batch size (and
again, without randomizing the order of X). It should modify the
W1 and W2 matrices in place.
Expand All @@ -139,9 +140,9 @@ def nn_epoch(X, y, W1, W2, lr = 0.1, batch=100):
X (np.ndarray[np.float32]): 2D input array of size
(num_examples x input_dim).
y (np.ndarray[np.uint8]): 1D class label array of size (num_examples,)
W1 (np.ndarrray[np.float32]): 2D array of first layer weights, of shape
W1 (np.ndarray[np.float32]): 2D array of first layer weights, of shape
(input_dim, hidden_dim)
W2 (np.ndarrray[np.float32]): 2D array of second layer weights, of shape
W2 (np.ndarray[np.float32]): 2D array of second layer weights, of shape
(hidden_dim, num_classes)
lr (float): step size (learning rate) for SGD
batch (int): size of SGD minibatch
Expand Down
4 changes: 2 additions & 2 deletions hw0/simple_ml_ext.cpp → hw0/src/simple_ml_ext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ void softmax_regression_epoch_cpp(const float *X, const unsigned char *y,
* X (const float *): pointer to X data, of size m*n, stored in row
* major (C) format
* y (const unsigned char *): pointer to y data, of size m
* theta (foat *): pointer to theta data, of size n*k, stored in row
* theta (float *): pointer to theta data, of size n*k, stored in row
* major (C) format
* m (size_t): number of exmaples
* m (size_t): number of examples
* n (size_t): input dimension
* k (size_t): number of classes
* lr (float): learning rate / SGD step size
Expand Down
Loading

0 comments on commit b17fd8c

Please sign in to comment.