Skip to content

Commit

Permalink
add complete files
Browse files Browse the repository at this point in the history
  • Loading branch information
YuanchengFang committed Sep 29, 2022
1 parent 25012a5 commit b17fd8c
Show file tree
Hide file tree
Showing 19 changed files with 2,105 additions and 23 deletions.
142 changes: 139 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,140 @@
**/__pycache__/
**/.pytest_cache/
# Do not submit .ipynb
*.ipynb

.DS_Store
.ipynb_checkpoints/
env/
tests/__pycache__
.idea/


# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so
.vscode/
*~

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
3 changes: 3 additions & 0 deletions hw0/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# NOTE: on MacOS you need to add an additional flag: -undefined dynamic_lookup
default:
c++ -O3 -Wall -shared -std=c++11 -fPIC $$(python3 -m pybind11 --includes) src/simple_ml_ext.cpp -o src/simple_ml_ext.so
Binary file added hw0/data/t10k-images-idx3-ubyte.gz
Binary file not shown.
Binary file added hw0/data/t10k-labels-idx1-ubyte.gz
Binary file not shown.
Binary file added hw0/data/train-images-idx3-ubyte.gz
Binary file not shown.
Binary file added hw0/data/train-labels-idx1-ubyte.gz
Binary file not shown.
37 changes: 19 additions & 18 deletions hw0/simple_ml.py → hw0/src/simple_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def add(x, y):
### END YOUR CODE


def parse_mnist(image_filesname, label_filename):
def parse_mnist(image_filename, label_filename):
""" Read an images and labels file in MNIST format. See this page:
http://yann.lecun.com/exdb/mnist/ for a description of the file format.
Expand All @@ -34,20 +34,21 @@ def parse_mnist(image_filesname, label_filename):
Returns:
Tuple (X,y):
X (numpy.ndarray[np.float32]): 2D numpy array containing the loaded
data. The dimensionality of the data should be
(num_examples x input_dim) where 'input_dim' is the full
dimension of the data, e.g., since MNIST images are 28x28, it
will be 784. Values should be of type np.float32, and the data
should be normalized to have a minimum value of 0.0 and a
maximum value of 1.0.
y (numpy.ndarray[dypte=np.uint8]): 1D numpy array containing the
X (numpy.ndarray[np.float32]): 2D numpy array containing the loaded
data. The dimensionality of the data should be
(num_examples x input_dim) where 'input_dim' is the full
dimension of the data, e.g., since MNIST images are 28x28, it
will be 784. Values should be of type np.float32, and the data
should be normalized to have a minimum value of 0.0 and a
maximum value of 1.0. The normalization should be applied uniformly
across the whole dataset, _not_ individual images.
y (numpy.ndarray[dtype=np.uint8]): 1D numpy array containing the
labels of the examples. Values should be of type np.uint8 and
for MNIST will contain the values 0-9.
"""
### BEGIN YOUR CODE
with gzip.open(image_filesname, 'rb') as f:
with gzip.open(image_filename, 'rb') as f:
file_content = f.read()
# use big-endian!
num = struct.unpack('>I', file_content[4:8])[0]
Expand Down Expand Up @@ -83,9 +84,9 @@ def softmax_loss(Z, y):
"""
### BEGIN YOUR CODE
# SIMPLIFIED VERSION OF SOFTMAX + CROSS ENTROPY LOSS
Z_y = [z[ind] for z, ind in zip(Z, y)]
Z = np.log(np.exp(Z).sum(axis=1))
return np.mean(Z - Z_y)
Z_y = Z[np.arange(Z.shape[0]), y]
Z_sum = np.log(np.exp(Z).sum(axis=1))
return np.mean(Z_sum - Z_y)
### END YOUR CODE


Expand All @@ -100,7 +101,7 @@ def softmax_regression_epoch(X, y, theta, lr = 0.1, batch=100):
(num_examples x input_dim).
y (np.ndarray[np.uint8]): 1D class label array of size (num_examples,)
theta (np.ndarray[np.float32]): 2D array of softmax regression
parameter, of shape (input_dim, num_classes)
parameters, of shape (input_dim, num_classes)
lr (float): step size (learning rate) for SGD
batch (int): size of SGD minibatch
Expand Down Expand Up @@ -130,7 +131,7 @@ def softmax_regression_epoch(X, y, theta, lr = 0.1, batch=100):
def nn_epoch(X, y, W1, W2, lr = 0.1, batch=100):
""" Run a single epoch of SGD for a two-layer neural network defined by the
weights W1 and W2 (with no bias terms):
logits = ReLU(X * W1) * W1
logits = ReLU(X * W1) * W2
The function should use the step size lr, and the specified batch size (and
again, without randomizing the order of X). It should modify the
W1 and W2 matrices in place.
Expand All @@ -139,9 +140,9 @@ def nn_epoch(X, y, W1, W2, lr = 0.1, batch=100):
X (np.ndarray[np.float32]): 2D input array of size
(num_examples x input_dim).
y (np.ndarray[np.uint8]): 1D class label array of size (num_examples,)
W1 (np.ndarrray[np.float32]): 2D array of first layer weights, of shape
W1 (np.ndarray[np.float32]): 2D array of first layer weights, of shape
(input_dim, hidden_dim)
W2 (np.ndarrray[np.float32]): 2D array of second layer weights, of shape
W2 (np.ndarray[np.float32]): 2D array of second layer weights, of shape
(hidden_dim, num_classes)
lr (float): step size (learning rate) for SGD
batch (int): size of SGD minibatch
Expand Down
4 changes: 2 additions & 2 deletions hw0/simple_ml_ext.cpp → hw0/src/simple_ml_ext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ void softmax_regression_epoch_cpp(const float *X, const unsigned char *y,
* X (const float *): pointer to X data, of size m*n, stored in row
* major (C) format
* y (const unsigned char *): pointer to y data, of size m
* theta (foat *): pointer to theta data, of size n*k, stored in row
* theta (float *): pointer to theta data, of size n*k, stored in row
* major (C) format
* m (size_t): number of exmaples
* m (size_t): number of examples
* n (size_t): input dimension
* k (size_t): number of classes
* lr (float): learning rate / SGD step size
Expand Down
Loading

0 comments on commit b17fd8c

Please sign in to comment.