Remove accents and period from code comments
astonzhang committed Feb 3, 2023
1 parent 86bb603 commit 6d7b666
Showing 22 changed files with 71 additions and 64 deletions.
@@ -144,7 +144,8 @@ def nadaraya_watson(x_train, y_train, x_val, kernel):
dists = d2l.reshape(x_train, (-1, 1)) - d2l.reshape(x_val, (1, -1))
# Each column/row corresponds to each query/key
k = d2l.astype(kernel(dists), d2l.float32)
attention_w = k / d2l.reduce_sum(k, 0) # Normalization over keys for each query
# Normalization over keys for each query
attention_w = k / d2l.reduce_sum(k, 0)
if tab.selected('pytorch'):
y_hat = y_train@attention_w
if tab.selected('mxnet'):
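For context, the hunk above normalizes the kernel values over the keys (training inputs) for each query (validation input), so each column of attention weights sums to one. A minimal NumPy sketch of the same idea, using a Gaussian kernel and made-up toy data (both are assumptions for illustration, not part of the diff):

```python
import numpy as np

def gaussian_kernel(d):
    return np.exp(-d ** 2 / 2)

# Made-up toy data: 4 training points (keys/values), 3 validation points (queries)
x_train = np.array([0.0, 1.0, 2.0, 3.0])
y_train = np.array([0.5, 1.5, 2.5, 3.5])
x_val = np.array([0.5, 1.5, 2.5])

# dists[i, j] = x_train[i] - x_val[j]
dists = x_train.reshape(-1, 1) - x_val.reshape(1, -1)
k = gaussian_kernel(dists)
# Normalization over keys for each query: each column sums to 1
attention_w = k / k.sum(axis=0)
y_hat = y_train @ attention_w  # one kernel-weighted average per query
print(attention_w.sum(axis=0))  # [1. 1. 1.]
print(y_hat)
```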
@@ -178,7 +178,7 @@ def masked_softmax(X, valid_lens): #@save
%%tab jax
def masked_softmax(X, valid_lens): #@save
"""Perform softmax operation by masking elements on the last axis."""
# `X`: 3D tensor, `valid_lens`: 1D or 2D tensor
# X: 3D tensor, valid_lens: 1D or 2D tensor
def _sequence_mask(X, valid_len, value=0):
maxlen = X.shape[1]
mask = jnp.arange((maxlen),
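For context, `masked_softmax` hides key positions beyond each sequence's valid length before normalizing. A simplified NumPy sketch that handles only the 1D `valid_lens` case (the book's version also accepts 2D lengths); the shapes and the large negative fill value are illustrative assumptions:

```python
import numpy as np

def masked_softmax_1d(X, valid_lens):
    # X: (batch, queries, keys); valid_lens: (batch,) valid key count per example
    mask = np.arange(X.shape[-1]) < valid_lens[:, None, None]
    X = np.where(mask, X, -1e9)            # large negative -> ~0 weight after softmax
    X = X - X.max(axis=-1, keepdims=True)  # subtract the max for numerical stability
    e = np.exp(X)
    return e / e.sum(axis=-1, keepdims=True)

scores = np.random.rand(2, 2, 4)
print(masked_softmax_1d(scores, np.array([2, 3])).round(3))
# Columns beyond each valid length receive (near-)zero weight
```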
2 changes: 1 addition & 1 deletion chapter_builders-guide/init-param.md
@@ -103,7 +103,7 @@ with standard deviation 0.01, while bias parameters cleared to zero.

```{.python .input}
%%tab mxnet
# Here `force_reinit` ensures that parameters are freshly initialized even if
# Here force_reinit ensures that parameters are freshly initialized even if
# they were already initialized previously
net.initialize(init=init.Normal(sigma=0.01), force_reinit=True)
net[0].weight.data()[0]
10 changes: 5 additions & 5 deletions chapter_builders-guide/model-construction.md
@@ -537,15 +537,15 @@ So we implement a `FixedHiddenMLP` class as follows.
class FixedHiddenMLP(nn.Block):
def __init__(self):
super().__init__()
# Random weight parameters created with the `get_constant` method
# Random weight parameters created with the get_constant method
# are not updated during training (i.e., constant parameters)
self.rand_weight = self.params.get_constant(
'rand_weight', np.random.uniform(size=(20, 20)))
self.dense = nn.Dense(20, activation='relu')
def forward(self, X):
X = self.dense(X)
# Use the created constant parameters, as well as the `relu` and `dot`
# Use the created constant parameters, as well as the relu and dot
# functions
X = npx.relu(np.dot(X, self.rand_weight.data()) + 1)
# Reuse the fully connected layer. This is equivalent to sharing
@@ -585,15 +585,15 @@ class FixedHiddenMLP(tf.keras.Model):
def __init__(self):
super().__init__()
self.flatten = tf.keras.layers.Flatten()
# Random weight parameters created with `tf.constant` are not updated
# Random weight parameters created with tf.constant are not updated
# during training (i.e., constant parameters)
self.rand_weight = tf.constant(tf.random.uniform((20, 20)))
self.dense = tf.keras.layers.Dense(20, activation=tf.nn.relu)
def call(self, inputs):
X = self.flatten(inputs)
# Use the created constant parameters, as well as the `relu` and
# `matmul` functions
# Use the created constant parameters, as well as the relu and
# matmul functions
X = tf.nn.relu(tf.matmul(X, self.rand_weight) + 1)
# Reuse the fully connected layer. This is equivalent to sharing
# parameters with two fully connected layers
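The hunks above create random weights that are never updated during training. One idiomatic way to get the same effect in plain PyTorch is a registered buffer; this is a sketch of the idea, not the implementation used in the book:

```python
import torch
from torch import nn

class FixedHiddenMLP(nn.Module):
    def __init__(self):
        super().__init__()
        # A buffer is stored with the module but excluded from parameters(),
        # so the optimizer never updates it
        self.register_buffer('rand_weight', torch.rand(20, 20))
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        X = self.linear(X)
        X = torch.relu(X @ self.rand_weight + 1)
        # Reuse the fully connected layer: both calls share the same parameters
        return self.linear(X).sum()

net = FixedHiddenMLP()
print(net(torch.rand(2, 20)))
print(sum(p.numel() for p in net.parameters()))  # 420: only the Linear layer's parameters
```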
2 changes: 1 addition & 1 deletion chapter_builders-guide/use-gpu.md
@@ -304,7 +304,7 @@ X

```{.python .input}
%%tab jax
# By default jax puts arrays to GPUs or TPUs if available
# By default JAX puts arrays to GPUs or TPUs if available
X = jax.device_put(jnp.ones((2, 3)), try_gpu())
X
```
20 changes: 10 additions & 10 deletions chapter_convolutional-modern/batch-norm.md
@@ -250,7 +250,7 @@ from mxnet.gluon import nn
npx.set_np()
def batch_norm(X, gamma, beta, moving_mean, moving_var, eps, momentum):
# Use `autograd` to determine whether we are in training mode
# Use autograd to determine whether we are in training mode
if not autograd.is_training():
# In prediction mode, use mean and variance obtained by moving average
X_hat = (X - moving_mean) / np.sqrt(moving_var + eps)
@@ -264,7 +264,7 @@ def batch_norm(X, gamma, beta, moving_mean, moving_var, eps, momentum):
else:
# When using a two-dimensional convolutional layer, calculate the
# mean and variance on the channel dimension (axis=1). Here we
# need to maintain the shape of `X`, so that the broadcasting
# need to maintain the shape of X, so that the broadcasting
# operation can be carried out later
mean = X.mean(axis=(0, 2, 3), keepdims=True)
var = ((X - mean) ** 2).mean(axis=(0, 2, 3), keepdims=True)
@@ -284,7 +284,7 @@ import torch
from torch import nn
def batch_norm(X, gamma, beta, moving_mean, moving_var, eps, momentum):
# Use `is_grad_enabled` to determine whether we are in training mode
# Use is_grad_enabled to determine whether we are in training mode
if not torch.is_grad_enabled():
# In prediction mode, use mean and variance obtained by moving average
X_hat = (X - moving_mean) / torch.sqrt(moving_var + eps)
@@ -298,7 +298,7 @@ def batch_norm(X, gamma, beta, moving_mean, moving_var, eps, momentum):
else:
# When using a two-dimensional convolutional layer, calculate the
# mean and variance on the channel dimension (axis=1). Here we
# need to maintain the shape of `X`, so that the broadcasting
# need to maintain the shape of X, so that the broadcasting
# operation can be carried out later
mean = X.mean(dim=(0, 2, 3), keepdim=True)
var = ((X - mean) ** 2).mean(dim=(0, 2, 3), keepdim=True)
@@ -425,9 +425,9 @@ class BatchNorm(nn.Block):
```{.python .input}
%%tab pytorch
class BatchNorm(nn.Module):
# `num_features`: the number of outputs for a fully connected layer
# or the number of output channels for a convolutional layer. `num_dims`:
# 2 for a fully connected layer and 4 for a convolutional layer
# num_features: the number of outputs for a fully connected layer or the
# number of output channels for a convolutional layer. num_dims: 2 for a
# fully connected layer and 4 for a convolutional layer
def __init__(self, num_features, num_dims):
super().__init__()
if num_dims == 2:
@@ -444,12 +444,12 @@ class BatchNorm(nn.Module):
self.moving_var = torch.ones(shape)
def forward(self, X):
# If `X` is not on the main memory, copy `moving_mean` and
# `moving_var` to the device where `X` is located
# If X is not on the main memory, copy moving_mean and moving_var to
# the device where X is located
if self.moving_mean.device != X.device:
self.moving_mean = self.moving_mean.to(X.device)
self.moving_var = self.moving_var.to(X.device)
# Save the updated `moving_mean` and `moving_var`
# Save the updated moving_mean and moving_var
Y, self.moving_mean, self.moving_var = batch_norm(
X, self.gamma, self.beta, self.moving_mean,
self.moving_var, eps=1e-5, momentum=0.1)
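The hunks above switch between batch statistics (training) and moving averages (prediction). A minimal NumPy sketch of the training-mode update for the fully connected (2D) case; the exact momentum convention differs between libraries, so the one used here is an assumption for illustration:

```python
import numpy as np

def batch_norm_train(X, gamma, beta, moving_mean, moving_var, eps=1e-5, momentum=0.1):
    # Fully connected (2D) case: statistics per feature over the batch dimension
    mean = X.mean(axis=0)
    var = X.var(axis=0)
    X_hat = (X - mean) / np.sqrt(var + eps)
    # Exponential moving averages kept for prediction mode
    # (assumed convention: momentum weights the new batch statistics)
    moving_mean = (1 - momentum) * moving_mean + momentum * mean
    moving_var = (1 - momentum) * moving_var + momentum * var
    return gamma * X_hat + beta, moving_mean, moving_var

X = np.random.randn(8, 4) * 3 + 1
Y, m, v = batch_norm_train(X, np.ones(4), np.zeros(4), np.zeros(4), np.ones(4))
print(Y.mean(axis=0).round(6), Y.std(axis=0).round(3))  # roughly 0 and 1 per feature
```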
6 changes: 3 additions & 3 deletions chapter_convolutional-modern/googlenet.md
@@ -49,7 +49,7 @@ from mxnet.gluon import nn
npx.set_np()
class Inception(nn.Block):
# `c1`--`c4` are the number of output channels for each branch
# c1--c4 are the number of output channels for each branch
def __init__(self, c1, c2, c3, c4, **kwargs):
super(Inception, self).__init__(**kwargs)
# Branch 1
@@ -82,7 +82,7 @@ from torch import nn
from torch.nn import functional as F
class Inception(nn.Module):
# `c1`--`c4` are the number of output channels for each branch
# c1--c4 are the number of output channels for each branch
def __init__(self, c1, c2, c3, c4, **kwargs):
super(Inception, self).__init__(**kwargs)
# Branch 1
@@ -111,7 +111,7 @@ import tensorflow as tf
from d2l import tensorflow as d2l
class Inception(tf.keras.Model):
# `c1`--`c4` are the number of output channels for each branch
# c1--c4 are the number of output channels for each branch
def __init__(self, c1, c2, c3, c4):
super().__init__()
self.b1_1 = tf.keras.layers.Conv2D(c1, 1, activation='relu')
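In the Inception block, `c1`--`c4` fix the per-branch output channels, every branch preserves the spatial size, and the branch outputs are concatenated along the channel dimension. A plain-PyTorch sketch with explicit input channels (the versions in the diff use lazy layers, so the `in_channels` argument and the toy shapes here are assumptions):

```python
import torch
from torch import nn
from torch.nn import functional as F

class Inception(nn.Module):
    # c1--c4 are the number of output channels for each branch
    def __init__(self, in_channels, c1, c2, c3, c4):
        super().__init__()
        self.b1_1 = nn.Conv2d(in_channels, c1, kernel_size=1)
        self.b2_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
        self.b2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
        self.b3_1 = nn.Conv2d(in_channels, c3[0], kernel_size=1)
        self.b3_2 = nn.Conv2d(c3[0], c3[1], kernel_size=5, padding=2)
        self.b4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.b4_2 = nn.Conv2d(in_channels, c4, kernel_size=1)

    def forward(self, x):
        b1 = F.relu(self.b1_1(x))
        b2 = F.relu(self.b2_2(F.relu(self.b2_1(x))))
        b3 = F.relu(self.b3_2(F.relu(self.b3_1(x))))
        b4 = F.relu(self.b4_2(self.b4_1(x)))
        return torch.cat((b1, b2, b3, b4), dim=1)  # concatenate on channels

blk = Inception(192, 64, (96, 128), (16, 32), 32)
print(blk(torch.rand(1, 192, 28, 28)).shape)  # [1, 64 + 128 + 32 + 32, 28, 28]
```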
4 changes: 2 additions & 2 deletions chapter_convolutional-neural-networks/channels.md
@@ -154,8 +154,8 @@ to [**calculate the output of multiple channels**] as shown below.
```{.python .input}
%%tab all
def corr2d_multi_in_out(X, K):
# Iterate through the 0th dimension of `K`, and each time, perform
# cross-correlation operations with input `X`. All of the results are
# Iterate through the 0th dimension of K, and each time, perform
# cross-correlation operations with input X. All of the results are
# stacked together
return d2l.stack([corr2d_multi_in(X, k) for k in K], 0)
```
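For context, `corr2d_multi_in_out` produces one output channel per kernel stacked in the 0th dimension of `K`. A self-contained NumPy sketch with explicit loops (the `corr2d` and `corr2d_multi_in` helpers are re-implemented here for illustration):

```python
import numpy as np

def corr2d(X, K):
    # Single-channel 2D cross-correlation
    h, w = K.shape
    Y = np.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j] = (X[i:i + h, j:j + w] * K).sum()
    return Y

def corr2d_multi_in(X, K):
    # Sum the single-channel results over the input channels
    return sum(corr2d(x, k) for x, k in zip(X, K))

def corr2d_multi_in_out(X, K):
    # One output channel per kernel in the 0th dimension of K
    return np.stack([corr2d_multi_in(X, k) for k in K], 0)

X = np.random.rand(2, 3, 3)     # 2 input channels
K = np.random.rand(4, 2, 2, 2)  # 4 output channels, 2 input channels, 2x2 kernels
print(corr2d_multi_in_out(X, K).shape)  # (4, 2, 2)
```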
16 changes: 9 additions & 7 deletions chapter_convolutional-neural-networks/padding-and-strides.md
@@ -115,7 +115,7 @@ npx.set_np()
# We define a helper function to calculate convolutions. It initializes
# the convolutional layer weights and performs corresponding dimensionality
# elevations and reductions on the input and output.
# elevations and reductions on the input and output
def comp_conv2d(conv2d, X):
conv2d.initialize()
# (1, 1) indicates that batch size and the number of channels are both 1
@@ -135,16 +135,17 @@ comp_conv2d(conv2d, X).shape
import torch
from torch import nn
# We define a helper function to calculate convolutions. It initializes
# the convolutional layer weights and performs corresponding dimensionality
# We define a helper function to calculate convolutions. It initializes the
# convolutional layer weights and performs corresponding dimensionality
# elevations and reductions on the input and output
def comp_conv2d(conv2d, X):
# (1, 1) indicates that batch size and the number of channels are both 1
X = X.reshape((1, 1) + X.shape)
Y = conv2d(X)
# Strip the first two dimensions: examples and channels
return Y.reshape(Y.shape[2:])
# 1 row and column is padded on either side, so a total of 2 rows or columns are added
# 1 row and column is padded on either side, so a total of 2 rows or columns
# are added
conv2d = nn.LazyConv2d(1, kernel_size=3, padding=1)
X = torch.rand(size=(8, 8))
comp_conv2d(conv2d, X).shape
@@ -163,7 +164,8 @@ def comp_conv2d(conv2d, X):
Y = conv2d(X)
# Strip the first two dimensions: examples and channels
return tf.reshape(Y, Y.shape[1:3])
# 1 row and column is padded on either side, so a total of 2 rows or columns are added
# 1 row and column is padded on either side, so a total of 2 rows or columns
# are added
conv2d = tf.keras.layers.Conv2D(1, kernel_size=3, padding='same')
X = tf.random.uniform(shape=(8, 8))
comp_conv2d(conv2d, X).shape
@@ -206,8 +208,8 @@ comp_conv2d(conv2d, X).shape

```{.python .input}
%%tab pytorch
# We use a convolution kernel with height 5 and width 3. The padding on
# either side of the height and width are 2 and 1, respectively
# We use a convolution kernel with height 5 and width 3. The padding on either
# side of the height and width are 2 and 1, respectively
conv2d = nn.LazyConv2d(1, kernel_size=(5, 3), padding=(2, 1))
comp_conv2d(conv2d, X).shape
```
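The snippets above check output shapes empirically; they can also be checked against the usual closed-form expression, roughly floor((n - k + 2p + s) / s) per dimension, where p counts the padding added on each side. A PyTorch sketch, with the toy kernel, padding, and stride values chosen here as assumptions:

```python
import torch
from torch import nn

def comp_conv2d(conv2d, X):
    # Add batch and channel dimensions, run the layer, then strip them again
    X = X.reshape((1, 1) + X.shape)
    Y = conv2d(X)
    return Y.reshape(Y.shape[2:])

def out_dim(n, k, p, s):
    # Output size along one dimension; p is the padding on each side
    return (n - k + 2 * p + s) // s

X = torch.rand(8, 8)
conv2d = nn.Conv2d(1, 1, kernel_size=(5, 3), padding=(2, 1))
print(comp_conv2d(conv2d, X).shape, (out_dim(8, 5, 2, 1), out_dim(8, 3, 1, 1)))  # both (8, 8)

conv2d = nn.Conv2d(1, 1, kernel_size=(3, 5), padding=(0, 1), stride=(3, 4))
print(comp_conv2d(conv2d, X).shape, (out_dim(8, 3, 0, 3), out_dim(8, 5, 1, 4)))  # both (2, 2)
```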
3 changes: 2 additions & 1 deletion chapter_convolutional-neural-networks/pooling.md
@@ -330,7 +330,8 @@ X

```{.python .input}
%%tab tensorflow, jax
X = d2l.concat([X, X + 1], 3) # Concatenate along `dim=3` due to channels-last syntax
# Concatenate along `dim=3` due to channels-last syntax
X = d2l.concat([X, X + 1], 3)
X
```

5 changes: 3 additions & 2 deletions chapter_linear-classification/softmax-regression-concise.md
@@ -109,7 +109,7 @@ class SoftmaxRegression(d2l.Classifier): #@save
@nn.compact
def __call__(self, X):
X = X.reshape((X.shape[0], -1)) # flatten
X = X.reshape((X.shape[0], -1)) # Flatten
X = nn.Dense(self.num_outputs)(X)
return X
```
@@ -194,8 +194,9 @@ def loss(self, Y_hat, Y, averaged=True):
@d2l.add_to_class(d2l.Classifier) #@save
@partial(jax.jit, static_argnums=(0, 5))
def loss(self, params, X, Y, state, averaged=True):
# To be used later (e.g., for batch norm)
Y_hat = state.apply_fn({'params': params}, *X,
mutable=False, rngs=None) # To be used later (e.g., for batch norm)
mutable=False, rngs=None)
Y_hat = d2l.reshape(Y_hat, (-1, Y_hat.shape[-1]))
Y = d2l.reshape(Y, (-1,))
fn = optax.softmax_cross_entropy_with_integer_labels
2 changes: 1 addition & 1 deletion chapter_linear-regression/weight-decay.md
@@ -450,7 +450,7 @@ class WeightDecay(d2l.LinearRegression):
wd: int = 0
def configure_optimizers(self):
# Weight Decay is not available directly within `optax.sgd`, but
# Weight Decay is not available directly within optax.sgd, but
# optax allows chaining several transformations together
return optax.chain(optax.additive_weight_decay(self.wd),
optax.sgd(self.lr))
4 changes: 2 additions & 2 deletions chapter_multilayer-perceptrons/kaggle-house-price.md
@@ -246,12 +246,12 @@ def preprocess(self):
(self.raw_train.drop(columns=['Id', label]),
self.raw_val.drop(columns=['Id'])))
# Standardize numerical columns
numeric_features = features.dtypes[features.dtypes != 'object'].index
numeric_features = features.dtypes[features.dtypes!='object'].index
features[numeric_features] = features[numeric_features].apply(
lambda x: (x - x.mean()) / (x.std()))
# Replace NAN numerical features by 0
features[numeric_features] = features[numeric_features].fillna(0)
# Replace discrete features by one-hot encoding.
# Replace discrete features by one-hot encoding
features = pd.get_dummies(features, dummy_na=True)
# Save preprocessed features
self.train = features[:self.raw_train.shape[0]].copy()
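The preprocessing hunk standardizes the numeric columns, imputes missing numeric values with 0 (the post-standardization mean), and one-hot encodes the discrete columns. A pandas sketch on a tiny made-up frame (the column names and values are assumptions, not the Kaggle data):

```python
import pandas as pd

# Hypothetical toy frame standing in for the Kaggle features
features = pd.DataFrame({'LotArea': [8450.0, 9600.0, None, 11250.0],
                         'MSZoning': ['RL', 'RM', None, 'RL']})

# Standardize numeric columns
numeric_features = features.dtypes[features.dtypes != 'object'].index
features[numeric_features] = features[numeric_features].apply(
    lambda x: (x - x.mean()) / x.std())
# After standardization the mean is 0, so filling NaN with 0 imputes the mean
features[numeric_features] = features[numeric_features].fillna(0)
# One-hot encode discrete features; dummy_na=True keeps a column for missing values
features = pd.get_dummies(features, dummy_na=True)
print(features)
```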
15 changes: 8 additions & 7 deletions chapter_preliminaries/autograd.md
@@ -128,7 +128,8 @@ x = tf.Variable(x)

```{.python .input n=10}
%%tab mxnet
# Our code is inside an `autograd.record` scope to build the computational graph
# Our code is inside an `autograd.record` scope to build the computational
# graph
with autograd.record():
y = 2 * np.dot(x, x)
y
@@ -203,7 +204,7 @@ x_grad
```{.python .input}
%%tab jax
from jax import grad
# the `grad` transform returns a Python function that
# The `grad` transform returns a Python function that
# computes the gradient of the original function
x_grad = grad(y)(x)
x_grad
@@ -376,13 +377,13 @@ x.grad
%%tab tensorflow
with tf.GradientTape() as t:
y = x * x
t.gradient(y, x) # Same as `y = tf.reduce_sum(x * x)`
t.gradient(y, x) # Same as y = tf.reduce_sum(x * x)
```

```{.python .input}
%%tab jax
y = lambda x: x * x
# `grad` is only defined for scalar output functions
# grad is only defined for scalar output functions
grad(lambda x: y(x).sum())(x)
```

@@ -434,8 +435,8 @@ x.grad == u

```{.python .input}
%%tab tensorflow
# Set `persistent=True` to preserve the compute graph.
# This lets us run `t.gradient` more than once
# Set persistent=True to preserve the compute graph.
# This lets us run t.gradient more than once
with tf.GradientTape(persistent=True) as t:
y = x * x
u = tf.stop_gradient(y)
@@ -450,7 +451,7 @@ x_grad == u
import jax
y = lambda x: x * x
# `jax.lax` primitives are Python wrappers around XLA operations
# jax.lax primitives are Python wrappers around XLA operations
u = jax.lax.stop_gradient(y(x))
z = lambda x: u * x
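The TensorFlow and JAX hunks above detach an intermediate value so that downstream gradients treat it as a constant. The PyTorch counterpart (not part of these hunks) is `detach`; a minimal sketch:

```python
import torch

x = torch.arange(4.0, requires_grad=True)
y = x * x
u = y.detach()      # u is treated as a constant with respect to x
z = u * x
z.sum().backward()
print(x.grad == u)  # tensor([True, True, True, True])
```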
2 changes: 1 addition & 1 deletion chapter_preliminaries/calculus.md
@@ -244,7 +244,7 @@ def plot(X, Y=None, xlabel=None, ylabel=None, legend=[], xlim=None,
fmts=('-', 'm--', 'g-.', 'r:'), figsize=(3.5, 2.5), axes=None):
"""Plot data points."""
def has_one_axis(X): # True if `X` (tensor or list) has 1 axis
def has_one_axis(X): # True if X (tensor or list) has 1 axis
return (hasattr(X, "ndim") and X.ndim == 1 or isinstance(X, list)
and not hasattr(X[0], "__len__"))
10 changes: 5 additions & 5 deletions chapter_preliminaries/linear-algebra.md
@@ -383,21 +383,21 @@ same shape as their operands.
```{.python .input}
%%tab mxnet
A = np.arange(6).reshape(2, 3)
B = A.copy() # Assign a copy of `A` to `B` by allocating new memory
B = A.copy() # Assign a copy of A to B by allocating new memory
A, A + B
```

```{.python .input}
%%tab pytorch
A = torch.arange(6, dtype=torch.float32).reshape(2, 3)
B = A.clone() # Assign a copy of `A` to `B` by allocating new memory
B = A.clone() # Assign a copy of A to B by allocating new memory
A, A + B
```

```{.python .input}
%%tab tensorflow
A = tf.reshape(tf.range(6, dtype=tf.float32), (2, 3))
B = A # No cloning of `A` to `B` by allocating new memory
B = A # No cloning of A to B by allocating new memory
A, A + B
```

@@ -548,12 +548,12 @@ is equivalent to summing up all the elements of the matrix.

```{.python .input}
%%tab mxnet, pytorch, jax
A.sum(axis=[0, 1]) == A.sum() # Same as `A.sum()`
A.sum(axis=[0, 1]) == A.sum() # Same as A.sum()
```

```{.python .input}
%%tab tensorflow
tf.reduce_sum(A, axis=[0, 1]), tf.reduce_sum(A) # Same as `tf.reduce_sum(A)`
tf.reduce_sum(A, axis=[0, 1]), tf.reduce_sum(A) # Same as tf.reduce_sum(A)
```

[**A related quantity is the *mean*, also called the *average*.**]
2 changes: 1 addition & 1 deletion chapter_preliminaries/ndarray.md
@@ -406,7 +406,7 @@ X_var

```{.python .input}
%%tab jax
# JAX arrays are immutable. `jax.numpy.ndarray.at` index
# JAX arrays are immutable. jax.numpy.ndarray.at index
# update operators create a new array with the corresponding
# modifications made
X_new_1 = X.at[1, 2].set(17)
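As the comment in this hunk notes, JAX arrays are immutable, so `.at[...]` index updates return a new array instead of modifying in place. A minimal sketch with a made-up array:

```python
import jax.numpy as jnp

X = jnp.arange(12.0).reshape(3, 4)
X_new_1 = X.at[1, 2].set(17.0)        # returns a modified copy
X_new_2 = X_new_1.at[0, :].set(12.0)  # slices can be updated the same way
print(X[1, 2], X_new_1[1, 2])         # 6.0 17.0 -- the original is unchanged
print(X_new_2[0])                     # [12. 12. 12. 12.]
```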