minor polish
astonzhang committed Jan 11, 2020
1 parent 291ab31 commit 0b8b457
Showing 12 changed files with 39 additions and 40 deletions.
12 changes: 6 additions & 6 deletions chapter_computer-vision/object-detection-dataset.md
@@ -5,7 +5,7 @@ There are no small datasets, like MNIST or Fashion-MNIST, in the object detection field

## Downloading the Dataset

The Pikachu dataset in RecordIO format can be downloaded directly from the Internet.

```{.python .input n=1}
%matplotlib inline
@@ -16,8 +16,8 @@ import os
npx.set_np()
# Saved in the d2l package for later use
-d2l.DATA_HUB['pikachu'] = (d2l.DATA_URL+'pikachu.zip',
-                           '68ab1bd42143c5966785eb0d7b2839df8d570190')
+d2l.DATA_HUB['pikachu'] = (d2l.DATA_URL + 'pikachu.zip',
+                           '68ab1bd42143c5966785eb0d7b2839df8d570190')
```

## Reading the Dataset
@@ -30,15 +30,15 @@ def load_data_pikachu(batch_size, edge_size=256):
"""Load the pikachu dataset."""
data_dir = d2l.download_extract('pikachu')
train_iter = image.ImageDetIter(
path_imgrec=data_dir+'train.rec',
path_imgidx=data_dir+'train.idx',
path_imgrec=data_dir + 'train.rec',
path_imgidx=data_dir + 'train.idx',
batch_size=batch_size,
data_shape=(3, edge_size, edge_size), # The shape of the output image
shuffle=True, # Read the dataset in random order
rand_crop=1, # The probability of random cropping is 1
min_object_covered=0.95, max_attempts=200)
val_iter = image.ImageDetIter(
path_imgrec=data_dir+'val.rec', batch_size=batch_size,
path_imgrec=data_dir + 'val.rec', batch_size=batch_size,
data_shape=(3, edge_size, edge_size), shuffle=False)
return train_iter, val_iter
```
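
For illustration (not part of this commit), a minimal usage sketch of `load_data_pikachu`: read one minibatch and inspect the shapes of the images and bounding-box labels. The batch and edge sizes below are arbitrary choices.

```{.python .input}
# A minimal sketch, assuming the imports above; sizes are arbitrary
batch_size, edge_size = 32, 256
train_iter, _ = load_data_pikachu(batch_size, edge_size)
batch = train_iter.next()  # ImageDetIter yields DataBatch objects
batch.data[0].shape, batch.label[0].shape
```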
8 changes: 4 additions & 4 deletions chapter_computer-vision/semantic-segmentation-and-dataset.md
@@ -28,14 +28,14 @@ import os
npx.set_np()
```

-The original site might be unstable, we download the data from a mirror site.
-The archive is about 2GB, so it will take some time to download.
+The original site might be unstable, so we download the data from a mirror site.
+The archive is about 2 GB, so it will take some time to download.
After you decompress the archive, the dataset is located in the `../data/VOCdevkit/VOC2012` path.

```{.python .input n=2}
# Saved in the d2l package for later use
-d2l.DATA_HUB['voc2012'] = (d2l.DATA_URL+'VOCtrainval_11-May-2012.tar',
-                           '4e443f8a2eca6b1dac8a6c57641b67dd40621a49')
+d2l.DATA_HUB['voc2012'] = (d2l.DATA_URL + 'VOCtrainval_11-May-2012.tar',
+                           '4e443f8a2eca6b1dac8a6c57641b67dd40621a49')
voc_dir = d2l.download_extract('voc2012', 'VOCdevkit/VOC2012')
```
4 changes: 2 additions & 2 deletions chapter_generative-adversarial-networks/dcgan.md
@@ -19,8 +19,8 @@ The dataset we will use is a collection of Pokemon sprites obtained from [pokemo

```{.python .input n=2}
# Saved in the d2l package for later use
-d2l.DATA_HUB['pokemon'] = (d2l.DATA_URL+'pokemon.zip',
-                           'c065c0e2593b8b161a2d7873e42418bf6a21106c')
+d2l.DATA_HUB['pokemon'] = (d2l.DATA_URL + 'pokemon.zip',
+                           'c065c0e2593b8b161a2d7873e42418bf6a21106c')
data_dir = d2l.download_extract('pokemon')
pokemon = gluon.data.vision.datasets.ImageFolderDataset(data_dir)
2 changes: 1 addition & 1 deletion chapter_natural-language-processing/sentiment-analysis.md
@@ -38,7 +38,7 @@ def read_imdb(data_dir, is_train):
    for label in ['pos/', 'neg/']:
        folder_name = data_dir + ('train/' if is_train else 'test/') + label
        for file in os.listdir(folder_name):
-            with open(folder_name+file, 'rb') as f:
+            with open(folder_name + file, 'rb') as f:
                review = f.read().decode('utf-8').replace('\n', '')
                data.append(review)
                labels.append(1 if label == 'pos' else 0)
6 changes: 3 additions & 3 deletions chapter_natural-language-processing/word2vec-dataset.md
@@ -20,13 +20,13 @@ This dataset has already been preprocessed. Each line of the dataset acts as a sentence

```{.python .input n=2}
# Saved in the d2l package for later use
-d2l.DATA_HUB['ptb'] = (d2l.DATA_URL+'ptb.zip',
-                       '319d85e578af0cdc590547f26231e4e31cdf1e42')
+d2l.DATA_HUB['ptb'] = (d2l.DATA_URL + 'ptb.zip',
+                       '319d85e578af0cdc590547f26231e4e31cdf1e42')
# Saved in the d2l package for later use
def read_ptb():
    data_dir = d2l.download_extract('ptb')
-    with open(data_dir+'ptb.train.txt') as f:
+    with open(data_dir + 'ptb.train.txt') as f:
        raw_text = f.read()
    return [line.split() for line in raw_text.split('\n')]
4 changes: 2 additions & 2 deletions chapter_optimization/minibatch-sgd.md
@@ -106,8 +106,8 @@ Let's have a look at how minibatches are efficiently generated from data. In the

```{.python .input n=1}
# Saved in the d2l package for later use
-d2l.DATA_HUB['airfoil'] = (d2l.DATA_URL+'airfoil_self_noise.dat',
-                           '76e5be1548fd8222e5074cf0faae75edff8cf93f')
+d2l.DATA_HUB['airfoil'] = (d2l.DATA_URL + 'airfoil_self_noise.dat',
+                           '76e5be1548fd8222e5074cf0faae75edff8cf93f')
# Saved in the d2l package for later use
def get_data_ch11(batch_size=10, n=1500):
8 changes: 4 additions & 4 deletions chapter_recommender-systems/ctr.md
@@ -7,7 +7,7 @@ Digital marketers use online advertising to display advertisements to customers.

$$\text{CTR} = \frac{\#\text{Clicks}}{\#\text{Impressions}} \times 100\%.$$

Click-through rate is an important signal that indicates the effectiveness of prediction algorithms. Click-through rate prediction is the task of predicting the likelihood that something on a website will be clicked. CTR prediction models can be employed not only in targeted advertising systems but also in general item (e.g., movies, news, products) recommender systems, email campaigns, and even search engines. CTR is also closely related to user satisfaction and conversion rate, and it can help advertisers set realistic expectations when defining campaign goals.
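
As a quick worked example of the formula above (with hypothetical numbers), an ad that receives 5 clicks out of 1,000 impressions has a CTR of 0.5%:

```{.python .input}
# Hypothetical numbers for illustration only
clicks, impressions = 5, 1000
ctr = clicks / impressions * 100  # in percent
ctr  # 0.5
```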

```{.python .input}
from collections import defaultdict
@@ -23,8 +23,8 @@ The following code downloads the dataset from our server and saves it into the local folder

```{.python .input n=15}
# Saved in the d2l package for later use
-d2l.DATA_HUB['ctr'] = (d2l.DATA_URL+'ctr.zip',
-                       'e18327c48c8e8e5c23da714dd614e390d369843f')
+d2l.DATA_HUB['ctr'] = (d2l.DATA_URL + 'ctr.zip',
+                       'e18327c48c8e8e5c23da714dd614e390d369843f')
data_dir = d2l.download_extract('ctr')
```
@@ -81,7 +81,7 @@ class CTRDataset(gluon.data.Dataset):
The following example loads the training data and prints out the first record.

```{.python .input n=16}
-train_data = CTRDataset(data_path=data_dir+"train.csv")
+train_data = CTRDataset(data_path=data_dir + "train.csv")
train_data[0]
```

4 changes: 2 additions & 2 deletions chapter_recommender-systems/deepfm.md
@@ -77,8 +77,8 @@ The data loading process is the same as that of FM. We set the MLP component of
```{.python .input n=4}
batch_size = 2048
data_dir = d2l.download_extract('ctr')
-train_data = d2l.CTRDataset(data_dir+"train.csv")
-test_data = d2l.CTRDataset(data_dir+"test.csv",
+train_data = d2l.CTRDataset(data_dir + "train.csv")
+test_data = d2l.CTRDataset(data_dir + "test.csv",
                            feat_mapper=train_data.feat_mapper,
                            defaults=train_data.defaults)
field_dims = train_data.field_dims
18 changes: 8 additions & 10 deletions chapter_recommender-systems/fm.md
@@ -67,19 +67,17 @@ We use the CTR data wrapper from the last section to load the online advertising
```{.python .input n=3}
batch_size = 2048
data_dir = d2l.download_extract('ctr')
-train_data = d2l.CTRDataset(data_dir+"train.csv")
-test_data = d2l.CTRDataset(data_dir+"test.csv",
+train_data = d2l.CTRDataset(data_dir + "train.csv")
+test_data = d2l.CTRDataset(data_dir + "test.csv",
                            feat_mapper=train_data.feat_mapper,
                            defaults=train_data.defaults)
num_workers = 0 if sys.platform.startswith("win") else 4
-train_iter = gluon.data.DataLoader(train_data, shuffle=True,
-                                   last_batch="rollover",
-                                   batch_size=batch_size,
-                                   num_workers=num_workers)
-test_iter = gluon.data.DataLoader(test_data, shuffle=False,
-                                  last_batch="rollover",
-                                  batch_size=batch_size,
-                                  num_workers=num_workers)
+train_iter = gluon.data.DataLoader(
+    train_data, shuffle=True, last_batch="rollover", batch_size=batch_size,
+    num_workers=num_workers)
+test_iter = gluon.data.DataLoader(
+    test_data, shuffle=False, last_batch="rollover", batch_size=batch_size,
+    num_workers=num_workers)
```
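
As a quick sanity check (a sketch, not part of the original commit), one can peek at a single minibatch to confirm the iterators yield feature and label batches of the expected size:

```{.python .input}
# Illustrative only: inspect one minibatch from the training iterator
for X, y in train_iter:
    print(X.shape, y.shape)  # expect (batch_size, num_fields), (batch_size,)
    break
```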

## Train the Model
3 changes: 2 additions & 1 deletion chapter_recommender-systems/movielens.md
@@ -28,7 +28,8 @@ d2l.DATA_HUB['ml-100k'] = (
def read_data_ml100k():
    data_dir = d2l.download_extract('ml-100k')
    names = ['user_id', 'item_id', 'rating', 'timestamp']
-    data = pd.read_csv(data_dir+'u.data', '\t', names=names, engine='python')
+    data = pd.read_csv(data_dir + 'u.data', '\t', names=names,
+                       engine='python')
    num_users = data.user_id.unique().shape[0]
    num_items = data.item_id.unique().shape[0]
    return data, num_users, num_items
6 changes: 3 additions & 3 deletions chapter_recurrent-modern/machine-translation.md
@@ -17,13 +17,13 @@ We first download a dataset that contains a set of English sentences with the corresponding French translations

```{.python .input n=8}
# Saved in the d2l package for later use
-d2l.DATA_HUB['fra-eng'] = (d2l.DATA_URL+'fra-eng.zip',
-                           '94646ad1522d915e7b0f9296181140edcf86a4f5')
+d2l.DATA_HUB['fra-eng'] = (d2l.DATA_URL + 'fra-eng.zip',
+                           '94646ad1522d915e7b0f9296181140edcf86a4f5')
# Saved in the d2l package for later use
def read_data_nmt():
    data_dir = d2l.download_extract('fra-eng')
-    with open(data_dir+'fra.txt', 'r') as f:
+    with open(data_dir + 'fra.txt', 'r') as f:
        return f.read()
raw_text = read_data_nmt()
4 changes: 2 additions & 2 deletions chapter_recurrent-neural-networks/text-preprocessing.md
@@ -19,8 +19,8 @@ import d2l
import re
# Saved in the d2l package for later use
-d2l.DATA_HUB['time_machine'] = (d2l.DATA_URL+'timemachine.txt',
-                                '090b5e7e70c295757f55df93cb0a180b9691891a')
+d2l.DATA_HUB['time_machine'] = (d2l.DATA_URL + 'timemachine.txt',
+                                '090b5e7e70c295757f55df93cb0a180b9691891a')
# Saved in the d2l package for later use
def read_time_machine():
