
Commit 024482b

dongjoon-hyun authored and srowen committed on Feb 22, 2016
[MINOR][DOCS] Fix all typos in markdown files of doc and similar patterns in other comments
## What changes were proposed in this pull request?

This PR tries to fix all typos in all markdown files under the `docs` module, and fixes similar typos in other comments, too.

## How was this patch tested?

Manual tests.

Author: Dongjoon Hyun <[email protected]>

Closes apache#11300 from dongjoon-hyun/minor_fix_typos.
1 parent 1b14445 commit 024482b

36 files changed: +55 −55 lines changed


‎R/pkg/R/functions.R

+3 −3
@@ -1962,7 +1962,7 @@ setMethod("sha2", signature(y = "Column", x = "numeric"),
 
 #' shiftLeft
 #'
-#' Shift the the given value numBits left. If the given value is a long value, this function
+#' Shift the given value numBits left. If the given value is a long value, this function
 #' will return a long value else it will return an integer value.
 #'
 #' @family math_funcs
@@ -1980,7 +1980,7 @@ setMethod("shiftLeft", signature(y = "Column", x = "numeric"),
 
 #' shiftRight
 #'
-#' Shift the the given value numBits right. If the given value is a long value, it will return
+#' Shift the given value numBits right. If the given value is a long value, it will return
 #' a long value else it will return an integer value.
 #'
 #' @family math_funcs
@@ -1998,7 +1998,7 @@ setMethod("shiftRight", signature(y = "Column", x = "numeric"),
 
 #' shiftRightUnsigned
 #'
-#' Unsigned shift the the given value numBits right. If the given value is a long value,
+#' Unsigned shift the given value numBits right. If the given value is a long value,
 #' it will return a long value else it will return an integer value.
 #'
 #' @family math_funcs
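
For reference, the Scala DataFrame API exposes the same operations as `shiftLeft`, `shiftRight`, and `shiftRightUnsigned` in `org.apache.spark.sql.functions`. A minimal sketch of their behaviour follows; the `SparkSession` setup, column name, and sample values are illustrative only:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{shiftLeft, shiftRight, shiftRightUnsigned}

object ShiftExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("shift-example").master("local[*]").getOrCreate()
    import spark.implicits._

    // An integer column yields integer results; a long column would yield long results.
    val df = Seq(1, 2, 4, -8).toDF("value")
    df.select(
      shiftLeft($"value", 2).as("shl2"),           // value << 2
      shiftRight($"value", 1).as("shr1"),          // value >> 1 (sign-preserving)
      shiftRightUnsigned($"value", 1).as("ushr1")  // value >>> 1
    ).show()

    spark.stop()
  }
}
```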

‎R/pkg/R/sparkR.R

+1 −1
@@ -299,7 +299,7 @@ sparkRHive.init <- function(jsc = NULL) {
 #'
 #' @param sc existing spark context
 #' @param groupid the ID to be assigned to job groups
-#' @param description description for the the job group ID
+#' @param description description for the job group ID
 #' @param interruptOnCancel flag to indicate if the job is interrupted on job cancellation
 #' @examples
 #'\dontrun{
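
The Scala counterpart of this SparkR API is `SparkContext.setJobGroup`; a minimal sketch, with a made-up group ID, description, and local master:

```scala
import org.apache.spark.{SparkConf, SparkContext}

object JobGroupExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("job-group-example").setMaster("local[*]"))

    // Tag all jobs launched from this thread with a group ID and a description.
    // interruptOnCancel = true asks Spark to interrupt running tasks if the group is cancelled.
    sc.setJobGroup("nightly-report", "Aggregations for the nightly report", interruptOnCancel = true)
    println(sc.parallelize(1 to 1000).sum())

    // From another thread, the whole group could be cancelled with:
    // sc.cancelJobGroup("nightly-report")
    sc.stop()
  }
}
```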

‎common/sketch/src/main/java/org/apache/spark/util/sketch/CountMinSketch.java

+1 −1
@@ -39,7 +39,7 @@
 * Suppose you want to estimate the number of times an element {@code x} has appeared in a data
 * stream so far. With probability {@code delta}, the estimate of this frequency is within the
 * range {@code true frequency <= estimate <= true frequency + eps * N}, where {@code N} is the
-* total count of items have appeared the the data stream so far.
+* total count of items have appeared the data stream so far.
 *
 * Under the cover, a {@link CountMinSketch} is essentially a two-dimensional {@code long} array
 * with depth {@code d} and width {@code w}, where
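
A rough usage sketch of this class, assuming the Spark 2.x `CountMinSketch.create(eps, confidence, seed)` factory; the parameter values and items are illustrative:

```scala
import org.apache.spark.util.sketch.CountMinSketch

object SketchExample {
  def main(args: Array[String]): Unit = {
    // eps and confidence (delta) control the bound quoted above:
    // with probability `confidence`, estimate <= true frequency + eps * N.
    val sketch = CountMinSketch.create(0.001, 0.99, 42)

    Seq("a", "b", "a", "c", "a", "b").foreach(item => sketch.add(item))

    println(sketch.estimateCount("a"))  // never underestimates; 3 for this tiny stream
    println(sketch.totalCount())        // N = 6 items seen so far
  }
}
```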

‎core/src/main/scala/org/apache/spark/CacheManager.scala

+1 −1
@@ -120,7 +120,7 @@ private[spark] class CacheManager(blockManager: BlockManager) extends Logging {
 * The effective storage level refers to the level that actually specifies BlockManager put
 * behavior, not the level originally specified by the user. This is mainly for forcing a
 * MEMORY_AND_DISK partition to disk if there is not enough room to unroll the partition,
-* while preserving the the original semantics of the RDD as specified by the application.
+* while preserving the original semantics of the RDD as specified by the application.
 */
 private def putInBlockManager[T](
 key: BlockId,

‎core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala

+1 −1
@@ -76,7 +76,7 @@ class OrderedRDDFunctions[K : Ordering : ClassTag,
 }
 
 /**
-* Returns an RDD containing only the elements in the the inclusive range `lower` to `upper`.
+* Returns an RDD containing only the elements in the inclusive range `lower` to `upper`.
 * If the RDD has been partitioned using a `RangePartitioner`, then this operation can be
 * performed efficiently by only scanning the partitions that might contain matching elements.
 * Otherwise, a standard `filter` is applied to all partitions.
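
A small Scala sketch of the method this comment documents, `filterByRange`; the keys and the local master are made up:

```scala
import org.apache.spark.{SparkConf, SparkContext}

object FilterByRangeExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("filter-by-range").setMaster("local[*]"))

    // sortByKey installs a RangePartitioner, so filterByRange can skip whole partitions.
    val pairs = sc.parallelize(Seq(5 -> "e", 1 -> "a", 9 -> "i", 3 -> "c", 7 -> "g")).sortByKey()
    val inRange = pairs.filterByRange(3, 7)  // inclusive bounds, as described above

    inRange.collect().foreach(println)       // (3,c), (5,e), (7,g)
    sc.stop()
  }
}
```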

‎core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala

+1 −1
@@ -655,7 +655,7 @@ class DAGScheduler(
 
 /**
 * Submit a shuffle map stage to run independently and get a JobWaiter object back. The waiter
-* can be used to block until the the job finishes executing or can be used to cancel the job.
+* can be used to block until the job finishes executing or can be used to cancel the job.
 * This method is used for adaptive query planning, to run map stages and look at statistics
 * about their outputs before submitting downstream stages.
 *

‎core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala

+3 −3
@@ -47,7 +47,7 @@ import org.apache.spark.util.{ResetSystemProperties, Utils}
 /**
 * A collection of tests against the historyserver, including comparing responses from the json
 * metrics api to a set of known "golden files". If new endpoints / parameters are added,
-* cases should be added to this test suite. The expected outcomes can be genered by running
+* cases should be added to this test suite. The expected outcomes can be generated by running
 * the HistoryServerSuite.main. Note that this will blindly generate new expectation files matching
 * the current behavior -- the developer must verify that behavior is correct.
 *
@@ -274,12 +274,12 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers
 implicit val webDriver: WebDriver = new HtmlUnitDriver
 implicit val formats = org.json4s.DefaultFormats
 
-// this test dir is explictly deleted on successful runs; retained for diagnostics when
+// this test dir is explicitly deleted on successful runs; retained for diagnostics when
 // not
 val logDir = Utils.createDirectory(System.getProperty("java.io.tmpdir", "logs"))
 
 // a new conf is used with the background thread set and running at its fastest
-// alllowed refresh rate (1Hz)
+// allowed refresh rate (1Hz)
 val myConf = new SparkConf()
 .set("spark.history.fs.logDirectory", logDir.getAbsolutePath)
 .set("spark.eventLog.dir", logDir.getAbsolutePath)

‎docs/ml-classification-regression.md

+1 −1
@@ -252,7 +252,7 @@ Nodes in the output layer use softmax function:
 \]`
 The number of nodes `$N$` in the output layer corresponds to the number of classes.
 
-MLPC employes backpropagation for learning the model. We use logistic loss function for optimization and L-BFGS as optimization routine.
+MLPC employs backpropagation for learning the model. We use logistic loss function for optimization and L-BFGS as optimization routine.
 
 **Example**

‎docs/ml-features.md

+3 −3
@@ -185,7 +185,7 @@ for more details on the API.
 <div data-lang="python" markdown="1">
 
 Refer to the [Tokenizer Python docs](api/python/pyspark.ml.html#pyspark.ml.feature.Tokenizer) and
-the the [RegexTokenizer Python docs](api/python/pyspark.ml.html#pyspark.ml.feature.RegexTokenizer)
+the [RegexTokenizer Python docs](api/python/pyspark.ml.html#pyspark.ml.feature.RegexTokenizer)
 for more details on the API.
 
 {% include_example python/ml/tokenizer_example.py %}
@@ -459,7 +459,7 @@ column, we should get the following:
 "a" gets index `0` because it is the most frequent, followed by "c" with index `1` and "b" with
 index `2`.
 
-Additionaly, there are two strategies regarding how `StringIndexer` will handle
+Additionally, there are two strategies regarding how `StringIndexer` will handle
 unseen labels when you have fit a `StringIndexer` on one dataset and then use it
 to transform another:
 
@@ -779,7 +779,7 @@ for more details on the API.
 
 * `splits`: Parameter for mapping continuous features into buckets. With n+1 splits, there are n buckets. A bucket defined by splits x,y holds values in the range [x,y) except the last bucket, which also includes y. Splits should be strictly increasing. Values at -inf, inf must be explicitly provided to cover all Double values; Otherwise, values outside the splits specified will be treated as errors. Two examples of `splits` are `Array(Double.NegativeInfinity, 0.0, 1.0, Double.PositiveInfinity)` and `Array(0.0, 1.0, 2.0)`.
 
-Note that if you have no idea of the upper bound and lower bound of the targeted column, you would better add the `Double.NegativeInfinity` and `Double.PositiveInfinity` as the bounds of your splits to prevent a potenial out of Bucketizer bounds exception.
+Note that if you have no idea of the upper bound and lower bound of the targeted column, you would better add the `Double.NegativeInfinity` and `Double.PositiveInfinity` as the bounds of your splits to prevent a potential out of Bucketizer bounds exception.
 
 Note also that the splits that you provided have to be in strictly increasing order, i.e. `s0 < s1 < s2 < ... < sn`.
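
A hedged Scala sketch of the `Bucketizer` guidance above, using infinite outer bounds; the split points, sample values, and column names are illustrative:

```scala
import org.apache.spark.ml.feature.Bucketizer
import org.apache.spark.sql.SparkSession

object BucketizerExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("bucketizer-example").master("local[*]").getOrCreate()

    // Infinite outer bounds guard against values outside the expected range,
    // as the note above recommends.
    val splits = Array(Double.NegativeInfinity, -0.5, 0.0, 0.5, Double.PositiveInfinity)
    val data = Array(-999.9, -0.5, -0.3, 0.0, 0.2, 999.9)
    val df = spark.createDataFrame(data.map(Tuple1.apply)).toDF("features")

    val bucketizer = new Bucketizer()
      .setInputCol("features")
      .setOutputCol("bucketedFeatures")
      .setSplits(splits)

    bucketizer.transform(df).show()  // each value mapped to its bucket index
    spark.stop()
  }
}
```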

‎docs/ml-guide.md

+1 −1
@@ -628,7 +628,7 @@ Currently, `spark.ml` supports model selection using the [`CrossValidator`](api/
 The `Evaluator` can be a [`RegressionEvaluator`](api/scala/index.html#org.apache.spark.ml.evaluation.RegressionEvaluator)
 for regression problems, a [`BinaryClassificationEvaluator`](api/scala/index.html#org.apache.spark.ml.evaluation.BinaryClassificationEvaluator)
 for binary data, or a [`MultiClassClassificationEvaluator`](api/scala/index.html#org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator)
-for multiclass problems. The default metric used to choose the best `ParamMap` can be overriden by the `setMetricName`
+for multiclass problems. The default metric used to choose the best `ParamMap` can be overridden by the `setMetricName`
 method in each of these evaluators.
 
 The `ParamMap` which produces the best evaluation metric (averaged over the `$k$` folds) is selected as the best model.
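
A minimal Scala sketch of overriding the default metric via `setMetricName`, assuming a `training` DataFrame with the usual `label` and `features` columns; the parameter grid and metric choice are illustrative:

```scala
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator
import org.apache.spark.ml.tuning.{CrossValidator, CrossValidatorModel, ParamGridBuilder}
import org.apache.spark.sql.DataFrame

object CrossValidationSketch {
  // `training` is assumed to provide "label" and "features" columns.
  def tune(training: DataFrame): CrossValidatorModel = {
    val lr = new LogisticRegression()

    val paramGrid = new ParamGridBuilder()
      .addGrid(lr.regParam, Array(0.1, 0.01))
      .build()

    // Override the evaluator's default metric (areaUnderROC) via setMetricName.
    val evaluator = new BinaryClassificationEvaluator().setMetricName("areaUnderPR")

    new CrossValidator()
      .setEstimator(lr)
      .setEvaluator(evaluator)
      .setEstimatorParamMaps(paramGrid)
      .setNumFolds(3)
      .fit(training)
  }
}
```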

‎docs/mllib-clustering.md

+3 −3
@@ -300,7 +300,7 @@ for i in range(2):
 ## Power iteration clustering (PIC)
 
 Power iteration clustering (PIC) is a scalable and efficient algorithm for clustering vertices of a
-graph given pairwise similarties as edge properties,
+graph given pairwise similarities as edge properties,
 described in [Lin and Cohen, Power Iteration Clustering](http://www.icml2010.org/papers/387.pdf).
 It computes a pseudo-eigenvector of the normalized affinity matrix of the graph via
 [power iteration](http://en.wikipedia.org/wiki/Power_iteration) and uses it to cluster vertices.
@@ -786,7 +786,7 @@ This example shows how to estimate clusters on streaming data.
 <div data-lang="scala" markdown="1">
 Refer to the [`StreamingKMeans` Scala docs](api/scala/index.html#org.apache.spark.mllib.clustering.StreamingKMeans) for details on the API.
 
-First we import the neccessary classes.
+First we import the necessary classes.
 
 {% highlight scala %}
 
@@ -837,7 +837,7 @@ ssc.awaitTermination()
 <div data-lang="python" markdown="1">
 Refer to the [`StreamingKMeans` Python docs](api/python/pyspark.mllib.html#pyspark.mllib.clustering.StreamingKMeans) for more details on the API.
 
-First we import the neccessary classes.
+First we import the necessary classes.
 
 {% highlight python %}
 from pyspark.mllib.linalg import Vectors
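
A rough Scala sketch of the import-and-train flow described here, assuming training vectors arrive as space-separated text files in a hypothetical directory; the k, decay factor, dimensionality, and batch interval are illustrative:

```scala
import org.apache.spark.SparkConf
import org.apache.spark.mllib.clustering.StreamingKMeans
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.streaming.{Seconds, StreamingContext}

object StreamingKMeansSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("streaming-kmeans-sketch").setMaster("local[2]")
    val ssc = new StreamingContext(conf, Seconds(5))

    // Training vectors arrive as lines of space-separated doubles, e.g. "0.1 0.2 0.3".
    val trainingData = ssc.textFileStream("/tmp/kmeans-train")  // hypothetical directory
      .map(line => Vectors.dense(line.split(' ').map(_.toDouble)))

    val model = new StreamingKMeans()
      .setK(2)                   // number of clusters
      .setDecayFactor(1.0)       // weight all batches equally over time
      .setRandomCenters(3, 0.0)  // random 3-dimensional initial centers

    model.trainOn(trainingData)

    ssc.start()
    ssc.awaitTermination()
  }
}
```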

‎docs/mllib-evaluation-metrics.md

+3 −3
@@ -67,7 +67,7 @@ plots (recall, false positive rate) points.
 </thead>
 <tbody>
 <tr>
-<td>Precision (Postive Predictive Value)</td>
+<td>Precision (Positive Predictive Value)</td>
 <td>$PPV=\frac{TP}{TP + FP}$</td>
 </tr>
 <tr>
@@ -360,7 +360,7 @@ $$I_A(x) = \begin{cases}1 & \text{if $x \in A$}, \\ 0 & \text{otherwise}.\end{ca
 
 **Examples**
 
-The following code snippets illustrate how to evaluate the performance of a multilabel classifer. The examples
+The following code snippets illustrate how to evaluate the performance of a multilabel classifier. The examples
 use the fake prediction and label data for multilabel classification that is shown below.
 
 Document predictions:
@@ -558,7 +558,7 @@ variable from a number of independent variables.
 <td>$RMSE = \sqrt{\frac{\sum_{i=0}^{N-1} (\mathbf{y}_i - \hat{\mathbf{y}}_i)^2}{N}}$</td>
 </tr>
 <tr>
-<td>Mean Absoloute Error (MAE)</td>
+<td>Mean Absolute Error (MAE)</td>
 <td>$MAE=\sum_{i=0}^{N-1} \left|\mathbf{y}_i - \hat{\mathbf{y}}_i\right|$</td>
 </tr>
 <tr>
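
Both regression metrics touched here are also exposed programmatically through `org.apache.spark.mllib.evaluation.RegressionMetrics`; a small sketch with made-up (prediction, observation) pairs:

```scala
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.evaluation.RegressionMetrics

object RegressionMetricsSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("regression-metrics").setMaster("local[*]"))

    // (prediction, observation) pairs; the numbers are made up for illustration.
    val predictionAndObservations = sc.parallelize(Seq(
      (2.5, 3.0), (0.0, -0.5), (2.0, 2.0), (8.0, 7.0)
    ))

    val metrics = new RegressionMetrics(predictionAndObservations)
    println(s"RMSE = ${metrics.rootMeanSquaredError}")
    println(s"MAE  = ${metrics.meanAbsoluteError}")

    sc.stop()
  }
}
```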

‎docs/mllib-frequent-pattern-mining.md

+1 −1
@@ -135,7 +135,7 @@ pattern mining problem.
 included in the results.
 * `maxLocalProjDBSize`: the maximum number of items allowed in a
 prefix-projected database before local iterative processing of the
-projected databse begins. This parameter should be tuned with respect
+projected database begins. This parameter should be tuned with respect
 to the size of your executors.
 
 **Examples**
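
A hedged Scala sketch of setting `maxLocalProjDBSize` alongside the other `PrefixSpan` parameters; the toy sequences and parameter values are illustrative, not a tuning recommendation:

```scala
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.fpm.PrefixSpan

object PrefixSpanSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("prefixspan-sketch").setMaster("local[*]"))

    // Each sequence is an array of itemsets; the data is a toy example.
    val sequences = sc.parallelize(Seq(
      Array(Array(1, 2), Array(3)),
      Array(Array(1), Array(3, 2), Array(1, 2)),
      Array(Array(1, 2), Array(5)),
      Array(Array(6))
    ), 2).cache()

    val prefixSpan = new PrefixSpan()
      .setMinSupport(0.5)
      .setMaxPatternLength(5)
      .setMaxLocalProjDBSize(32000000L)  // the parameter discussed above

    val model = prefixSpan.run(sequences)
    model.freqSequences.collect().foreach { fs =>
      println(s"${fs.sequence.map(_.mkString("[", ",", "]")).mkString(",")} : ${fs.freq}")
    }
    sc.stop()
  }
}
```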

‎docs/monitoring.md

+1 −1
@@ -108,7 +108,7 @@ The history server can be configured as follows:
 <td>spark.history.fs.update.interval</td>
 <td>10s</td>
 <td>
-The period at which the the filesystem history provider checks for new or
+The period at which the filesystem history provider checks for new or
 updated logs in the log directory. A shorter interval detects new applications faster,
 at the expense of more server load re-reading updated applications.
 As soon as an update has completed, listings of the completed and incomplete applications

‎docs/programming-guide.md

+1 −1
@@ -629,7 +629,7 @@ class MyClass {
 }
 {% endhighlight %}
 
-is equilvalent to writing `rdd.map(x => this.field + x)`, which references all of `this`. To avoid this
+is equivalent to writing `rdd.map(x => this.field + x)`, which references all of `this`. To avoid this
 issue, the simplest way is to copy `field` into a local variable instead of accessing it externally:
 
 {% highlight scala %}
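
For context, roughly the local-copy fix this paragraph leads into, sketched in Scala:

```scala
import org.apache.spark.rdd.RDD

class MyClass {
  val field = "Hello"

  // Referencing `field` directly inside the closure would capture `this`
  // and ship the whole object to the executors.
  def doStuff(rdd: RDD[String]): RDD[String] = {
    val field_ = this.field   // copy just the value into a local variable
    rdd.map(x => field_ + x)  // only field_ is captured, not `this`
  }
}
```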

‎docs/running-on-mesos.md

+2 −2
@@ -188,7 +188,7 @@ overhead, but at the cost of reserving the Mesos resources for the complete dura
 application.
 
 Coarse-grained is the default mode. You can also set `spark.mesos.coarse` property to true
-to turn it on explictly in [SparkConf](configuration.html#spark-properties):
+to turn it on explicitly in [SparkConf](configuration.html#spark-properties):
 
 {% highlight scala %}
 conf.set("spark.mesos.coarse", "true")
@@ -384,7 +384,7 @@ See the [configuration page](configuration.html) for information on Spark config
 <li>Scalar constraints are matched with "less than equal" semantics i.e. value in the constraint must be less than or equal to the value in the resource offer.</li>
 <li>Range constraints are matched with "contains" semantics i.e. value in the constraint must be within the resource offer's value.</li>
 <li>Set constraints are matched with "subset of" semantics i.e. value in the constraint must be a subset of the resource offer's value.</li>
-<li>Text constraints are metched with "equality" semantics i.e. value in the constraint must be exactly equal to the resource offer's value.</li>
+<li>Text constraints are matched with "equality" semantics i.e. value in the constraint must be exactly equal to the resource offer's value.</li>
 <li>In case there is no value present as a part of the constraint any offer with the corresponding attribute will be accepted (without value check).</li>
 </ul>
 </td>

‎docs/spark-standalone.md

+1 −1
@@ -335,7 +335,7 @@ By default, standalone scheduling clusters are resilient to Worker failures (ins
 
 **Overview**
 
-Utilizing ZooKeeper to provide leader election and some state storage, you can launch multiple Masters in your cluster connected to the same ZooKeeper instance. One will be elected "leader" and the others will remain in standby mode. If the current leader dies, another Master will be elected, recover the old Master's state, and then resume scheduling. The entire recovery process (from the time the the first leader goes down) should take between 1 and 2 minutes. Note that this delay only affects scheduling _new_ applications -- applications that were already running during Master failover are unaffected.
+Utilizing ZooKeeper to provide leader election and some state storage, you can launch multiple Masters in your cluster connected to the same ZooKeeper instance. One will be elected "leader" and the others will remain in standby mode. If the current leader dies, another Master will be elected, recover the old Master's state, and then resume scheduling. The entire recovery process (from the time the first leader goes down) should take between 1 and 2 minutes. Note that this delay only affects scheduling _new_ applications -- applications that were already running during Master failover are unaffected.
 
 Learn more about getting started with ZooKeeper [here](http://zookeeper.apache.org/doc/trunk/zookeeperStarted.html).

‎docs/sql-programming-guide.md

+1 −1
@@ -1372,7 +1372,7 @@ Hive metastore Parquet table to a Spark SQL Parquet table. The reconciliation ru
 1. The reconciled schema contains exactly those fields defined in Hive metastore schema.
 
 - Any fields that only appear in the Parquet schema are dropped in the reconciled schema.
-- Any fileds that only appear in the Hive metastore schema are added as nullable field in the
+- Any fields that only appear in the Hive metastore schema are added as nullable field in the
 reconciled schema.
 
 #### Metadata Refreshing

‎docs/streaming-flume-integration.md

+1 −1
@@ -30,7 +30,7 @@ See the [Flume's documentation](https://flume.apache.org/documentation.html) for
 configuring Flume agents.
 
 #### Configuring Spark Streaming Application
-1. **Linking:** In your SBT/Maven projrect definition, link your streaming application against the following artifact (see [Linking section](streaming-programming-guide.html#linking) in the main programming guide for further information).
+1. **Linking:** In your SBT/Maven project definition, link your streaming application against the following artifact (see [Linking section](streaming-programming-guide.html#linking) in the main programming guide for further information).
 
 groupId = org.apache.spark
 artifactId = spark-streaming-flume_{{site.SCALA_BINARY_VERSION}}

‎docs/streaming-kinesis-integration.md

+2 −2
@@ -95,7 +95,7 @@ A Kinesis stream can be set up at one of the valid Kinesis endpoints with 1 or m
 </div>
 </div>
 
-- `streamingContext`: StreamingContext containg an application name used by Kinesis to tie this Kinesis application to the Kinesis stream
+- `streamingContext`: StreamingContext containing an application name used by Kinesis to tie this Kinesis application to the Kinesis stream
 
 - `[Kinesis app name]`: The application name that will be used to checkpoint the Kinesis
 sequence numbers in DynamoDB table.
@@ -216,6 +216,6 @@ de-aggregate records during consumption.
 
 - Checkpointing too frequently will cause excess load on the AWS checkpoint storage layer and may lead to AWS throttling. The provided example handles this throttling with a random-backoff-retry strategy.
 
-- If no Kinesis checkpoint info exists when the input DStream starts, it will start either from the oldest record available (InitialPositionInStream.TRIM_HORIZON) or from the latest tip (InitialPostitionInStream.LATEST). This is configurable.
+- If no Kinesis checkpoint info exists when the input DStream starts, it will start either from the oldest record available (InitialPositionInStream.TRIM_HORIZON) or from the latest tip (InitialPositionInStream.LATEST). This is configurable.
 - InitialPositionInStream.LATEST could lead to missed records if data is added to the stream while no input DStreams are running (and no checkpoint info is being stored).
 - InitialPositionInStream.TRIM_HORIZON may lead to duplicate processing of records where the impact is dependent on checkpoint frequency and processing idempotency.

‎docs/streaming-programming-guide.md

+1 −1
@@ -158,7 +158,7 @@ JavaReceiverInputDStream<String> lines = jssc.socketTextStream("localhost", 9999
 {% endhighlight %}
 
 This `lines` DStream represents the stream of data that will be received from the data
-server. Each record in this stream is a line of text. Then, we want to split the the lines by
+server. Each record in this stream is a line of text. Then, we want to split the lines by
 space into words.
 
 {% highlight java %}
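
In the Scala API the same split-into-words step is a `flatMap`; a self-contained sketch, with the host, port, and batch interval being illustrative:

```scala
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

object NetworkWordCountSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("network-word-count").setMaster("local[2]")
    val ssc = new StreamingContext(conf, Seconds(1))

    val lines = ssc.socketTextStream("localhost", 9999)  // one text line per record
    val words = lines.flatMap(_.split(" "))              // split each line by space into words
    val wordCounts = words.map(w => (w, 1)).reduceByKey(_ + _)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}
```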

‎graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala

+1 −1
@@ -266,7 +266,7 @@ class GraphImpl[VD: ClassTag, ED: ClassTag] protected (
 }
 }
 
-/** Test whether the closure accesses the the attribute with name `attrName`. */
+/** Test whether the closure accesses the attribute with name `attrName`. */
 private def accessesVertexAttr(closure: AnyRef, attrName: String): Boolean = {
 try {
 BytecodeUtils.invokedMethod(closure, classOf[EdgeTriplet[VD, ED]], attrName)

‎graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala

+1 −1
@@ -166,7 +166,7 @@ object GraphGenerators extends Logging {
 }
 
 /**
-* This method recursively subdivides the the adjacency matrix into quadrants
+* This method recursively subdivides the adjacency matrix into quadrants
 * until it picks a single cell. The naming conventions in this paper match
 * those of the R-MAT paper. There are a power of 2 number of nodes in the graph.
 * The adjacency matrix looks like:

‎mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala

+1 −1
@@ -1301,7 +1301,7 @@ object ALS extends DefaultParamsReadable[ALS] with Logging {
 
 /**
 * Partitioner used by ALS. We requires that getPartition is a projection. That is, for any key k,
-* we have getPartition(getPartition(k)) = getPartition(k). Since the the default HashPartitioner
+* we have getPartition(getPartition(k)) = getPartition(k). Since the default HashPartitioner
 * satisfies this requirement, we simply use a type alias here.
 */
 private[recommendation] type ALSPartitioner = org.apache.spark.HashPartitioner
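
A tiny Scala sketch that spot-checks the projection property this comment relies on, using a handful of sample keys:

```scala
import org.apache.spark.HashPartitioner

object ProjectionCheck {
  def main(args: Array[String]): Unit = {
    val partitioner = new HashPartitioner(8)

    // The property the comment relies on: getPartition(getPartition(k)) == getPartition(k).
    val sampleKeys = Seq(0, 1, 7, 8, 42, 12345, -3)
    sampleKeys.foreach { k =>
      val p = partitioner.getPartition(k)
      assert(partitioner.getPartition(p) == p)
    }
    println("HashPartitioner.getPartition behaves as a projection on these sample keys")
  }
}
```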
