
Commit a1e40b1

neurons authored and srowen committed
[MINOR][DOCS] Remove consecutive duplicated words/typo in Spark Repo
## What changes were proposed in this pull request?

There are many locations in the Spark repo where the same word occurs consecutively. Sometimes they are appropriately placed, but many times they are not. This PR removes the inappropriately duplicated words.

## How was this patch tested?

N/A since only docs or comments were updated.

Author: Niranjan Padmanabhan <[email protected]>

Closes apache#16455 from neurons/np.structure_streaming_doc.
1 parent 7a82505 commit a1e40b1
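The PR description above does not mention any tooling, but duplications like the ones fixed here can be surfaced mechanically with a regex that matches a word repeated back-to-back. The Python sketch below is purely illustrative and is not part of this commit; the file-extension list is an assumption, and hits still need manual review, since some repetitions (e.g. "had had") are legitimate, as the description notes.

```python
import re
import sys
from pathlib import Path

# Flags a word immediately followed by the same word, e.g. "the the" or "bound bound".
# IGNORECASE also catches "The the"; the word boundaries avoid matching "is island".
DUPLICATE_WORD = re.compile(r"\b(\w+)\s+\1\b", re.IGNORECASE)

# Assumed file types to scan: Scala/Java/Python sources and markdown docs.
EXTENSIONS = {".scala", ".java", ".py", ".md"}

def scan(root):
    """Print path:line and the matched text for every consecutive duplicated word under root."""
    for path in Path(root).rglob("*"):
        if path.suffix not in EXTENSIONS or not path.is_file():
            continue
        try:
            lines = path.read_text(encoding="utf-8").splitlines()
        except UnicodeDecodeError:
            continue  # skip binary or oddly encoded files
        for lineno, line in enumerate(lines, start=1):
            for match in DUPLICATE_WORD.finditer(line):
                print("%s:%d: %s" % (path, lineno, match.group(0)))

if __name__ == "__main__":
    scan(sys.argv[1] if len(sys.argv) > 1 else ".")
```

Run against a repository root, this prints one `path:line` entry per candidate; the per-file hunks below show the occurrences this commit actually removed.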


52 files changed: +57 -57 lines changed


core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java

+1 -1

@@ -86,7 +86,7 @@ public int compare(RecordPointerAndKeyPrefix r1, RecordPointerAndKeyPrefix r2) {
  private final PrefixComparators.RadixSortSupport radixSortSupport;

  /**
- * Within this buffer, position {@code 2 * i} holds a pointer pointer to the record at
+ * Within this buffer, position {@code 2 * i} holds a pointer to the record at
  * index {@code i}, while position {@code 2 * i + 1} in the array holds an 8-byte key prefix.
  *
  * Only part of the array will be used to store the pointers, the rest part is preserved as

core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSortDataFormat.java

+1 -1

@@ -25,7 +25,7 @@
  * Supports sorting an array of (record pointer, key prefix) pairs.
  * Used in {@link UnsafeInMemorySorter}.
  * <p>
- * Within each long[] buffer, position {@code 2 * i} holds a pointer pointer to the record at
+ * Within each long[] buffer, position {@code 2 * i} holds a pointer to the record at
  * index {@code i}, while position {@code 2 * i + 1} in the array holds an 8-byte key prefix.
  */
  public final class UnsafeSortDataFormat

core/src/main/scala/org/apache/spark/MapOutputTracker.scala

+1 -1

@@ -317,7 +317,7 @@ private[spark] class MapOutputTrackerMaster(conf: SparkConf,
  pool
  }

- // Make sure that that we aren't going to exceed the max RPC message size by making sure
+ // Make sure that we aren't going to exceed the max RPC message size by making sure
  // we use broadcast to send large map output statuses.
  if (minSizeForBroadcast > maxRpcMessageSize) {
  val msg = s"spark.shuffle.mapOutput.minSizeForBroadcast ($minSizeForBroadcast bytes) must " +

core/src/main/scala/org/apache/spark/TaskEndReason.scala

+1 -1

@@ -98,7 +98,7 @@ case class FetchFailed(
  * 4 task failures, instead we immediately go back to the stage which generated the map output,
  * and regenerate the missing data. (2) we don't count fetch failures for blacklisting, since
  * presumably its not the fault of the executor where the task ran, but the executor which
- * stored the data. This is especially important because we we might rack up a bunch of
+ * stored the data. This is especially important because we might rack up a bunch of
  * fetch-failures in rapid succession, on all nodes of the cluster, due to one bad node.
  */
  override def countTowardsTaskFailures: Boolean = false

core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala

+1 -1

@@ -43,7 +43,7 @@ import org.apache.spark.util.{ThreadUtils, Utils}
  * Execute using
  * ./bin/spark-class org.apache.spark.deploy.FaultToleranceTest
  *
- * Make sure that that the environment includes the following properties in SPARK_DAEMON_JAVA_OPTS
+ * Make sure that the environment includes the following properties in SPARK_DAEMON_JAVA_OPTS
  * *and* SPARK_JAVA_OPTS:
  * - spark.deploy.recoveryMode=ZOOKEEPER
  * - spark.deploy.zookeeper.url=172.17.42.1:2181

core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala

+1 -1

@@ -291,7 +291,7 @@ object HistoryServer extends Logging {

  /**
  * Create a security manager.
- * This turns off security in the SecurityManager, so that the the History Server can start
+ * This turns off security in the SecurityManager, so that the History Server can start
  * in a Spark cluster where security is enabled.
  * @param config configuration for the SecurityManager constructor
  * @return the security manager for use in constructing the History Server.

core/src/main/scala/org/apache/spark/executor/ShuffleReadMetrics.scala

+1 -1

@@ -92,7 +92,7 @@ class ShuffleReadMetrics private[spark] () extends Serializable {
  private[spark] def setRecordsRead(v: Long): Unit = _recordsRead.setValue(v)

  /**
- * Resets the value of the current metrics (`this`) and and merges all the independent
+ * Resets the value of the current metrics (`this`) and merges all the independent
  * [[TempShuffleReadMetrics]] into `this`.
  */
  private[spark] def setMergeValues(metrics: Seq[TempShuffleReadMetrics]): Unit = {

core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala

+1 -1

@@ -37,7 +37,7 @@ import org.apache.spark.storage.{BlockId, StorageLevel}
  import org.apache.spark.util.Utils

  /**
- * A BlockTransferService that uses Netty to fetch a set of blocks at at time.
+ * A BlockTransferService that uses Netty to fetch a set of blocks at time.
  */
  private[spark] class NettyBlockTransferService(
  conf: SparkConf,

core/src/main/scala/org/apache/spark/util/SizeEstimator.scala

+1 -1

@@ -350,7 +350,7 @@ object SizeEstimator extends Logging {
  // 3. consistent fields layouts throughout the hierarchy: This means we should layout
  // superclass first. And we can use superclass's shellSize as a starting point to layout the
  // other fields in this class.
- // 4. class alignment: HotSpot rounds field blocks up to to HeapOopSize not 4 bytes, confirmed
+ // 4. class alignment: HotSpot rounds field blocks up to HeapOopSize not 4 bytes, confirmed
  // with Aleksey. see https://bugs.openjdk.java.net/browse/CODETOOLS-7901322
  //
  // The real world field layout is much more complicated. There are three kinds of fields

core/src/test/scala/org/apache/spark/deploy/history/ApplicationCacheSuite.scala

+1 -1

@@ -253,7 +253,7 @@ class ApplicationCacheSuite extends SparkFunSuite with Logging with MockitoSugar
  assertNotFound(appId, None)
  }

- test("Test that if an attempt ID is is set, it must be used in lookups") {
+ test("Test that if an attempt ID is set, it must be used in lookups") {
  val operations = new StubCacheOperations()
  val clock = new ManualClock(1)
  implicit val cache = new ApplicationCache(operations, retainedApplications = 10, clock = clock)

core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala

+2 -2

@@ -1819,7 +1819,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with Timeou
  assert(mapOutputTracker.getMapSizesByExecutorId(shuffleId, 0).map(_._1).toSet ===
  HashSet(makeBlockManagerId("hostA")))

- // Reducer should run where RDD 2 has preferences, even though though it also has a shuffle dep
+ // Reducer should run where RDD 2 has preferences, even though it also has a shuffle dep
  val reduceTaskSet = taskSets(1)
  assertLocations(reduceTaskSet, Seq(Seq("hostB")))
  complete(reduceTaskSet, Seq((Success, 42)))

@@ -2058,7 +2058,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with Timeou

  // Now complete tasks in the second task set
  val newTaskSet = taskSets(1)
- assert(newTaskSet.tasks.size === 2) // Both tasks 0 and 1 were on on hostA
+ assert(newTaskSet.tasks.size === 2) // Both tasks 0 and 1 were on hostA
  runEvent(makeCompletionEvent(newTaskSet.tasks(0), Success, makeMapStatus("hostB", 2)))
  assert(results.size === 0) // Map stage job should not be complete yet
  runEvent(makeCompletionEvent(newTaskSet.tasks(1), Success, makeMapStatus("hostB", 2)))

core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala

+1 -1

@@ -53,7 +53,7 @@ class ExternalAppendOnlyMapSuite extends SparkFunSuite with LocalSparkContext {
  conf
  }

- test("single insert insert") {
+ test("single insert") {
  val conf = createSparkConf(loadDefaults = false)
  sc = new SparkContext("local", "test", conf)
  val map = createExternalMap[Int]

docs/ml-features.md

+1 -1

@@ -752,7 +752,7 @@ for more details on the API.

  `Interaction` is a `Transformer` which takes vector or double-valued columns, and generates a single vector column that contains the product of all combinations of one value from each input column.

- For example, if you have 2 vector type columns each of which has 3 dimensions as input columns, then then you'll get a 9-dimensional vector as the output column.
+ For example, if you have 2 vector type columns each of which has 3 dimensions as input columns, then you'll get a 9-dimensional vector as the output column.

  **Examples**

docs/mllib-statistics.md

+1 -1

@@ -354,7 +354,7 @@ v = u.map(lambda x: 1.0 + 2.0 * x)
  useful for visualizing empirical probability distributions without requiring assumptions about the
  particular distribution that the observed samples are drawn from. It computes an estimate of the
  probability density function of a random variables, evaluated at a given set of points. It achieves
- this estimate by expressing the PDF of the empirical distribution at a particular point as the the
+ this estimate by expressing the PDF of the empirical distribution at a particular point as the
  mean of PDFs of normal distributions centered around each of the samples.

  <div class="codetabs">

docs/structured-streaming-kafka-integration.md

+1 -1

@@ -244,7 +244,7 @@ Note that the following Kafka params cannot be set and the Kafka source will thr
  - **group.id**: Kafka source will create a unique group id for each query automatically.
  - **auto.offset.reset**: Set the source option `startingOffsets` to specify
  where to start instead. Structured Streaming manages which offsets are consumed internally, rather
- than rely on the kafka Consumer to do it. This will ensure that no data is missed when when new
+ than rely on the kafka Consumer to do it. This will ensure that no data is missed when new
  topics/partitions are dynamically subscribed. Note that `startingOffsets` only applies when a new
  Streaming query is started, and that resuming will always pick up from where the query left off.
  - **key.deserializer**: Keys are always deserialized as byte arrays with ByteArrayDeserializer. Use

docs/structured-streaming-programming-guide.md

+1 -1

@@ -680,7 +680,7 @@ windowedCounts = words.groupBy(

  ### Handling Late Data and Watermarking
  Now consider what happens if one of the events arrives late to the application.
- For example, say, a word generated at 12:04 (i.e. event time) could be received received by
+ For example, say, a word generated at 12:04 (i.e. event time) could be received by
  the application at 12:11. The application should use the time 12:04 instead of 12:11
  to update the older counts for the window `12:00 - 12:10`. This occurs
  naturally in our window-based grouping – Structured Streaming can maintain the intermediate state

examples/src/main/java/org/apache/spark/examples/ml/JavaLDAExample.java

+1 -1

@@ -52,7 +52,7 @@ public static void main(String[] args) {
  double ll = model.logLikelihood(dataset);
  double lp = model.logPerplexity(dataset);
  System.out.println("The lower bound on the log likelihood of the entire corpus: " + ll);
- System.out.println("The upper bound bound on perplexity: " + lp);
+ System.out.println("The upper bound on perplexity: " + lp);

  // Describe topics.
  Dataset<Row> topics = model.describeTopics(3);

examples/src/main/python/ml/lda_example.py

+1 -1

@@ -46,7 +46,7 @@
  ll = model.logLikelihood(dataset)
  lp = model.logPerplexity(dataset)
  print("The lower bound on the log likelihood of the entire corpus: " + str(ll))
- print("The upper bound bound on perplexity: " + str(lp))
+ print("The upper bound on perplexity: " + str(lp))

  # Describe topics.
  topics = model.describeTopics(3)

examples/src/main/scala/org/apache/spark/examples/ml/LDAExample.scala

+1 -1

@@ -50,7 +50,7 @@ object LDAExample {
  val ll = model.logLikelihood(dataset)
  val lp = model.logPerplexity(dataset)
  println(s"The lower bound on the log likelihood of the entire corpus: $ll")
- println(s"The upper bound bound on perplexity: $lp")
+ println(s"The upper bound on perplexity: $lp")

  // Describe topics.
  val topics = model.describeTopics(3)

external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/SparkSink.scala

+1 -1

@@ -45,7 +45,7 @@ import org.apache.flume.sink.AbstractSink
  * the thread itself is blocked and a reference to it saved off.
  *
  * When the ack for that batch is received,
- * the thread which created the transaction is is retrieved and it commits the transaction with the
+ * the thread which created the transaction is retrieved and it commits the transaction with the
  * channel from the same thread it was originally created in (since Flume transactions are
  * thread local). If a nack is received instead, the sink rolls back the transaction. If no ack
  * is received within the specified timeout, the transaction is rolled back too. If an ack comes

external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala

+1 -1

@@ -212,7 +212,7 @@ private[kafka010] class KafkaSourceProvider extends StreamSourceProvider
  |Instead set the source option '$STARTING_OFFSETS_OPTION_KEY' to 'earliest' or 'latest'
  |to specify where to start. Structured Streaming manages which offsets are consumed
  |internally, rather than relying on the kafkaConsumer to do it. This will ensure that no
- |data is missed when when new topics/partitions are dynamically subscribed. Note that
+ |data is missed when new topics/partitions are dynamically subscribed. Note that
  |'$STARTING_OFFSETS_OPTION_KEY' only applies when a new Streaming query is started, and
  |that resuming will always pick up from where the query left off. See the docs for more
  |details.

external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDDSuite.scala

+1 -1

@@ -129,7 +129,7 @@ abstract class KinesisBackedBlockRDDTests(aggregateTestData: Boolean)

  /**
  * Test the WriteAheadLogBackedRDD, by writing some partitions of the data to block manager
- * and the rest to a write ahead log, and then reading reading it all back using the RDD.
+ * and the rest to a write ahead log, and then reading it all back using the RDD.
  * It can also test if the partitions that were read from the log were again stored in
  * block manager.
  *

mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala

+1 -1

@@ -512,7 +512,7 @@ abstract class LDAModel private[ml] (
  }

  /**
- * Calculate an upper bound bound on perplexity. (Lower is better.)
+ * Calculate an upper bound on perplexity. (Lower is better.)
  * See Equation (16) in the Online LDA paper (Hoffman et al., 2010).
  *
  * WARNING: If this model is an instance of [[DistributedLDAModel]] (produced when [[optimizer]]

mllib/src/main/scala/org/apache/spark/mllib/api/python/Word2VecModelWrapper.scala

+1 -1

@@ -54,7 +54,7 @@ private[python] class Word2VecModelWrapper(model: Word2VecModel) {
  }

  /**
- * Finds words similar to the the vector representation of a word without
+ * Finds words similar to the vector representation of a word without
  * filtering results.
  * @param vector a vector
  * @param num number of synonyms to find

mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala

+1 -1

@@ -245,7 +245,7 @@ class LocalLDAModel private[spark] (
  }

  /**
- * Calculate an upper bound bound on perplexity. (Lower is better.)
+ * Calculate an upper bound on perplexity. (Lower is better.)
  * See Equation (16) in original Online LDA paper.
  *
  * @param documents test corpus to use for calculating perplexity

mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala

+1 -1

@@ -29,7 +29,7 @@ import org.apache.spark.streaming.dstream.DStream
  /**
  * :: DeveloperApi ::
  * StreamingLinearAlgorithm implements methods for continuously
- * training a generalized linear model model on streaming data,
+ * training a generalized linear model on streaming data,
  * and using it for prediction on (possibly different) streaming data.
  *
  * This class takes as type parameters a GeneralizedLinearModel,

python/pyspark/ml/clustering.py

+1 -1

@@ -699,7 +699,7 @@ def logLikelihood(self, dataset):
  @since("2.0.0")
  def logPerplexity(self, dataset):
  """
- Calculate an upper bound bound on perplexity. (Lower is better.)
+ Calculate an upper bound on perplexity. (Lower is better.)
  See Equation (16) in the Online LDA paper (Hoffman et al., 2010).

  WARNING: If this model is an instance of :py:class:`DistributedLDAModel` (produced when

python/pyspark/ml/linalg/__init__.py

+2 -2

@@ -481,7 +481,7 @@ def __init__(self, size, *args):
  >>> SparseVector(4, {1:1.0, 6:2.0})
  Traceback (most recent call last):
  ...
- AssertionError: Index 6 is out of the the size of vector with size=4
+ AssertionError: Index 6 is out of the size of vector with size=4
  >>> SparseVector(4, {-1:1.0})
  Traceback (most recent call last):
  ...

@@ -521,7 +521,7 @@ def __init__(self, size, *args):

  if self.indices.size > 0:
  assert np.max(self.indices) < self.size, \
- "Index %d is out of the the size of vector with size=%d" \
+ "Index %d is out of the size of vector with size=%d" \
  % (np.max(self.indices), self.size)
  assert np.min(self.indices) >= 0, \
  "Contains negative index %d" % (np.min(self.indices))

python/pyspark/sql/utils.py

+1 -1

@@ -95,7 +95,7 @@ def install_exception_handler():
  original = py4j.protocol.get_return_value
  # The original `get_return_value` is not patched, it's idempotent.
  patched = capture_sql_exception(original)
- # only patch the one used in in py4j.java_gateway (call Java API)
+ # only patch the one used in py4j.java_gateway (call Java API)
  py4j.java_gateway.get_return_value = patched


resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/LocalityPreferredContainerPlacementStrategy.scala

+1 -1

@@ -32,7 +32,7 @@ private[yarn] case class ContainerLocalityPreferences(nodes: Array[String], rack

  /**
  * This strategy is calculating the optimal locality preferences of YARN containers by considering
- * the node ratio of pending tasks, number of required cores/containers and and locality of current
+ * the node ratio of pending tasks, number of required cores/containers and locality of current
  * existing and pending allocated containers. The target of this algorithm is to maximize the number
  * of tasks that would run locally.
  *

sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java

+1 -1

@@ -196,7 +196,7 @@ public void setNullAt(int i) {
  assertIndexIsValid(i);
  BitSetMethods.set(baseObject, baseOffset, i);
  // To preserve row equality, zero out the value when setting the column to null.
- // Since this row does does not currently support updates to variable-length values, we don't
+ // Since this row does not currently support updates to variable-length values, we don't
  // have to worry about zeroing out that data.
  Platform.putLong(baseObject, getFieldOffset(i), 0);
  }

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala

+1 -1

@@ -516,7 +516,7 @@ case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction {
  * into the number of buckets); both variables are based on the size of the current partition.
  * During the calculation process the function keeps track of the current row number, the current
  * bucket number, and the row number at which the bucket will change (bucketThreshold). When the
- * current row number reaches bucket threshold, the bucket value is increased by one and the the
+ * current row number reaches bucket threshold, the bucket value is increased by one and the
  * threshold is increased by the bucket size (plus one extra if the current bucket is padded).
  *
  * This documentation has been based upon similar documentation for the Hive and Presto projects.

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala

+1 -1

@@ -795,7 +795,7 @@ case object OneRowRelation extends LeafNode {

  /**
  * Computes [[Statistics]] for this plan. The default implementation assumes the output
- * cardinality is the product of of all child plan's cardinality, i.e. applies in the case
+ * cardinality is the product of all child plan's cardinality, i.e. applies in the case
  * of cartesian joins.
  *
  * [[LeafNode]]s must override this.

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala

+2 -2

@@ -142,7 +142,7 @@ object DateTimeUtils {
  }

  /**
- * Returns the number of days since epoch from from java.sql.Date.
+ * Returns the number of days since epoch from java.sql.Date.
  */
  def fromJavaDate(date: Date): SQLDate = {
  millisToDays(date.getTime)

@@ -503,7 +503,7 @@ object DateTimeUtils {
  }

  /**
- * Calculates the year and and the number of the day in the year for the given
+ * Calculates the year and the number of the day in the year for the given
  * number of days. The given days is the number of days since 1.1.1970.
  *
  * The calculation uses the fact that the period 1.1.2001 until 31.12.2400 is

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AttributeSetSuite.scala

+1 -1

@@ -52,7 +52,7 @@ class AttributeSetSuite extends SparkFunSuite {
  assert((aSet ++ bSet).contains(aLower) === true)
  }

- test("extracts all references references") {
+ test("extracts all references ") {
  val addSet = AttributeSet(Add(aUpper, Alias(bUpper, "test")()):: Nil)
  assert(addSet.contains(aUpper))
  assert(addSet.contains(aLower))

sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala

+1 -1

@@ -361,7 +361,7 @@ class Dataset[T] private[sql](
  * method used to map columns depend on the type of `U`:
  * - When `U` is a class, fields for the class will be mapped to columns of the same name
  * (case sensitivity is determined by `spark.sql.caseSensitive`).
- * - When `U` is a tuple, the columns will be be mapped by ordinal (i.e. the first column will
+ * - When `U` is a tuple, the columns will be mapped by ordinal (i.e. the first column will
  * be assigned to `_1`).
  * - When `U` is a primitive type (i.e. String, Int, etc), then the first column of the
  * `DataFrame` will be used.

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala

+1 -1

@@ -41,7 +41,7 @@ object PartitionPath {
  }

  /**
- * Holds a directory in a partitioned collection of files as well as as the partition values
+ * Holds a directory in a partitioned collection of files as well as the partition values
  * in the form of a Row. Before scanning, the files at `path` need to be enumerated.
  */
  case class PartitionPath(values: InternalRow, path: Path)
