forked from wangzheng0822/algo
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ad0d122
commit 62328ba
Showing
2 changed files
with
98 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
package ch29_heap_solutions | ||
|
||
import java.io.{BufferedWriter, File, FileWriter} | ||
|
||
import scala.collection.mutable | ||
import scala.io.Source | ||
import scala.util.control.Breaks._ | ||
|
||
object FileMerger {

  /**
    * Merges several files whose characters are each in ascending order into one
    * temporary file whose characters are also in ascending order.
    *
    * A min-heap holds at most one pending character per input file. The smallest
    * pending character is written out and replaced by the next character from the
    * same file, until every pending character has been written.
    *
    * Empty input files contribute nothing, and an empty list produces an empty
    * merged file. Every opened source and the writer are closed before returning,
    * including when an I/O error occurs.
    *
    * @param smallFiles - small files with sorted content
    * @return merged file (a newly created temp file; the caller owns its deletion)
    */
  def mergeFiles(smallFiles: List[File]): File = {
    val output = File.createTempFile("merged-file", ".txt")
    val writer = new BufferedWriter(new FileWriter(output))
    // Tracks every source opened so far so that all of them can be closed in `finally`,
    // even if opening a later file fails.
    val sources = mutable.ArrayBuffer.empty[Source]

    try {
      smallFiles.foreach(smallFile => sources += Source.fromFile(smallFile))

      // PriorityQueue is a max-heap by default; reversing the ordering makes it a min-heap.
      val minHeap = new mutable.PriorityQueue[(Char, Source)]()(
        Ordering.by[(Char, Source), Char](_._1).reverse)

      // Seed the heap with the first character of each non-empty file.
      sources.foreach { source =>
        if (source.hasNext) minHeap.enqueue((source.next(), source))
      }

      // Drain the heap rather than stopping when the sources run dry: characters that
      // were already read from a file are still pending in the heap and must be written.
      while (minHeap.nonEmpty) {
        val (smallest, source) = minHeap.dequeue()
        writer.append(smallest)
        if (source.hasNext) minHeap.enqueue((source.next(), source))
      }
    } finally {
      try writer.close()
      finally sources.foreach(_.close())
    }

    output
  }
}
52 changes: 52 additions & 0 deletions
52
scala/src/test/scala/ch29_heap_solutions/FileMergerTest.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
package ch29_heap_solutions | ||
|
||
import java.io.{BufferedWriter, File, FileWriter} | ||
|
||
import org.scalatest.{FlatSpec, Matchers} | ||
|
||
import scala.io.Source | ||
import scala.util.Random | ||
|
||
/**
  * Exercises FileMerger.mergeFiles with several small temp files of sorted random
  * alphanumeric characters and checks the merged result.
  */
class FileMergerTest extends FlatSpec with Matchers {

  behavior of "FileMergerTest"

  it should "mergeFiles" in {
    val num = 10
    val contentCount = 10
    val random = Random.alphanumeric

    // Each file gets its own consecutive slice of the random stream, sorted ascending.
    val contents: Seq[Array[Char]] = (0 until num).map { i =>
      random.slice(i * contentCount, (i + 1) * contentCount).toArray.sorted
    }

    val files: Seq[File] = contents.zipWithIndex.map { case (content, i) =>
      val file = File.createTempFile(s"$i-small", ".txt")
      val writer = new BufferedWriter(new FileWriter(file))
      try writer.write(content) finally writer.close()
      file
    }

    // Clean up in `finally` so temp files are removed even when an assertion fails.
    try {
      println("small files below")
      files.foreach(printFile)

      val mergedFile = FileMerger.mergeFiles(files.toList)
      try {
        // Checking only that the output is sorted would accept a merge that silently
        // drops characters, so compare against every input character, sorted.
        val expected = contents.flatMap(_.toSeq).sorted.mkString
        readAll(mergedFile) should equal(expected)

        println("")
        println("merged file below")
        printFile(mergedFile)
      } finally {
        mergedFile.delete()
      }
    } finally {
      files.foreach(_.delete())
    }
  }

  /**
    * Prints the file line by line, closing the underlying source afterwards.
    *
    * @param file - file to print to standard output
    */
  def printFile(file: File): Unit = {
    val source = Source.fromFile(file)
    try source.getLines().foreach(println) finally source.close()
  }

  /** Reads the whole file into a String and closes the underlying source. */
  private def readAll(file: File): String = {
    val source = Source.fromFile(file)
    try source.mkString finally source.close()
  }

}