-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implemented support for PDF documents
- Loading branch information
Showing
7 changed files
with
205 additions
and
48 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
import Foundation | ||
|
||
/// One unit of OCR output: a piece of text paired with a name to store it under.
struct OCRResult {
    /// The text produced for this result.
    var text: String

    /// A file name proposed for this result by whichever operation produced it.
    var suggestedFilename: String
}
|
||
/// A unit of OCR work over a single input file.
protocol OCROperation {
    /// Creates an operation for one input file.
    ///
    /// - Parameters:
    ///   - fileURL: Location of the file to process.
    ///   - customLanguages: Language values handed to the operation.
    ///     NOTE(review): presumably language identifiers for the OCR engine — confirm against callers.
    init(fileURL: URL, customLanguages: [String])

    /// Starts the operation.
    ///
    /// - Returns: A stream of `OCRResult` values.
    /// - Throws: When the operation cannot be started.
    func run() throws -> AsyncThrowingStream<OCRResult, Error>
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
import Quartz | ||
|
||
extension CGPDFDocument {
    /// Renders a single page of the document into a `CGImage`.
    ///
    /// The image is sized to the page's media box and the page is drawn on a white background.
    ///
    /// - Parameter pageNumber: Page number, passed unchanged to `page(at:)`.
    /// - Returns: The rendered page.
    /// - Throws: `Failure` when the page does not exist or no `CGImage` could be obtained.
    func cgImage(at pageNumber: Int) throws -> CGImage {
        guard let pdfPage = page(at: pageNumber) else {
            throw Failure("Page #\(pageNumber) not found.")
        }

        let mediaBox = pdfPage.getBoxRect(.mediaBox)

        let rendered = NSImage(size: mediaBox.size, flipped: true) { bounds in
            guard let context = NSGraphicsContext.current?.cgContext else {
                return false
            }

            // Paint a white backdrop before drawing the page.
            NSColor.white.setFill()
            bounds.fill()

            // The image is created with `flipped: true`, so invert the y-axis
            // again before handing the context to the PDF renderer.
            context.translateBy(x: 0, y: mediaBox.size.height)
            context.scaleBy(x: 1.0, y: -1.0)

            context.drawPDFPage(pdfPage)

            return true
        }

        if let bitmap = rendered.cgImage(forProposedRect: nil, context: nil, hints: nil) {
            return bitmap
        }

        throw Failure("Failed to create CGImage.")
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
import Vision | ||
import Cocoa | ||
|
||
/// Runs OCR over a single image file and emits exactly one result.
final class ImageOCROperation: OCROperation {

    /// Location of the image to process.
    let imageURL: URL

    /// Language values forwarded to `CGImageOCR`.
    let customLanguages: [String]

    /// Creates an operation for the image at `fileURL`.
    ///
    /// - Parameters:
    ///   - fileURL: Location of the image file.
    ///   - customLanguages: Language values forwarded to `CGImageOCR`.
    init(fileURL: URL, customLanguages: [String]) {
        self.imageURL = fileURL
        self.customLanguages = customLanguages
    }

    /// Loads the image and starts OCR on it.
    ///
    /// - Returns: A stream that yields one `OCRResult`, whose suggested file name is the
    ///   image's file name without its path extension, and then finishes. If OCR fails,
    ///   the stream finishes by throwing that error.
    /// - Throws: `Failure` when the image cannot be loaded or converted to a `CGImage`.
    func run() throws -> AsyncThrowingStream<OCRResult, Error> {
        guard let image = NSImage(contentsOf: imageURL) else {
            throw Failure("Couldn't read image at \(imageURL.path)")
        }

        guard let cgImage = image.cgImage(forProposedRect: nil, context: nil, hints: nil) else {
            throw Failure("Couldn't read CGImage for \(imageURL.lastPathComponent)")
        }

        let filename = imageURL.deletingPathExtension().lastPathComponent

        let ocr = CGImageOCR(image: cgImage, customLanguages: customLanguages)

        return AsyncThrowingStream { continuation in
            Task {
                do {
                    let text = try await ocr.run()

                    continuation.yield(OCRResult(text: text, suggestedFilename: filename))
                    continuation.finish()
                } catch {
                    // An error thrown inside an unstructured Task is otherwise dropped,
                    // which would leave the stream unfinished and its consumer waiting forever.
                    continuation.finish(throwing: error)
                }
            }
        }
    }

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
import Vision | ||
import Quartz | ||
|
||
/// Runs OCR over every page of a PDF document, emitting one result per page.
final class PDFOCROperation: OCROperation {

    /// Location of the PDF to process.
    let documentURL: URL

    /// Language values forwarded to `CGImageOCR`.
    let customLanguages: [String]

    /// Creates an operation for the PDF at `fileURL`.
    ///
    /// - Parameters:
    ///   - fileURL: Location of the PDF file.
    ///   - customLanguages: Language values forwarded to `CGImageOCR`.
    init(fileURL: URL, customLanguages: [String]) {
        self.documentURL = fileURL
        self.customLanguages = customLanguages
    }

    /// Opens the PDF and starts OCR on its pages, in order.
    ///
    /// Each page that is processed successfully yields an `OCRResult` whose suggested file
    /// name is `<document name without extension>-<page number>`. A page that fails is
    /// reported on stderr and skipped; it does not terminate the stream. Processing stops
    /// early once the stream is terminated (for example, when its consumer stops iterating).
    ///
    /// - Returns: A stream of per-page results that finishes after the last page.
    /// - Throws: `Failure` when the PDF cannot be opened or has no pages.
    func run() throws -> AsyncThrowingStream<OCRResult, Error> {
        let basename = documentURL.deletingPathExtension().lastPathComponent

        guard let document = CGPDFDocument(documentURL as CFURL) else {
            throw Failure("Failed to read PDF at \(documentURL.path)")
        }

        guard document.numberOfPages > 0 else {
            throw Failure("PDF has no pages at \(documentURL.path)")
        }

        return AsyncThrowingStream { continuation in
            let task = Task {
                for page in 1...document.numberOfPages {
                    // Nobody is listening any more; don't render and OCR the remaining pages.
                    if Task.isCancelled { break }

                    do {
                        let cgImage = try document.cgImage(at: page)

                        let ocr = CGImageOCR(image: cgImage, customLanguages: customLanguages)

                        let text = try await ocr.run()

                        let result = OCRResult(text: text, suggestedFilename: basename + "-\(page)")

                        continuation.yield(result)
                    } catch {
                        // A failure caused by cancellation is not worth a per-page warning.
                        if Task.isCancelled { break }

                        // Don't want to interrupt processing if a single page fails, so don't terminate the stream here.
                        fputs("WARN: Error processing PDF page #\(page) at \(documentURL.path): \(error)\n", stderr)
                    }
                }

                continuation.finish()
            }

            // Cancel the work when the stream ends. Cancelling after a normal finish is a no-op.
            continuation.onTermination = { _ in task.cancel() }
        }
    }

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters