Skip to content

Commit

Permalink
Merge pull request getomni-ai#45 from getomni-ai/XL/add-file-extension
Browse files: browse the repository at this point in the history
Fix file type detection for URLs that do not have a file extension
  • Loading branch information
annapo23 authored Sep 24, 2024
2 parents eaf13b2 + cac678d commit f53a201
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 16 deletions.
17 changes: 7 additions & 10 deletions node-zerox/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,29 +47,26 @@ export const zerox = async ({
await fs.ensureDir(tempDirectory);

// Download the PDF. Get file name.
const localPath = await downloadFile({ filePath, tempDir: tempDirectory });
const { extension, localPath } = await downloadFile({
filePath,
tempDir: tempDirectory,
});
if (!localPath) throw "Failed to save file to local drive";

const fileExtension = path.extname(localPath).toLowerCase();

if (!fileExtension) {
throw new Error("File extension missing");
}

// Sort the `pagesToConvertAsImages` array to make sure we use the right index
// for `formattedPages` as `pdf2pic` always returns images in order
if (Array.isArray(pagesToConvertAsImages)) {
pagesToConvertAsImages.sort((a, b) => a - b);
}

// Convert file to PDF if necessary
if (fileExtension !== ".png") {
if (extension !== ".png") {
let pdfPath: string;
if (fileExtension === ".pdf") {
if (extension === ".pdf") {
pdfPath = localPath;
} else {
pdfPath = await convertFileToPdf({
extension: fileExtension,
extension,
localPath,
tempDir: tempDirectory,
});
Expand Down
31 changes: 26 additions & 5 deletions node-zerox/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { pipeline } from "stream/promises";
import { promisify } from "util";
import axios from "axios";
import fs from "fs-extra";
import mime from "mime-types";
import path from "path";

const convertAsync = promisify(convert);
Expand Down Expand Up @@ -86,14 +87,15 @@ export const downloadFile = async ({
}: {
filePath: string;
tempDir: string;
}): Promise<string | void> => {
}): Promise<{ extension: string; localPath: string }> => {
// Shorten the file name by removing URL parameters
const baseFileName = path.basename(filePath.split("?")[0]);
const localPdfPath = path.join(tempDir, baseFileName);
const localPath = path.join(tempDir, baseFileName);
let mimetype;

// Check if filePath is a URL
if (isValidUrl(filePath)) {
const writer = fs.createWriteStream(localPdfPath);
const writer = fs.createWriteStream(localPath);

const response = await axios({
url: filePath,
Expand All @@ -104,12 +106,31 @@ export const downloadFile = async ({
if (response.status !== 200) {
throw new Error(`HTTP error! Status: ${response.status}`);
}
mimetype = response.headers?.["content-type"];
await pipeline(response.data, writer);
} else {
// If filePath is a local file, copy it to the temp directory
await fs.copyFile(filePath, localPdfPath);
await fs.copyFile(filePath, localPath);
}
return localPdfPath;

if (!mimetype) {
mimetype = mime.lookup(localPath);
}

let extension = mime.extension(mimetype) || "";
if (!extension) {
if (mimetype === "binary/octet-stream") {
extension = ".bin";
} else {
throw new Error("File extension missing");
}
}

if (!extension.startsWith(".")) {
extension = `.${extension}`;
}

return { extension, localPath };
};

// Convert each page to a png and save that image to tmp
Expand Down
9 changes: 8 additions & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"child_process": "^1.0.2",
"fs-extra": "^11.2.0",
"libreoffice-convert": "^1.6.0",
"mime-types": "^2.1.35",
"os": "^0.1.2",
"p-limit": "^3.1.0",
"path": "^0.12.7",
Expand All @@ -24,6 +25,7 @@
},
"devDependencies": {
"@types/fs-extra": "^11.0.4",
"@types/mime-types": "^2.1.4",
"@types/node": "^20.14.11",
"typescript": "^5.5.3"
},
Expand Down

0 comments on commit f53a201

Please sign in to comment.