Skip to content

Commit

Permalink
feat(server): Fix exif data parsing (immich-app#1326)
Browse files Browse the repository at this point in the history
* Trying to get exifdata working with different lib.

* Got the new library working.

* Addressing PR comments.

* Removed unused vars and moved the eslint-disable comment to the proper place.

* Fix time-utils to use the exiftool-vendored lib.

Fixed also one test, as that would be valid.

* Using filename for timestamp as well if possible.

* Add new tests for time-utils.

* Remember to gracefully terminate the exiftool instance when not needed.

* eslint ignore...

* Apparently Dockerfile changes were not pushed.

* feat(dockerfile): Tweak the Server Dockerfile

* feat(server): getTimestampFromFilename should return string or undefined.

* feat(server): If we don't have exifData or timestamp from filename, raise an error.

* Apparently test was already right, but my local system disagrees.

* More utilities for parsing and fix the timestampFromFilename.

It was returning an incorrect date because the regex is not well suited to this: files named `IMG_0115.HEIC` would get parsed incorrectly by it.

* feat(server/docker): Install perl as it seems to be required.

* feat(server): remember to include exposureTime and focalLength in new exif data.

* feat(server): Remove the parsing from filename as requested.

* feat(server): Import exiftool differently in time-utils.

* feat(server): Error handling when there is no exifData.

* feat(server): Fixes for the error handling when there is no exifData.

* feat(server): Remember to include modifyDate despite no exif.

* feat(server): Remember to include model of Camera.

* feat(server): Fixing up Exiftool usage.

This includes proper logging for it, which had to be done through a wrapper because ExifTool expects all of the logging levels, which the NestJS logger doesn't implement.

* feat(server): Do not use a wrapper for ExifTool logging.

* fix merge conflicts in metadata-extractor
  • Loading branch information
samip5 authored Jan 17, 2023
1 parent 693adf8 commit dff10e8
Show file tree
Hide file tree
Showing 5 changed files with 164 additions and 111 deletions.
4 changes: 2 additions & 2 deletions server/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ FROM node:16-alpine3.14 as builder

WORKDIR /usr/src/app

RUN apk add --update-cache build-base python3 libheif vips-dev ffmpeg
RUN apk add --update-cache build-base python3 libheif vips-dev ffmpeg exiftool perl

COPY package.json package-lock.json ./

Expand All @@ -21,7 +21,7 @@ FROM node:16-alpine3.14

WORKDIR /usr/src/app

RUN apk add --no-cache libheif vips ffmpeg
RUN apk add --no-cache libheif vips ffmpeg exiftool perl

COPY --from=prod /usr/src/app/node_modules ./node_modules
COPY --from=prod /usr/src/app/dist ./dist
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import { AssetEntity, ExifEntity } from '@app/infra';
import {
IExifExtractionProcessor,
IVideoLengthExtractionProcessor,
IReverseGeocodingProcessor,
IVideoLengthExtractionProcessor,
QueueName,
JobName,
} from '@app/job';
Expand All @@ -11,16 +11,15 @@ import { Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { InjectRepository } from '@nestjs/typeorm';
import { Job } from 'bull';
import exifr from 'exifr';
import ffmpeg from 'fluent-ffmpeg';
import path from 'path';
import sharp from 'sharp';
import { Repository } from 'typeorm/repository/Repository';
import geocoder, { InitOptions } from 'local-reverse-geocoder';
import { getName } from 'i18n-iso-countries';
import { find } from 'geo-tz';
import * as luxon from 'luxon';
import fs from 'node:fs';
import { ExifDateTime, ExifTool } from 'exiftool-vendored';
import { timeUtils } from '@app/common';

function geocoderInit(init: InitOptions) {
return new Promise<void>(function (resolve) {
Expand Down Expand Up @@ -75,7 +74,6 @@ export type GeoData = {
export class MetadataExtractionProcessor {
private logger = new Logger(MetadataExtractionProcessor.name);
private isGeocodeInitialized = false;

constructor(
@InjectRepository(AssetEntity)
private assetRepository: Repository<AssetEntity>,
Expand All @@ -102,7 +100,7 @@ export class MetadataExtractionProcessor {
configService.get('REVERSE_GEOCODING_DUMP_DIRECTORY') || process.cwd() + '/.reverse-geocoding-dump/',
}).then(() => {
this.isGeocodeInitialized = true;
Logger.log('Reverse Geocoding Initialised');
this.logger.log('Reverse Geocoding Initialised');
});
}
}
Expand Down Expand Up @@ -142,84 +140,48 @@ export class MetadataExtractionProcessor {
async extractExifInfo(job: Job<IExifExtractionProcessor>) {
try {
const { asset, fileName }: { asset: AssetEntity; fileName: string } = job.data;
const exifData = await exifr.parse(asset.originalPath, {
tiff: true,
ifd0: true as any,
ifd1: true,
exif: true,
gps: true,
interop: true,
xmp: true,
icc: true,
iptc: true,
jfif: true,
ihdr: true,
const exiftool = new ExifTool();
const exifData = await exiftool.read(asset.originalPath).catch((e) => {
this.logger.warn(`The exifData parsing failed due to: ${e} on file ${asset.originalPath}`);
});

if (!exifData) {
throw new Error(`can not parse exif data from file ${asset.originalPath}`);
}

const createdAt = new Date(exifData.DateTimeOriginal || exifData.CreateDate || new Date(asset.createdAt));
const exifToDate = (exifDate: string | ExifDateTime | undefined) =>
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
exifDate ? new Date(exifDate.toString()!) : null;

let createdAt = exifToDate(asset.createdAt);
const newExif = new ExifEntity();
if (exifData) {
createdAt = exifToDate(exifData.DateTimeOriginal ?? exifData.CreateDate ?? asset.createdAt);
const modifyDate = exifToDate(exifData.ModifyDate);
newExif.make = exifData['Make'] || null;
newExif.model = exifData['Model'] || null;
newExif.exifImageHeight = exifData['ExifImageHeight'] || exifData['ImageHeight'] || null;
newExif.exifImageWidth = exifData['ExifImageWidth'] || exifData['ImageWidth'] || null;
newExif.exposureTime = (await timeUtils.parseStringToNumber(exifData['ExposureTime'])) || null;
newExif.orientation = exifData['Orientation']?.toString() || null;
newExif.dateTimeOriginal = createdAt;
newExif.modifyDate = modifyDate || null;
newExif.lensModel = exifData['LensModel'] || null;
newExif.fNumber = exifData['FNumber'] || null;
newExif.focalLength = (await timeUtils.parseStringToNumber(exifData['FocalLength'])) || null;
newExif.iso = exifData['ISO'] || null;
newExif.latitude = exifData['GPSLatitude'] || null;
newExif.longitude = exifData['GPSLongitude'] || null;
} else {
newExif.dateTimeOriginal = createdAt;
newExif.modifyDate = exifToDate(asset.modifiedAt);
}
const fileStats = fs.statSync(asset.originalPath);
const fileSizeInBytes = fileStats.size;

const newExif = new ExifEntity();
newExif.assetId = asset.id;
newExif.make = exifData['Make'] || null;
newExif.model = exifData['Model'] || null;
newExif.imageName = path.parse(fileName).name || null;
newExif.exifImageHeight = exifData['ExifImageHeight'] || exifData['ImageHeight'] || null;
newExif.exifImageWidth = exifData['ExifImageWidth'] || exifData['ImageWidth'] || null;
newExif.fileSizeInByte = fileSizeInBytes || null;
newExif.orientation = exifData['Orientation'] || null;
newExif.dateTimeOriginal = createdAt;
newExif.modifyDate = exifData['ModifyDate'] || null;
newExif.lensModel = exifData['LensModel'] || null;
newExif.fNumber = exifData['FNumber'] || null;
newExif.focalLength = exifData['FocalLength'] || null;
newExif.iso = exifData['ISO'] || null;
newExif.exposureTime = exifData['ExposureTime'] || null;
newExif.latitude = exifData['latitude'] || null;
newExif.longitude = exifData['longitude'] || null;

/**
* Correctly store UTC time based on timezone
* The timestamp being extracted from EXIF is based on the timezone
* of the container. We need to correct it to UTC time based on the
* timezone of the location.
*
* The timezone of the location can be extracted from the lat/lon
* GPS coordinates.
*
* Any asset that doesn't have this information will use the
* createdAt timestamp of the asset instead.
*
* The updated/corrected timestamp will be used to update the
* createdAt timestamp in the asset table. So that the information
* is consistent across the database.
* */
if (newExif.longitude && newExif.latitude) {
const tz = find(newExif.latitude, newExif.longitude)[0];
const localTimeWithTimezone = createdAt.toISOString();

if (localTimeWithTimezone.length == 24) {
// Remove the last character
const localTimeWithoutTimezone = localTimeWithTimezone.slice(0, -1);
const correctUTCTime = luxon.DateTime.fromISO(localTimeWithoutTimezone, { zone: tz }).toUTC().toISO();
newExif.dateTimeOriginal = new Date(correctUTCTime);
await this.assetRepository.save({
id: asset.id,
createdAt: correctUTCTime,
});
}
} else {
await this.assetRepository.save({
id: asset.id,
createdAt: createdAt.toISOString(),
});
}
await this.assetRepository.save({
id: asset.id,
createdAt: createdAt?.toISOString(),
});

/**
* Reverse Geocoding
Expand Down Expand Up @@ -255,6 +217,7 @@ export class MetadataExtractionProcessor {
}

await this.exifRepository.save(newExif);
await exiftool.end();
} catch (error: any) {
this.logger.error(`Error extracting EXIF ${error}`, error?.stack);
}
Expand Down
38 changes: 22 additions & 16 deletions server/libs/common/src/utils/time-utils.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
import exifr from 'exifr';
// This is needed as resolving for the vendored
// exiftool fails in tests otherwise but as it's not meant to be a requirement
// of a project directly I had to include the line below the comment.
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
import { exiftool } from 'exiftool-vendored.pl';

function createTimeUtils() {
const floatRegex = /[+-]?([0-9]*[.])?[0-9]+/;
const checkValidTimestamp = (timestamp: string): boolean => {
const parsedTimestamp = Date.parse(timestamp);

Expand All @@ -19,30 +25,30 @@ function createTimeUtils() {

const getTimestampFromExif = async (originalPath: string): Promise<string> => {
try {
const exifData = await exifr.parse(originalPath, {
tiff: true,
ifd0: true as any,
ifd1: true,
exif: true,
gps: true,
interop: true,
xmp: true,
icc: true,
iptc: true,
jfif: true,
ihdr: true,
});
const exifData = await exiftool.read(originalPath);

if (exifData && exifData['DateTimeOriginal']) {
return exifData['DateTimeOriginal'];
await exiftool.end();
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
return exifData['DateTimeOriginal'].toString()!;
} else {
return new Date().toISOString();
}
} catch (error) {
return new Date().toISOString();
}
};
return { checkValidTimestamp, getTimestampFromExif };

/**
 * Pulls the first numeric token out of a string and converts it to a number.
 *
 * The token is located with `floatRegex` from the enclosing scope; only the
 * full match (index 0) is used, anything before or after it is ignored.
 *
 * @param original - Text that may contain a number, or `undefined`.
 * @returns The parsed number, or `null` when `original` is `undefined` or
 *          nothing in it matches `floatRegex`.
 */
const parseStringToNumber = async (original: string | undefined): Promise<number | null> => {
  // Fall back to an empty list so the destructuring yields `undefined`
  // both for a missing input and for a string without a match.
  const [numericText] = original?.match(floatRegex) ?? [];
  return numericText ? parseFloat(numericText) : null;
};

return { checkValidTimestamp, getTimestampFromExif, parseStringToNumber };
}

export const timeUtils = createTimeUtils();
Loading

0 comments on commit dff10e8

Please sign in to comment.