fix: isLikelyAacData should match any aac data but only aac data (videojs#347)

Prior to this change isLikelyAacData returned true for any data that starts with `ID3` string bytes, which can be almost any audio file. Now we check at the start of the file, or after the `ID3` tag, for the aac syncword and verify that the layer bits are set to 0.
keeful · Jul 10, 2020 · 315e04a · 315e04a
1 parent 37a6e7d
commit 315e04a
Show file tree

Hide file tree

Showing 5 changed files with 63 additions and 40 deletions.
diff --git a/lib/aac/utils.js b/lib/aac/utils.js
@@ -24,13 +24,37 @@ var ADTS_SAMPLING_FREQUENCIES = [
   7350
 ];
 
+var parseId3TagSize = function(header, byteIndex) {
+  var
+    returnSize = (header[byteIndex + 6] << 21) |
+                 (header[byteIndex + 7] << 14) |
+                 (header[byteIndex + 8] << 7) |
+                 (header[byteIndex + 9]),
+    flags = header[byteIndex + 5],
+    footerPresent = (flags & 16) >> 4;
+
+  if (footerPresent) {
+    return returnSize + 20;
+  }
+  return returnSize + 10;
+};
+
+// TODO: use vhs-utils
 var isLikelyAacData = function(data) {
-  if ((data[0] === 'I'.charCodeAt(0)) &&
+  var offset = 0;
+
+  if (data.length > 10 && (data[0] === 'I'.charCodeAt(0)) &&
       (data[1] === 'D'.charCodeAt(0)) &&
       (data[2] === '3'.charCodeAt(0))) {
-    return true;
+    offset = parseId3TagSize(data, 0);
   }
-  return false;
+
+  return data.length >= offset + 2 &&
+    (data[offset] & 0xFF) === 0xFF &&
+    (data[offset + 1] & 0xF0) === 0xF0 &&
+    // verify that the 2 layer bits are 0, aka this
+    // is not mp3 data but aac data.
+    (data[offset + 1] & 0x16) === 0x10;
 };
 
 var parseSyncSafeInteger = function(data) {
@@ -56,21 +80,6 @@ var parseIso88591 = function(bytes, start, end) {
   return unescape(percentEncode(bytes, start, end)); // jshint ignore:line
 };
 
-var parseId3TagSize = function(header, byteIndex) {
-  var
-    returnSize = (header[byteIndex + 6] << 21) |
-                 (header[byteIndex + 7] << 14) |
-                 (header[byteIndex + 8] << 7) |
-                 (header[byteIndex + 9]),
-    flags = header[byteIndex + 5],
-    footerPresent = (flags & 16) >> 4;
-
-  if (footerPresent) {
-    return returnSize + 20;
-  }
-  return returnSize + 10;
-};
-
 var parseAdtsSize = function(header, byteIndex) {
   var
     lowThree = (header[byteIndex + 5] & 0xE0) >> 5,

diff --git a/lib/mp4/transmuxer.js b/lib/mp4/transmuxer.js
@@ -911,27 +911,29 @@ Transmuxer = function(options) {
     });
 
     pipeline.aacStream.on('data', function(data) {
-      if (data.type === 'timed-metadata' && !pipeline.audioSegmentStream) {
-        audioTrack = audioTrack || {
-          timelineStartInfo: {
-            baseMediaDecodeTime: self.baseMediaDecodeTime
-          },
-          codec: 'adts',
-          type: 'audio'
-        };
-        // hook up the audio segment stream to the first track with aac data
-        pipeline.coalesceStream.numberOfTracks++;
-        pipeline.audioSegmentStream = new AudioSegmentStream(audioTrack, options);
-
-        pipeline.audioSegmentStream.on('timingInfo',
-          self.trigger.bind(self, 'audioTimingInfo'));
-
-        // Set up the final part of the audio pipeline
-        pipeline.adtsStream
-          .pipe(pipeline.audioSegmentStream)
-          .pipe(pipeline.coalesceStream);
+      if ((data.type !== 'timed-metadata' && data.type !== 'audio') || pipeline.audioSegmentStream) {
+        return;
       }
 
+      audioTrack = audioTrack || {
+        timelineStartInfo: {
+          baseMediaDecodeTime: self.baseMediaDecodeTime
+        },
+        codec: 'adts',
+        type: 'audio'
+      };
+      // hook up the audio segment stream to the first track with aac data
+      pipeline.coalesceStream.numberOfTracks++;
+      pipeline.audioSegmentStream = new AudioSegmentStream(audioTrack, options);
+
+      pipeline.audioSegmentStream.on('timingInfo',
+        self.trigger.bind(self, 'audioTimingInfo'));
+
+      // Set up the final part of the audio pipeline
+      pipeline.adtsStream
+        .pipe(pipeline.audioSegmentStream)
+        .pipe(pipeline.coalesceStream);
+
       // emit pmt info
       self.trigger('trackinfo', {
         hasAudio: !!audioTrack,

diff --git a/lib/partial/transmuxer.js b/lib/partial/transmuxer.js
@@ -186,7 +186,7 @@ var aacPipeline = function(options) {
   });
 
   pipeline.aacStream.on('data', function(data) {
-    if (data.type !== 'timed-metadata' || pipeline.audioSegmentStream) {
+    if ((data.type !== 'timed-metadata' && data.type !== 'audio') || pipeline.audioSegmentStream) {
       return;
     }
 

diff --git a/test/aac-utils.test.js b/test/aac-utils.test.js
@@ -11,6 +11,16 @@ var audioFrameOffset = 73;
 
 QUnit.module('AAC Utils');
 
+QUnit.test('correctly determines aac data', function(assert) {
+  assert.ok(utils.isLikelyAacData(testSegment), 'test segment is aac');
+
+  var id3Offset = utils.parseId3TagSize(testSegment, 0);
+  assert.ok(utils.isLikelyAacData(testSegment.subarray(id3Offset)), 'test segment is aac without id3');
+  assert.notOk(utils.isLikelyAacData(testSegment.subarray(id3Offset + 25)), 'non aac data not recognized');
+  assert.notOk(utils.isLikelyAacData(testSegment.subarray(0, 5)), 'not enough aac data is not recognized');
+});
+
+
 QUnit.test('correctly parses aac packet type', function() {
   QUnit.equal(utils.parseType(testSegment, id3TagOffset), 'timed-metadata',
     'parsed timed-metadata type');

diff --git a/test/transmuxer.test.js b/test/transmuxer.test.js
@@ -3791,14 +3791,14 @@ QUnit.test('pipeline dynamically configures itself based on input', function() {
   transmuxer.flush();
   QUnit.equal(transmuxer.transmuxPipeline_.type, 'ts', 'detected TS file data');
 
-  transmuxer.push(new Uint8Array(id3.id3Tag(id3.id3Frame('PRIV', 0x00, 0x01))));
+  transmuxer.push(new Uint8Array(id3.id3Tag(id3.id3Frame('PRIV', 0x00, 0x01)).concat([0xFF, 0xF1])));
   transmuxer.flush();
   QUnit.equal(transmuxer.transmuxPipeline_.type, 'aac', 'detected AAC file data');
 });
 
 QUnit.test('reuses audio track object when the pipeline reconfigures itself', function() {
   var boxes, segments = [],
-    id3Tag = new Uint8Array(73),
+    id3Tag = new Uint8Array(75),
     streamTimestamp = 'com.apple.streaming.transportStreamTimestamp',
     priv = 'PRIV',
     i,
@@ -3813,6 +3813,8 @@ QUnit.test('reuses audio track object when the pipeline reconfigures itself', fu
   id3Tag[70] = 13;
   id3Tag[71] = 187;
   id3Tag[72] = 160;
+  id3Tag[73] = 0xFF;
+  id3Tag[74] = 0xF1;
 
   for (i = 0; i < priv.length; i++) {
     id3Tag[i + 10] = priv.charCodeAt(i);
-Original file line number
+Diff line change
@@ Expand Up / @@ -186,7 +186,7 @@ var aacPipeline = function(options) { @@
       });
       pipeline.aacStream.on('data', function(data) {
-        if (data.type !== 'timed-metadata' || pipeline.audioSegmentStream) {
+        if ((data.type !== 'timed-metadata' && data.type !== 'audio') || pipeline.audioSegmentStream) {
           return;
         }
@@ Expand Down @@