From ab867b14d23ec237c8c0a1520c13cd9c17e521a3 Mon Sep 17 00:00:00 2001 From: jrivera Date: Thu, 3 Dec 2015 18:59:12 -0500 Subject: [PATCH 1/3] Fix sei/caption parsing * SEI nal units are purged of any emulation prevention bytes * SEI parsing now works with SEI nals containing multiple messages * Caption parsing now treats preamble address codes as spaces, improving readability * Caption parsing now correctly ignores redundant control codes * Caption packets are reordered based on PTS before being processed to handle out-of-order caption packets arising from B-frames --- lib/codecs/h264.js | 1 + lib/m2ts/caption-stream.js | 142 ++++++++---- test/caption-stream-test.js | 431 ++++++++++++++++++++++-------------- 3 files changed, 374 insertions(+), 200 deletions(-) diff --git a/lib/codecs/h264.js b/lib/codecs/h264.js index c9e3fa08..60eadf9d 100644 --- a/lib/codecs/h264.js +++ b/lib/codecs/h264.js @@ -152,6 +152,7 @@ H264Stream = function() { break; case 0x06: event.nalUnitType = 'sei_rbsp'; + event.escapedRBSP = discardEmulationPreventionBytes(data.subarray(1)); break; case 0x07: event.nalUnitType = 'seq_parameter_set_rbsp'; diff --git a/lib/m2ts/caption-stream.js b/lib/m2ts/caption-stream.js index 42b44821..2c7450e2 100644 --- a/lib/m2ts/caption-stream.js +++ b/lib/m2ts/caption-stream.js @@ -20,38 +20,62 @@ // payload type field to indicate how they are to be // interpreted. CEAS-708 caption content is always transmitted with // payload type 0x04. - var USER_DATA_REGISTERED_ITU_T_T35 = 4; + var USER_DATA_REGISTERED_ITU_T_T35 = 4, + RBSP_TRAILING_BITS = 128; /** * Parse a supplemental enhancement information (SEI) NAL unit. + * Stops parsing once a message of type ITU T T35 has been found. * * @param bytes {Uint8Array} the bytes of a SEI NAL unit * @return {object} the parsed SEI payload * @see Rec.
ITU-T H.264, 7.3.2.3.1 */ var parseSei = function(bytes) { - var result = { - payloadType: -1, - payloadSize: 0, - }, i; - - // parse the payload type - // if the payload type is not user_data_registered_itu_t_t35, - // don't bother parsing any further - if (bytes[1] !== USER_DATA_REGISTERED_ITU_T_T35) { - return result; - } - result.payloadType = USER_DATA_REGISTERED_ITU_T_T35; + var + i = 0, + result = { + payloadType: -1, + payloadSize: 0, + }, + payloadType = 0, + payloadSize = 0; + + // go through the sei_rbsp parsing each individual sei_message + while (i < bytes.byteLength) { + // stop once we have hit the end of the sei_rbsp + if (bytes[i] === RBSP_TRAILING_BITS) { + break; + } - // parse the payload size - for (i = 2; i < bytes.length && bytes[i] === 0xff; i++) { - result.payloadSize += 255; - } - result.payloadSize <<= 8; - result.payloadSize |= bytes[i]; - i++; + // Parse payload type + while (bytes[i] === 0xFF) { + payloadType += 255; + i++; + } + payloadType += bytes[i++]; - result.payload = bytes.subarray(i, i + result.payloadSize); + // Parse payload size + while (bytes[i] === 0xFF) { + payloadSize += 255; + i++; + } + payloadSize += bytes[i++]; + + // this sei_message is a 608/708 caption so save it and break + // there can only ever be one caption message in a frame's sei + if (!result.payload && payloadType === USER_DATA_REGISTERED_ITU_T_T35) { + result.payloadType = payloadType; + result.payloadSize = payloadSize; + result.payload = bytes.subarray(i, i + payloadSize); + break; + } + + // skip the payload and parse the next message + i += payloadSize; + payloadType = 0; + payloadSize = 0; + } return result; }; @@ -115,21 +139,28 @@ }; var CaptionStream = function() { + var self = this; CaptionStream.prototype.init.call(this); + this.captionPackets_ = []; + this.field1_ = new Cea608Stream(); + + // forward data and done events from field1_ to this CaptionStream this.field1_.on('data', this.trigger.bind(this, 'data')); + this.field1_.on('done', 
this.trigger.bind(this, 'done')); }; CaptionStream.prototype = new muxjs.utils.Stream(); CaptionStream.prototype.push = function(event) { - var sei, userData, captionPackets, i; + var sei, userData, captionPackets; // only examine SEI NALs if (event.nalUnitType !== 'sei_rbsp') { return; } + // parse the sei - sei = parseSei(event.data); + sei = parseSei(event.escapedRBSP); // ignore everything but user_data_registered_itu_t_t35 if (sei.payloadType !== USER_DATA_REGISTERED_ITU_T_T35) { @@ -144,22 +175,35 @@ return; } - // parse out CC data packets - captionPackets = parseCaptionPackets(event.pts, userData); + // parse out CC data packets and save them for later + this.captionPackets_ = this.captionPackets_.concat(parseCaptionPackets(event.pts, userData)); + }; - // send the data to the appropriate field - for (i = 0; i < captionPackets.length; i++) { - if (captionPackets[i].type === 0) { - this.field1_.push(captionPackets[i]); - } + CaptionStream.prototype.flush = function () { + // make sure we actually parsed captions before proceeding + if (!this.captionPackets_.length) { + this.field1_.flush(); + return; } - }; + // sort caption byte-pairs based on their PTS values + this.captionPackets_.sort(function(a, b) { + return a.pts - b.pts; + }); + + // Push each caption into Cea608Stream + this.captionPackets_.forEach(this.field1_.push, this.field1_); + + this.captionPackets_.length = 0; + this.field1_.flush(); + return; + }; // ---------------------- // Session to Application // ---------------------- var BASIC_CHARACTER_TRANSLATION = { + 0x2a: 0xe1, 0x5c: 0xe9, 0x5e: 0xed, 0x5f: 0xf3, @@ -168,7 +212,6 @@ 0x7c: 0xf7, 0x7d: 0xd1, 0x7e: 0xf1, - 0x2a: 0xe1, 0x7f: 0x2588 }; @@ -186,8 +229,8 @@ ROLL_UP_2_ROWS = 0x1425, ROLL_UP_3_ROWS = 0x1426, ROLL_UP_4_ROWS = 0x1427, + RESUME_DIRECT_CAPTIONING = 0x1429, CARRIAGE_RETURN = 0x142d, - // Erasure BACKSPACE = 0x1421, ERASE_DISPLAYED_MEMORY = 0x142c, @@ -217,16 +260,29 @@ this.startPts_ = 0; this.displayed_ = 
createDisplayBuffer(); this.nonDisplayed_ = createDisplayBuffer(); + this.lastControlCode_ = null; this.push = function(packet) { - var data, swap, charCode; + var data, swap, char0, char1; // remove the parity bits data = packet.ccData & 0x7f7f; + // ignore duplicate control codes + if (data === this.lastControlCode_) { + this.lastControlCode_ = null; + return; + } + + // Store control codes + if ((data & 0xf000) === 0x1000) { + this.lastControlCode_ = data; + } else { + this.lastControlCode_ = null; + } + switch (data) { case PADDING: break; - case RESUME_CAPTION_LOADING: this.mode_ = 'popOn'; break; @@ -275,17 +331,25 @@ case ERASE_NON_DISPLAYED_MEMORY: this.nonDisplayed_ = createDisplayBuffer(); break; - default: - charCode = data >>> 8; + char0 = data >>> 8; + char1 = data & 0xff; + + // Look for a Channel 1 Preamble Address Code + if (char0 >= 0x10 && char0 <= 0x17 && + char1 >= 0x40 && char1 <= 0x7F && + (char0 !== 0x10 || char1 < 0x60)) { + // Follow Safari's lead and replace the PAC with a space + char0 = char1 = 0x20; + } // ignore unsupported control codes - if ((charCode & 0xf0) === 0x10) { + if ((char0 & 0xf0) === 0x10) { return; } // character handling is dependent on the current mode - this[this.mode_](packet.pts, charCode, data & 0xff); + this[this.mode_](packet.pts, char0, char1); break; } }; diff --git a/test/caption-stream-test.js b/test/caption-stream-test.js index a470e676..969ac494 100644 --- a/test/caption-stream-test.js +++ b/test/caption-stream-test.js @@ -9,17 +9,38 @@ } }); - QUnit.skip('parses SEIs larger than 255 bytes', function() { - var captions = []; - captionStream.on('data', function(caption) { - captions.push(caption); - }); + test('parses SEIs larger than 255 bytes', function() { + var packets = [], data; + captionStream.field1_.push = function(packet) { + packets.push(packet); + }; + data = new Uint8Array(312); + data[0] = 0x01, // payload_type !== user_data_registered_itu_t_t35 + data[1] = 0xFF, // payload_size + data[2] = 
0x26 // payload_size + data[296] = 0x04, // payload_type === user_data_registered_itu_t_t35 + data[297] = 0x0d, // payload_size + data[298] = 181, // itu_t_t35_country_code + data[299] = 0x00, + data[300] = 0x31, // itu_t_t35_provider_code + data[301] = 0x47, + data[302] = 0x41, + data[303] = 0x39, + data[304] = 0x34, // user_identifier, "GA94" + data[305] = 0x03, //user_data_type_code, 0x03 is cc_data + data[306] = 0xc1, // process_cc_data, cc_count + data[307] = 0xff, // reserved + data[308] = 0xfc, // cc_valid, cc_type (608, field 1) + data[309] = 0xff, // cc_data_1 with parity bit set + data[310] = 0x0e, // cc_data_2 without parity bit set + data[311] = 0xff // marker_bits + captionStream.push({ nalUnitType: 'sei_rbsp', - data: new Uint8Array(312) + escapedRBSP: data }); - - equal(captions.length, 1, 'parsed a caption'); + captionStream.flush(); + equal(packets.length, 1, 'parsed a caption'); }); test('ignores SEIs that do not have type user_data_registered_itu_t_t35', function() { @@ -29,8 +50,7 @@ }); captionStream.push({ nalUnitType: 'sei_rbsp', - data: new Uint8Array([ - 0x06, // nal_unit_type + escapedRBSP: new Uint8Array([ 0x05 // payload_type !== user_data_registered_itu_t_t35 ]) }); @@ -45,9 +65,8 @@ }; captionStream.push({ nalUnitType: 'sei_rbsp', - data: new Uint8Array([ - 0x06, // nal_unit_type - 0x04, // payload_type !== user_data_registered_itu_t_t35 + escapedRBSP: new Uint8Array([ + 0x04, // payload_type === user_data_registered_itu_t_t35 0x0d, // payload_size @@ -67,7 +86,7 @@ 0xff // marker_bits ]) }); - + captionStream.flush(); equal(packets.length, 1, 'parsed a caption packet'); }); @@ -90,7 +109,7 @@ transmuxer.flush(); equal(captions.length, 2, 'parsed two captions'); - equal(captions[0].text.indexOf('ASUKA'), 0, 'parsed the start of the first caption'); + equal(captions[0].text.indexOf(' ASUKA'), 0, 'parsed the start of the first caption'); ok(captions[0].text.indexOf('Japanese') > 0, 'parsed the end of the first caption'); 
equal(captions[0].startTime, 1, 'parsed the start time'); equal(captions[0].endTime, 4, 'parsed the end time'); @@ -123,9 +142,9 @@ }); test('converts non-standard character codes to ASCII', function() { - var packets, captions, i; + var packets, captions; packets = [ - // RCL, resume caption loading + // RCL, resume caption loading { ccData: 0x1420 }, // ASCII exceptions { ccData: 0x2a5c }, @@ -135,6 +154,8 @@ { ccData: 0x7e7f }, // EOC, End of Caption { pts: 1000, ccData: 0x142f }, + // RCL, resume caption loading + { ccData: 0x1420 }, // EOC, End of Caption, clear the display { pts: 10 * 1000, ccData: 0x142f } ]; @@ -142,9 +163,8 @@ cea608Stream.on('data', function(caption) { captions.push(caption); }); - for (i = 0; i < packets.length; i++) { - cea608Stream.push(packets[i]); - } + + packets.forEach(cea608Stream.push, cea608Stream); equal(captions[0].text, String.fromCharCode(0xe1, 0xe9, 0xed, 0xf3, 0xfa, 0xe7, 0xf7, 0xd1, 0xf1, 0x2588), @@ -154,12 +174,14 @@ test('pop-on mode', function() { var packets, captions; packets = [ - // RCL, resume caption loading + // RCL, resume caption loading { ccData: 0x1420 }, // 'hi' { ccData: characters('hi') }, // EOC, End of Caption. Finished transmitting, begin display { pts: 1000, ccData: 0x142f }, + // RCL, resume caption loading + { ccData: 0x1420 }, // EOC, End of Caption. 
End display { pts: 10 * 1000, ccData: 0x142f } ]; @@ -169,10 +191,7 @@ captions.push(caption); }); - cea608Stream.push(packets[0]); - cea608Stream.push(packets[1]); - cea608Stream.push(packets[2]); - cea608Stream.push(packets[3]); + packets.forEach(cea608Stream.push, cea608Stream); equal(captions.length, 1, 'detected a caption') deepEqual(captions[0], { @@ -185,7 +204,7 @@ test('recognizes the Erase Displayed Memory command', function() { var packets, captions; packets = [ - // RCL, resume caption loading + // RCL, resume caption loading { ccData: 0x1420 }, // '01' { ccData: characters('01') }, @@ -201,6 +220,8 @@ { ccData: characters('34') }, // EOC, End of Caption. Display '34' { pts: 3 * 1000, ccData: 0x142f }, + // RCL, resume caption loading + { ccData: 0x1420 }, // EOC, End of Caption { pts: 4 * 1000, ccData: 0x142f } ]; @@ -210,9 +231,8 @@ captions.push(caption); }); - packets.forEach(function(packet) { - cea608Stream.push(packet); - }); + packets.forEach(cea608Stream.push, cea608Stream); + equal(captions.length, 3, 'detected three captions'); deepEqual(captions[0], { startPts: 1 * 1000, @@ -232,43 +252,48 @@ }); test('backspaces are applied to non-displayed memory', function() { - var captions = []; + var captions = [], packets; cea608Stream.on('data', function(caption) { captions.push(caption); }); - // RCL, resume caption loading - cea608Stream.push({ ccData: 0x1420 }); - // '01' - cea608Stream.push({ - ccData: characters('01') - }); - // backspace - cea608Stream.push({ ccData: 0x1421 }); - cea608Stream.push({ - ccData: characters('23') - }); - // EOC, End of Caption - cea608Stream.push({ pts: 1 * 1000, ccData: 0x142f }); - // EOC, End of Caption - cea608Stream.push({ pts: 3 * 1000, ccData: 0x142f }); + packets = [ + // RCL, resume caption loading + { ccData: 0x1420 }, + // '01' + { ccData: characters('01') }, + // backspace + { ccData: 0x1421 }, + { ccData: characters('23') }, + // EOC, End of Caption + { pts: 1 * 1000, ccData: 0x142f }, + // RCL, resume 
caption loading + { ccData: 0x1420 }, + // EOC, End of Caption + { pts: 3 * 1000, ccData: 0x142f } + ]; + + packets.forEach(cea608Stream.push, cea608Stream); equal(captions.length, 1, 'detected a caption'); equal(captions[0].text, '023', 'applied the backspace'); }); test('backspaces on cleared memory are no-ops', function() { - var captions = []; + var captions = [], packets; cea608Stream.on('data', function(caption) { captions.push(caption); }); + packets = [ + // RCL, resume caption loading + { ccData: 0x1420 }, + // backspace + { ccData: 0x1421 }, + // EOC, End of Caption. Finished transmitting, display '01' + { pts: 1 * 1000, ccData: 0x142f } + ]; - // RCL, resume caption loading - cea608Stream.push({ ccData: 0x1420 }); - // backspace - cea608Stream.push({ ccData: 0x1421 }); - // EOC, End of Caption. Finished transmitting, display '01' - cea608Stream.push({ pts: 1 * 1000, ccData: 0x142f }); + packets.forEach(cea608Stream.push, cea608Stream); equal(captions.length, 0, 'no captions detected'); }); @@ -276,7 +301,7 @@ test('recognizes the Erase Non-Displayed Memory command', function() { var packets, captions; packets = [ - // RCL, resume caption loading + // RCL, resume caption loading { ccData: 0x1420 }, // '01' { ccData: characters('01') }, @@ -285,17 +310,21 @@ { ccData: characters('23') }, // EOC, End of Caption. 
Finished transmitting, display '23' { pts: 1 * 1000, ccData: 0x142f }, + // RCL, resume caption loading + { ccData: 0x1420 }, // EOC, End of Caption { pts: 2 * 1000, ccData: 0x142f } ]; captions = []; + + packets.forEach(cea608Stream.push, cea608Stream); + cea608Stream.on('data', function(caption) { captions.push(caption); }); - packets.forEach(function(packet) { - cea608Stream.push(packet); - }); + packets.forEach(cea608Stream.push, cea608Stream); + equal(captions.length, 1, 'detected one caption'); deepEqual(captions[0], { startPts: 1 * 1000, @@ -307,7 +336,7 @@ test('ignores unrecognized commands', function() { var packets, captions; packets = [ - // RCL, resume caption loading + // RCL, resume caption loading { ccData: 0x1420 }, // a row-9 indent 28 underline, which is not supported { ccData: 0x1f7f }, @@ -315,6 +344,8 @@ { ccData: characters('01') }, // EOC, End of Caption { pts: 1 * 1000, ccData: 0x142f }, + // RCL, resume caption loading + { ccData: 0x1420 }, // EOC, End of Caption { pts: 2 * 1000, ccData: 0x142f } ]; @@ -323,9 +354,8 @@ captions.push(caption); }); - packets.forEach(function(packet) { - cea608Stream.push(packet); - }); + packets.forEach(cea608Stream.push, cea608Stream); + equal(captions[0].text, '01', 'skipped the unrecognized commands'); }); @@ -339,15 +369,16 @@ captions.push(caption); }); - // RU2, roll-up captions 2 rows - cea608Stream.push({ ccData: 0x1425 }); - // '01' - cea608Stream.push({ - pts: 1 * 1000, - ccData: characters('01') - }); - // CR, carriage return - cea608Stream.push({ pts: 3 * 1000, ccData: 0x142d }); + [ // RU2, roll-up captions 2 rows + { ccData: 0x1425 }, + // '01' + { + pts: 1 * 1000, + ccData: characters('01') + }, + // CR, carriage return + { pts: 3 * 1000, ccData: 0x142d } + ].forEach(cea608Stream.push, cea608Stream); equal(captions.length, 1, 'detected one caption'); deepEqual(captions[0], { @@ -357,15 +388,17 @@ }, 'parsed the caption') captions = []; - // RU4, roll-up captions 4 rows - cea608Stream.push({ 
ccdata: 0x1427 }); - // '23' - cea608Stream.push({ - pts: 4 * 1000, - ccData: characters('23') - }); - // CR - cea608Stream.push({ pts: 5 * 1000, ccData: 0x142d }); + [ // RU4, roll-up captions 4 rows + { ccdata: 0x1427 }, + // '23' + { + pts: 4 * 1000, + ccData: characters('23') + }, + // CR + { pts: 5 * 1000, ccData: 0x142d } + ].forEach(cea608Stream.push, cea608Stream); + equal(captions.length, 3, 'detected another caption'); deepEqual(captions[0], { startPts: 3 * 1000, @@ -390,15 +423,17 @@ captions.push(caption); }); - // RU2, roll-up captions 2 rows - cea608Stream.push({ ccData: 0x1425 }); - // '01' - cea608Stream.push({ - pts: 0 * 1000, - ccData: characters('01') - }); - // CR, carriage return - cea608Stream.push({ pts: 1 * 1000, ccData: 0x142d }); + [ // RU2, roll-up captions 2 rows + { ccData: 0x1425 }, + // '01' + { + pts: 0 * 1000, + ccData: characters('01') + }, + // CR, carriage return + { pts: 1 * 1000, ccData: 0x142d } + ].forEach(cea608Stream.push, cea608Stream); + equal(captions.length, 1, 'detected a caption'); deepEqual(captions[0], { startPts: 0 * 1000, @@ -407,13 +442,15 @@ }, 'created a caption for the first period'); captions = []; - // '23' - cea608Stream.push({ - pts: 2 * 1000, - ccData: characters('23') - }); - // CR, carriage return - cea608Stream.push({ pts: 3 * 1000, ccData: 0x142d }); + [ // '23' + { + pts: 2 * 1000, + ccData: characters('23') + }, + // CR, carriage return + { pts: 3 * 1000, ccData: 0x142d } + ].forEach(cea608Stream.push, cea608Stream); + equal(captions.length, 3, 'detected three captions'); deepEqual(captions[0], { startPts: 1 * 1000, @@ -432,13 +469,15 @@ }, 'created the bottom row for the second period'); captions = []; - // '45' - cea608Stream.push({ - pts: 4 * 1000, - ccData: characters('45') - }); - // CR, carriage return - cea608Stream.push({ pts: 5 * 1000, ccData: 0x142d }); + [ // '45' + { + pts: 4 * 1000, + ccData: characters('45') + }, + // CR, carriage return + { pts: 5 * 1000, ccData: 0x142d } + 
].forEach(cea608Stream.push, cea608Stream); + equal(captions.length, 3, 'detected three captions'); deepEqual(captions[0], { startPts: 3 * 1000, @@ -463,28 +502,34 @@ captions.push(caption); }); - // RU2, roll-up captions 2 rows - cea608Stream.push({ ccData: 0x1425 }); - // '01' - cea608Stream.push({ - pts: 0 * 1000, - ccData: characters('01') - }); - // CR, carriage return - cea608Stream.push({ pts: 1 * 1000, ccData: 0x142d }); + [ // RU2, roll-up captions 2 rows + { ccData: 0x1425 }, + // '01' + { + pts: 0 * 1000, + ccData: characters('01') + }, + // CR, carriage return + { pts: 1 * 1000, ccData: 0x142d } + ].forEach(cea608Stream.push, cea608Stream); + captions = []; - // RU3, roll-up captions 3 rows - cea608Stream.push({ ccData: 0x1426 }); - // CR, carriage return - cea608Stream.push({ pts: 2 * 1000, ccData: 0x142d }); + [ // RU3, roll-up captions 3 rows + { ccData: 0x1426 }, + // CR, carriage return + { pts: 2 * 1000, ccData: 0x142d } + ].forEach(cea608Stream.push, cea608Stream); + equal(captions.length, 1, 'still displaying a caption'); captions = []; - // RU4, roll-up captions 4 rows - cea608Stream.push({ ccData: 0x1427 }); - // CR, carriage return - cea608Stream.push({ pts: 3 * 1000, ccData: 0x142d }); + [ // RU4, roll-up captions 4 rows + { ccData: 0x1427 }, + // CR, carriage return + { pts: 3 * 1000, ccData: 0x142d } + ].forEach(cea608Stream.push, cea608Stream); + equal(captions.length, 1, 'still displaying a caption'); captions = []; @@ -499,21 +544,22 @@ captions.push(caption); }); - // RU2, roll-up captions 2 rows - cea608Stream.push({ ccData: 0x1425 }); - // '01' - cea608Stream.push({ - pts: 0 * 1000, - ccData: characters('01') - }); - // backspace - cea608Stream.push({ ccData: 0x1421 }); - cea608Stream.push({ - pts: 1 * 1000, - ccData: characters('23') - }); - // CR, carriage return - cea608Stream.push({ pts: 1 * 1000, ccData: 0x142d }); + [ // RU2, roll-up captions 2 rows + { ccData: 0x1425 }, + // '01' + { + pts: 0 * 1000, + ccData: characters('01') 
+ }, + // backspace + { ccData: 0x1421 }, + { + pts: 1 * 1000, + ccData: characters('23') + }, + // CR, carriage return + { pts: 1 * 1000, ccData: 0x142d } + ].forEach(cea608Stream.push, cea608Stream); equal(captions.length, 1, 'detected a caption'); equal(captions[0].text, '023', 'applied the backspace'); @@ -525,44 +571,107 @@ captions.push(caption); }); - // RU2, roll-up captions 2 rows - cea608Stream.push({ ccData: 0x1425 }); - // '01' - cea608Stream.push({ - pts: 0 * 1000, - ccData: characters('01') - }); - // backspace - cea608Stream.push({ ccData: 0x1421 }); - // backspace - cea608Stream.push({ ccData: 0x1421 }); - // CR, carriage return - cea608Stream.push({ pts: 1 * 1000, ccData: 0x142d }); + [ // RU2, roll-up captions 2 rows + { ccData: 0x1425 }, + // '01' + { + pts: 0 * 1000, + ccData: characters('01') + }, + // backspace + { ccData: 0x1421 }, + // backspace + { ccData: 0x1421 }, // duplicate is ignored + // backspace + { ccData: 0x1421 }, + // CR, carriage return + { pts: 1 * 1000, ccData: 0x142d } + ].forEach(cea608Stream.push, cea608Stream); equal(captions.length, 0, 'no caption emitted'); }); - test('backspaces stop at the beginning of the line', function() { + test('a second identical control code immediately following the first is ignored', function() { var captions = []; cea608Stream.on('data', function(caption) { captions.push(caption); }); - // RU2, roll-up captions 2 rows - cea608Stream.push({ ccData: 0x1425 }); - // '01' - cea608Stream.push({ - pts: 0 * 1000, - ccData: characters('01') + [ // RU2, roll-up captions 2 rows + { ccData: 0x1425 }, + // '01' + { + pts: 0 * 1000, + ccData: characters('01') + }, + // '02' + { + pts: 1 * 1000, + ccData: characters('02') + }, + // backspace + { ccData: 0x1421 }, + // backspace + { ccData: 0x1421 }, // duplicate is ignored + // backspace + { ccData: 0x1421 }, + // CR, carriage return + { pts: 2 * 1000, ccData: 0x142d } + ].forEach(cea608Stream.push, cea608Stream); + + equal(captions.length, 1, 'caption 
emitted'); + equal(captions[0].text, '01', 'only two backspaces processed'); + }); + + test('preable address codes are converted into spaces', function() { + var captions = []; + cea608Stream.on('data', function(caption) { + captions.push(caption); }); - // backspace - cea608Stream.push({ ccData: 0x1421 }); - // backspace - cea608Stream.push({ ccData: 0x1421 }); - // backspace - cea608Stream.push({ ccData: 0x1421 }); - // CR, carriage return - cea608Stream.push({ pts: 1 * 1000, ccData: 0x142d }); + + [ // RU2, roll-up captions 2 rows + { ccData: 0x1425 }, + // '01' + { + pts: 0 * 1000, + ccData: characters('01') + }, + // PAC: row 15, indent 0 + { ccData: 0x1470 }, + // '02' + { + pts: 1 * 1000, + ccData: characters('02') + }, + // CR, carriage return + { pts: 2 * 1000, ccData: 0x142d } + ].forEach(cea608Stream.push, cea608Stream); + + equal(captions.length, 1, 'caption emitted'); + equal(captions[0].text, '01 02', 'PACs are was converted to space'); + }); + test('backspaces stop at the beginning of the line', function() { + var captions = []; + cea608Stream.on('data', function(caption) { + captions.push(caption); + }); + + [ // RU2, roll-up captions 2 rows + { ccData: 0x1425 }, + // '01' + { + pts: 0 * 1000, + ccData: characters('01') + }, + // backspace + { ccData: 0x1421 }, + // backspace + { ccData: 0x1421 }, + // backspace + { ccData: 0x1421 }, + // CR, carriage return + { pts: 1 * 1000, ccData: 0x142d } + ].forEach(cea608Stream.push, cea608Stream); equal(captions.length, 0, 'no caption emitted'); }); From 5246e6ea2aef0301c84810201fa09acdceacb1d2 Mon Sep 17 00:00:00 2001 From: jrivera Date: Thu, 3 Dec 2015 19:01:45 -0500 Subject: [PATCH 2/3] Fix parsing of the size field of ID3 tags * All sizes or lengths in ID3 tags and frames are in "sync-safe integer" format - parse them correctly * Fix tests which were not creating the correct tags --- lib/m2ts/metadata-stream.js | 26 ++++++++++---------------- test/id3-generator.js | 14 ++++++++------ 2 files changed, 18 
insertions(+), 22 deletions(-) diff --git a/lib/m2ts/metadata-stream.js b/lib/m2ts/metadata-stream.js index 8360f637..f9e43585 100644 --- a/lib/m2ts/metadata-stream.js +++ b/lib/m2ts/metadata-stream.js @@ -25,6 +25,12 @@ parseIso88591 = function(bytes, start, end) { return window.unescape(percentEncode(bytes, start, end)); }, + parseSyncSafeInteger = function (data) { + return (data[0] << 21) | + (data[1] << 14) | + (data[2] << 7) | + (data[3]); + }, tagParsers = { 'TXXX': function(tag) { var i; @@ -142,10 +148,7 @@ // last four bytes of the ID3 header. // The most significant bit of each byte is dropped and the // results concatenated to recover the actual value. - tagSize = (chunk.data[6] << 21) | - (chunk.data[7] << 14) | - (chunk.data[8] << 7) | - (chunk.data[9]); + tagSize = parseSyncSafeInteger(chunk.data.subarray(6, 10)); // ID3 reports the tag size excluding the header but it's more // convenient for our comparisons to include it @@ -176,26 +179,17 @@ if (tag.data[5] & 0x40) { // advance the frame start past the extended header frameStart += 4; // header size field - frameStart += (tag.data[10] << 24) | - (tag.data[11] << 16) | - (tag.data[12] << 8) | - (tag.data[13]); + frameStart += parseSyncSafeInteger(tag.data.subarray(10, 14)); // clip any padding off the end - tagSize -= (tag.data[16] << 24) | - (tag.data[17] << 16) | - (tag.data[18] << 8) | - (tag.data[19]); + tagSize -= parseSyncSafeInteger(tag.data.subarray(16, 20)); } // parse one or more ID3 frames // http://id3.org/id3v2.3.0#ID3v2_frame_overview do { // determine the number of bytes in this frame - frameSize = (tag.data[frameStart + 4] << 24) | - (tag.data[frameStart + 5] << 16) | - (tag.data[frameStart + 6] << 8) | - (tag.data[frameStart + 7]); + frameSize = parseSyncSafeInteger(tag.data.subarray(frameStart + 4, frameStart + 8)); if (frameSize < 1) { return console.log('Malformed ID3 frame encountered. 
Skipping metadata parsing.'); } diff --git a/test/id3-generator.js b/test/id3-generator.js index 3724e66e..b0c01c2e 100644 --- a/test/id3-generator.js +++ b/test/id3-generator.js @@ -36,10 +36,11 @@ // size is stored as a sequence of four 7-bit integers with the // high bit of each byte set to zero size = result.length - 10; + result[6] = (size >>> 21) & 0x7f; result[7] = (size >>> 14) & 0x7f; - result[8] = (size >>> 7) & 0x7f; - result[9] = (size) & 0x7f; + result[8] = (size >>> 7) & 0x7f; + result[9] = size & 0x7f; return result; }; @@ -56,10 +57,11 @@ // set the size size = result.length - 10; - result[4] = (size >>> 24); - result[5] = (size >>> 16) & 0xff; - result[6] = (size >>> 8) & 0xff; - result[7] = (size) & 0xff; + + result[4] = (size >>> 21) & 0x7f; + result[5] = (size >>> 14) & 0x7f; + result[6] = (size >>> 7) & 0x7f; + result[7] = size & 0x7f; return result; }; From bab6509366c6c529b1893f578b4c64af480d5854 Mon Sep 17 00:00:00 2001 From: jrivera Date: Fri, 4 Dec 2015 13:23:36 -0500 Subject: [PATCH 3/3] Enhancements from code review --- test/caption-stream-test.js | 99 ++++++++++++++++++++++++++----------- 1 file changed, 70 insertions(+), 29 deletions(-) diff --git a/test/caption-stream-test.js b/test/caption-stream-test.js index 969ac494..3c9ff238 100644 --- a/test/caption-stream-test.js +++ b/test/caption-stream-test.js @@ -9,31 +9,64 @@ } }); - test('parses SEIs larger than 255 bytes', function() { + test('parses SEIs messages larger than 255 bytes', function() { var packets = [], data; captionStream.field1_.push = function(packet) { packets.push(packet); }; - data = new Uint8Array(312); - data[0] = 0x01, // payload_type !== user_data_registered_itu_t_t35 - data[1] = 0xFF, // payload_size - data[2] = 0x26 // payload_size - data[296] = 0x04, // payload_type === user_data_registered_itu_t_t35 - data[297] = 0x0d, // payload_size - data[298] = 181, // itu_t_t35_country_code - data[299] = 0x00, - data[300] = 0x31, // itu_t_t35_provider_code - data[301] = 
0x47, - data[302] = 0x41, - data[303] = 0x39, - data[304] = 0x34, // user_identifier, "GA94" - data[305] = 0x03, //user_data_type_code, 0x03 is cc_data - data[306] = 0xc1, // process_cc_data, cc_count - data[307] = 0xff, // reserved - data[308] = 0xfc, // cc_valid, cc_type (608, field 1) - data[309] = 0xff, // cc_data_1 with parity bit set - data[310] = 0x0e, // cc_data_2 without parity bit set - data[311] = 0xff // marker_bits + data = new Uint8Array(268); + data[0] = 0x04; // payload_type === user_data_registered_itu_t_t35 + data[1] = 0xff; // payload_size + data[2] = 0x0d; // payload_size + data[3] = 181; // itu_t_t35_country_code + data[4] = 0x00; + data[5] = 0x31; // itu_t_t35_provider_code + data[6] = 0x47; + data[7] = 0x41; + data[8] = 0x39; + data[9] = 0x34; // user_identifier, "GA94" + data[10] = 0x03; //user_data_type_code, 0x03 is cc_data + data[11] = 0xc1; // process_cc_data, cc_count + data[12] = 0xff; // reserved + data[13] = 0xfc; // cc_valid, cc_type (608, field 1) + data[14] = 0xff; // cc_data_1 with parity bit set + data[15] = 0x0e; // cc_data_2 without parity bit set + data[16] = 0xff; // marker_bits + + captionStream.push({ + nalUnitType: 'sei_rbsp', + escapedRBSP: data + }); + captionStream.flush(); + equal(packets.length, 1, 'parsed a caption'); + }); + + test('parses SEIs containing multiple messages', function() { + var packets = [], data; + + captionStream.field1_.push = function(packet) { + packets.push(packet); + }; + + data = new Uint8Array(22); + data[0] = 0x01; // payload_type !== user_data_registered_itu_t_t35 + data[1] = 0x04; // payload_size + data[6] = 0x04; // payload_type === user_data_registered_itu_t_t35 + data[7] = 0x0d; // payload_size + data[8] = 181; // itu_t_t35_country_code + data[9] = 0x00; + data[10] = 0x31; // itu_t_t35_provider_code + data[11] = 0x47; + data[12] = 0x41; + data[13] = 0x39; + data[14] = 0x34; // user_identifier, "GA94" + data[15] = 0x03; //user_data_type_code, 0x03 is cc_data + data[16] = 0xc1; // 
process_cc_data, cc_count + data[17] = 0xff; // reserved + data[18] = 0xfc; // cc_valid, cc_type (608, field 1) + data[19] = 0xff; // cc_data_1 with parity bit set + data[20] = 0x0e; // cc_data_2 without parity bit set + data[21] = 0xff; // marker_bits captionStream.push({ nalUnitType: 'sei_rbsp', @@ -154,7 +187,7 @@ { ccData: 0x7e7f }, // EOC, End of Caption { pts: 1000, ccData: 0x142f }, - // RCL, resume caption loading + // Send another command so that the second EOC isn't ignored { ccData: 0x1420 }, // EOC, End of Caption, clear the display { pts: 10 * 1000, ccData: 0x142f } @@ -180,7 +213,7 @@ { ccData: characters('hi') }, // EOC, End of Caption. Finished transmitting, begin display { pts: 1000, ccData: 0x142f }, - // RCL, resume caption loading + // Send another command so that the second EOC isn't ignored { ccData: 0x1420 }, // EOC, End of Caption. End display { pts: 10 * 1000, ccData: 0x142f } @@ -220,7 +253,7 @@ { ccData: characters('34') }, // EOC, End of Caption. Display '34' { pts: 3 * 1000, ccData: 0x142f }, - // RCL, resume caption loading + // Send another command so that the second EOC isn't ignored { ccData: 0x1420 }, // EOC, End of Caption { pts: 4 * 1000, ccData: 0x142f } @@ -267,7 +300,7 @@ { ccData: characters('23') }, // EOC, End of Caption { pts: 1 * 1000, ccData: 0x142f }, - // RCL, resume caption loading + // Send another command so that the second EOC isn't ignored { ccData: 0x1420 }, // EOC, End of Caption { pts: 3 * 1000, ccData: 0x142f } @@ -310,7 +343,7 @@ { ccData: characters('23') }, // EOC, End of Caption. 
Finished transmitting, display '23' { pts: 1 * 1000, ccData: 0x142f }, - // RCL, resume caption loading + // Send another command so that the second EOC isn't ignored { ccData: 0x1420 }, // EOC, End of Caption { pts: 2 * 1000, ccData: 0x142f } @@ -344,7 +377,7 @@ { ccData: characters('01') }, // EOC, End of Caption { pts: 1 * 1000, ccData: 0x142f }, - // RCL, resume caption loading + // Send another command so that the second EOC isn't ignored { ccData: 0x1420 }, // EOC, End of Caption { pts: 2 * 1000, ccData: 0x142f } @@ -580,8 +613,9 @@ }, // backspace { ccData: 0x1421 }, - // backspace - { ccData: 0x1421 }, // duplicate is ignored + // Send another command so that the backspace isn't + // ignored as a duplicate command + { ccData: 0x1425 }, // backspace { ccData: 0x1421 }, // CR, carriage return @@ -650,6 +684,7 @@ equal(captions.length, 1, 'caption emitted'); equal(captions[0].text, '01 02', 'PACs are was converted to space'); }); + test('backspaces stop at the beginning of the line', function() { var captions = []; cea608Stream.on('data', function(caption) { @@ -665,8 +700,14 @@ }, // backspace { ccData: 0x1421 }, + // Send another command so that the backspace isn't + // ignored as a duplicate command + { ccData: 0x1425 }, // backspace { ccData: 0x1421 }, + // Send another command so that the backspace isn't + // ignored as a duplicate command + { ccData: 0x1425 }, // backspace { ccData: 0x1421 }, // CR, carriage return