Skip to content

Commit

Permalink
pem-rfc7468: refactor decoder (RustCrypto#41)
Browse files Browse the repository at this point in the history
- Moves decoding of encapsulated text to an inherent method of
  `Encapsulation`
- Changes the return type of this method to be the portion of the buffer
  containing the decoded data
  • Loading branch information
tarcieri authored Sep 17, 2021
1 parent a1b9c44 commit 236a131
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 89 deletions.
66 changes: 32 additions & 34 deletions pem-rfc7468/src/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,8 @@ use core::{convert::TryFrom, str};
pub fn decode<'i, 'o>(pem: &'i [u8], buf: &'o mut [u8]) -> Result<(&'i str, &'o [u8])> {
// Parse the encapsulation boundaries first; the returned label borrows from
// `pem` (lifetime `'i`), while the decoded bytes borrow from `buf` (`'o`).
let encapsulation = Encapsulation::try_from(pem)?;
let label = encapsulation.label();
// NOTE(review): this text is a rendered diff without +/- markers. The next
// three lines look like the removed pre-refactor body, which tracked the
// output length itself and passed it to the free function
// `decode_encapsulated_text` — confirm against the commit.
let mut out_len = 0;
decode_encapsulated_text(&encapsulation, buf, &mut out_len)?;
Ok((label, &buf[..out_len]))
// Presumably the replacement lines: `Encapsulation::decode` hands back the
// filled portion of `buf` directly, so no separate length counter is needed.
let decoded_bytes = encapsulation.decode(buf)?;
Ok((label, decoded_bytes))
}

/// Decode a PEM document according to RFC 7468's "Strict" grammar, returning
Expand All @@ -40,16 +39,16 @@ pub fn decode<'i, 'o>(pem: &'i [u8], buf: &'o mut [u8]) -> Result<(&'i str, &'o
pub fn decode_vec(pem: &[u8]) -> Result<(&str, Vec<u8>)> {
// Locate the encapsulation boundaries and borrow the type label from `pem`.
let encapsulation = Encapsulation::try_from(pem)?;
let label = encapsulation.label();

// Upper bound on the decoded size: every 4 Base64 characters yield at most
// 3 bytes. The count covers all bytes of the encapsulated text, so it
// over-estimates due to whitespace.
let max_len = encapsulation.encapsulated_text.len() * 3 / 4;
// NOTE(review): this text is a rendered diff without +/- markers. The
// `actual_len` / `decode_encapsulated_text` lines below look like the removed
// pre-refactor code and the `decoded_len` / `encapsulation.decode` lines like
// their replacements — confirm against the commit.
let mut result = vec![0u8; max_len];
let mut actual_len = 0;

decode_encapsulated_text(&encapsulation, &mut result, &mut actual_len)?;
let mut result = vec![0u8; max_len];
// Only the length is kept here: the slice returned by `decode` borrows
// `result`, which is truncated and returned by value below.
let decoded_len = encapsulation.decode(&mut result)?.len();

// Actual decoded length can be slightly shorter than estimated, so the
// zero-filled tail of the buffer is dropped before returning.
// TODO(tarcieri): more reliable length estimation
result.truncate(actual_len);
result.truncate(decoded_len);
Ok((label, result))
}

Expand All @@ -60,33 +59,6 @@ pub fn decode_label(pem: &[u8]) -> Result<&str> {
Ok(Encapsulation::try_from(pem)?.label())
}

/// Decode the "encapsulated text", i.e. Base64-encoded data which lies between
/// the pre/post-encapsulation boundaries.
fn decode_encapsulated_text<'i, 'o>(
encapsulation: &Encapsulation<'i>,
buf: &'o mut [u8],
out_len: &mut usize,
) -> Result<()> {
for line in encapsulation.encapsulated_text() {
let line = line?;

match Base64::decode(line, &mut buf[*out_len..]) {
Err(error) => {
// in the case that we are decoding the first line
// and we error, then attribute the error to an unsupported header
// if a colon char is present in the line
if *out_len == 0 && line.iter().any(|&b| b == grammar::CHAR_COLON) {
return Err(Error::HeaderDisallowed);
} else {
return Err(error.into());
}
}
Ok(out) => *out_len += out.len(),
}
}
Ok(())
}

/// PEM encapsulation parser.
///
/// This parser performs an initial pass over the data, locating the
Expand Down Expand Up @@ -179,6 +151,32 @@ impl<'a> Encapsulation<'a> {
bytes: self.encapsulated_text,
}
}

/// Base64-decode the "encapsulated text" (the lines between the pre- and
/// post-encapsulation boundaries) into `buf`, returning the leading portion
/// of `buf` that holds the decoded data.
///
/// # Errors
///
/// - Any error yielded by the line iterator is propagated unchanged.
/// - [`Error::HeaderDisallowed`] if a line fails to decode before any output
///   has been produced and that line contains a colon.
/// - Otherwise the Base64 decoding error, converted into this crate's error
///   type.
fn decode<'o>(&self, buf: &'o mut [u8]) -> Result<&'o [u8]> {
    let mut written = 0;

    for line in self.encapsulated_text() {
        let line = line?;

        let chunk_len = match Base64::decode(line, &mut buf[written..]) {
            Ok(chunk) => chunk.len(),
            // A failure with nothing written yet, on a line containing a
            // colon, is attributed to an unsupported header.
            Err(_) if written == 0 && line.iter().any(|&byte| byte == grammar::CHAR_COLON) => {
                return Err(Error::HeaderDisallowed);
            }
            Err(error) => return Err(error.into()),
        };

        written += chunk_len;
    }

    Ok(&buf[..written])
}
}

impl<'a> TryFrom<&'a [u8]> for Encapsulation<'a> {
Expand Down
110 changes: 55 additions & 55 deletions pem-rfc7468/src/grammar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,61 +39,6 @@ pub(crate) fn is_wsp(char: u8) -> bool {
matches!(char, CHAR_HT | CHAR_SP)
}

/// Split a slice beginning with a type label as located in an encapsulation
/// boundary. Returns the label as a `&str`, and slice beginning with the
/// encapsulated text with leading `-----` and newline removed.
///
/// This implementation follows the rules put forth in Section 2, which are
/// stricter than those found in the ABNF grammar:
///
/// > Labels are formally case-sensitive, uppercase, and comprised of zero or more
/// > characters; they do not contain consecutive spaces or hyphen-minuses,
/// > nor do they contain spaces or hyphen-minuses at either end.
///
/// We apply a slightly stricter interpretation:
/// - Labels MAY be empty
/// - Non-empty labels MUST start with an upper-case letter: `'A'..='Z'`
/// - The only allowable characters subsequently are `'A'..='Z'` or WSP.
///   (NOTE: this is an overly strict initial implementation and should be relaxed)
/// - Whitespace MUST NOT contain more than one consecutive WSP character
///
/// Returns `None` if a disallowed character is encountered, if two WSP
/// characters appear in a row, if the label is not immediately followed by
/// `-----`, or if `strip_leading_eol` rejects what follows the delimiter.
// TODO(tarcieri): evaluate whether this is too strict; support '-'
// NOTE(review): a single WSP directly before the `-----` delimiter is counted
// as part of the label (e.g. `FOO -----` yields the label `"FOO "`), which
// does not match the "no spaces at either end" rule quoted above — confirm
// whether this is intended.
pub(crate) fn split_label(bytes: &[u8]) -> Option<(&str, &[u8])> {
// Number of leading bytes of `bytes` that belong to the label.
let mut n = 0;

// TODO(tarcieri): handle hyphens in labels as well as spaces
let mut last_was_wsp = false;

for &char in bytes {
// Validate character
// TODO(tarcieri): unify with `is_labelchar`/`validate_label`
if matches!(char, b'A'..=b'Z') {
last_was_wsp = false;
} else if char == b'-' {
// Possible start of encapsulation boundary delimiter
break;
} else if n != 0 && is_wsp(char) {
// `n != 0` means WSP is never accepted as the first label character.
// Repeated whitespace disallowed
if last_was_wsp {
return None;
}

last_was_wsp = true;
} else {
return None;
}

n += 1;
}

let (raw_label, rest) = bytes.split_at(n);
let label = str::from_utf8(raw_label).ok()?;

// The label must be followed by exactly the five-hyphen delimiter; the
// end-of-line after it is stripped from the returned remainder.
match rest {
[b'-', b'-', b'-', b'-', b'-', body @ ..] => Some((label, strip_leading_eol(body)?)),
_ => None,
}
}

/// Strip the "preamble", i.e. data that appears before the PEM
/// pre-encapsulation boundary.
///
Expand Down Expand Up @@ -161,6 +106,61 @@ pub(crate) fn strip_trailing_eol(bytes: &[u8]) -> Option<&[u8]> {
}
}

/// Split a slice beginning with a type label as located in an encapsulation
/// boundary. Returns the label as a `&str`, and slice beginning with the
/// encapsulated text with leading `-----` and newline removed.
///
/// This implementation follows the rules put forth in Section 2, which are
/// stricter than those found in the ABNF grammar:
///
/// > Labels are formally case-sensitive, uppercase, and comprised of zero or more
/// > characters; they do not contain consecutive spaces or hyphen-minuses,
/// > nor do they contain spaces or hyphen-minuses at either end.
///
/// We apply a slightly stricter interpretation:
/// - Labels MAY be empty
/// - Non-empty labels MUST start with an upper-case letter: `'A'..='Z'`
/// - The only allowable characters subsequently are `'A'..='Z'` or WSP.
///   (NOTE: this is an overly strict initial implementation and should be relaxed)
/// - Whitespace MUST NOT contain more than one consecutive WSP character
/// - Labels MUST NOT end with a WSP character
///
/// Returns `None` if a disallowed character is encountered, if two WSP
/// characters appear in a row, if the label ends with WSP, if the label is
/// not immediately followed by `-----`, or if `strip_leading_eol` rejects
/// what follows the delimiter.
// TODO(tarcieri): evaluate whether this is too strict; support '-'
pub(crate) fn split_label(bytes: &[u8]) -> Option<(&str, &[u8])> {
    // Number of leading bytes of `bytes` that belong to the label.
    let mut n = 0;

    // TODO(tarcieri): handle hyphens in labels as well as spaces
    let mut last_was_wsp = false;

    for &char in bytes {
        // Validate character
        // TODO(tarcieri): unify with `is_labelchar`/`validate_label`
        if matches!(char, b'A'..=b'Z') {
            last_was_wsp = false;
        } else if char == b'-' {
            // Possible start of encapsulation boundary delimiter
            break;
        } else if n != 0 && is_wsp(char) {
            // `n != 0` means WSP is never accepted as the first label character.
            // Repeated whitespace disallowed
            if last_was_wsp {
                return None;
            }

            last_was_wsp = true;
        } else {
            return None;
        }

        n += 1;
    }

    // The last character consumed as part of the label was WSP: labels may
    // not have whitespace at either end, so reject it just like leading WSP.
    if last_was_wsp {
        return None;
    }

    let (raw_label, rest) = bytes.split_at(n);
    let label = str::from_utf8(raw_label).ok()?;

    // The label must be followed by exactly the five-hyphen delimiter; the
    // end-of-line after it is stripped from the returned remainder.
    match rest {
        [b'-', b'-', b'-', b'-', b'-', body @ ..] => Some((label, strip_leading_eol(body)?)),
        _ => None,
    }
}

/// Validate that the given bytes are allowed as a PEM type label, i.e. the
/// label encoded in the `BEGIN` and `END` encapsulation boundaries.
pub(crate) fn validate_label(label: &[u8]) -> Result<()> {
Expand Down

0 comments on commit 236a131

Please sign in to comment.