pem-rfc7468: refactor decoder (RustCrypto#41)

- Moves decoding of encapsulated text to an inherent method of `Encapsulation`
- Changes the return type of this method to be the portion of the buffer containing the decoded data
janimo · Sep 17, 2021 · 236a131 · 236a131
1 parent a1b9c44
commit 236a131
Show file tree

Hide file tree

Showing 2 changed files with 87 additions and 89 deletions.
diff --git a/pem-rfc7468/src/decoder.rs b/pem-rfc7468/src/decoder.rs
@@ -28,9 +28,8 @@ use core::{convert::TryFrom, str};
 pub fn decode<'i, 'o>(pem: &'i [u8], buf: &'o mut [u8]) -> Result<(&'i str, &'o [u8])> {
     let encapsulation = Encapsulation::try_from(pem)?;
     let label = encapsulation.label();
-    let mut out_len = 0;
-    decode_encapsulated_text(&encapsulation, buf, &mut out_len)?;
-    Ok((label, &buf[..out_len]))
+    let decoded_bytes = encapsulation.decode(buf)?;
+    Ok((label, decoded_bytes))
 }
 
 /// Decode a PEM document according to RFC 7468's "Strict" grammar, returning
@@ -40,16 +39,16 @@ pub fn decode<'i, 'o>(pem: &'i [u8], buf: &'o mut [u8]) -> Result<(&'i str, &'o
 pub fn decode_vec(pem: &[u8]) -> Result<(&str, Vec<u8>)> {
     let encapsulation = Encapsulation::try_from(pem)?;
     let label = encapsulation.label();
+
     // count all chars (gives over-estimation, due to whitespace)
     let max_len = encapsulation.encapsulated_text.len() * 3 / 4;
-    let mut result = vec![0u8; max_len];
-    let mut actual_len = 0;
 
-    decode_encapsulated_text(&encapsulation, &mut result, &mut actual_len)?;
+    let mut result = vec![0u8; max_len];
+    let decoded_len = encapsulation.decode(&mut result)?.len();
 
     // Actual encoded length can be slightly shorter than estimated
     // TODO(tarcieri): more reliable length estimation
-    result.truncate(actual_len);
+    result.truncate(decoded_len);
     Ok((label, result))
 }
 
@@ -60,33 +59,6 @@ pub fn decode_label(pem: &[u8]) -> Result<&str> {
     Ok(Encapsulation::try_from(pem)?.label())
 }
 
-/// Decode the "encapsulated text", i.e. Base64-encoded data which lies between
-/// the pre/post-encapsulation boundaries.
-fn decode_encapsulated_text<'i, 'o>(
-    encapsulation: &Encapsulation<'i>,
-    buf: &'o mut [u8],
-    out_len: &mut usize,
-) -> Result<()> {
-    for line in encapsulation.encapsulated_text() {
-        let line = line?;
-
-        match Base64::decode(line, &mut buf[*out_len..]) {
-            Err(error) => {
-                // in the case that we are decoding the first line
-                // and we error, then attribute the error to an unsupported header
-                // if a colon char is present in the line
-                if *out_len == 0 && line.iter().any(|&b| b == grammar::CHAR_COLON) {
-                    return Err(Error::HeaderDisallowed);
-                } else {
-                    return Err(error.into());
-                }
-            }
-            Ok(out) => *out_len += out.len(),
-        }
-    }
-    Ok(())
-}
-
 /// PEM encapsulation parser.
 ///
 /// This parser performs an initial pass over the data, locating the
@@ -179,6 +151,32 @@ impl<'a> Encapsulation<'a> {
             bytes: self.encapsulated_text,
         }
     }
+
+    /// Decode the "encapsulated text", i.e. Base64-encoded data which lies between
+    /// the pre/post-encapsulation boundaries.
+    fn decode<'o>(&self, buf: &'o mut [u8]) -> Result<&'o [u8]> {
+        let mut out_len = 0;
+
+        for line in self.encapsulated_text() {
+            let line = line?;
+
+            match Base64::decode(line, &mut buf[out_len..]) {
+                Err(error) => {
+                    // in the case that we are decoding the first line
+                    // and we error, then attribute the error to an unsupported header
+                    // if a colon char is present in the line
+                    if out_len == 0 && line.iter().any(|&b| b == grammar::CHAR_COLON) {
+                        return Err(Error::HeaderDisallowed);
+                    } else {
+                        return Err(error.into());
+                    }
+                }
+                Ok(out) => out_len += out.len(),
+            }
+        }
+
+        Ok(&buf[..out_len])
+    }
 }
 
 impl<'a> TryFrom<&'a [u8]> for Encapsulation<'a> {

diff --git a/pem-rfc7468/src/grammar.rs b/pem-rfc7468/src/grammar.rs
@@ -39,61 +39,6 @@ pub(crate) fn is_wsp(char: u8) -> bool {
     matches!(char, CHAR_HT | CHAR_SP)
 }
 
-/// Split a slice beginning with a type label as located in an encapsulation
-/// boundary. Returns the label as a `&str`, and slice beginning with the
-/// encapsulated text with leading `-----` and newline removed.
-///
-/// This implementation follows the rules put forth in Section 2, which are
-/// stricter than those found in the ABNF grammar:
-///
-/// > Labels are formally case-sensitive, uppercase, and comprised of zero or more
-/// > characters; they do not contain consecutive spaces or hyphen-minuses,
-/// > nor do they contain spaces or hyphen-minuses at either end.
-///
-/// We apply a slightly stricter interpretation:
-/// - Labels MAY be empty
-/// - Non-empty labels MUST start with an upper-case letter: `'A'..='Z'`
-/// - The only allowable characters subsequently are `'A'..='Z'` or WSP.
-///   (NOTE: this is an overly strict initial implementation and should be relaxed)
-/// - Whitespace MUST NOT contain more than one consecutive WSP character
-// TODO(tarcieri): evaluate whether this is too strict; support '-'
-pub(crate) fn split_label(bytes: &[u8]) -> Option<(&str, &[u8])> {
-    let mut n = 0;
-
-    // TODO(tarcieri): handle hyphens in labels as well as spaces
-    let mut last_was_wsp = false;
-
-    for &char in bytes {
-        // Validate character
-        // TODO(tarcieri): unify with `is_labelchar`/`validate_label`
-        if matches!(char, b'A'..=b'Z') {
-            last_was_wsp = false;
-        } else if char == b'-' {
-            // Possible start of encapsulation boundary delimiter
-            break;
-        } else if n != 0 && is_wsp(char) {
-            // Repeated whitespace disallowed
-            if last_was_wsp {
-                return None;
-            }
-
-            last_was_wsp = true;
-        } else {
-            return None;
-        }
-
-        n += 1;
-    }
-
-    let (raw_label, rest) = bytes.split_at(n);
-    let label = str::from_utf8(raw_label).ok()?;
-
-    match rest {
-        [b'-', b'-', b'-', b'-', b'-', body @ ..] => Some((label, strip_leading_eol(body)?)),
-        _ => None,
-    }
-}
-
 /// Strip the "preamble", i.e. data that appears before the PEM
 /// pre-encapsulation boundary.
 ///
@@ -161,6 +106,61 @@ pub(crate) fn strip_trailing_eol(bytes: &[u8]) -> Option<&[u8]> {
     }
 }
 
+/// Split a slice beginning with a type label as located in an encapsulation
+/// boundary. Returns the label as a `&str`, and slice beginning with the
+/// encapsulated text with leading `-----` and newline removed.
+///
+/// This implementation follows the rules put forth in Section 2, which are
+/// stricter than those found in the ABNF grammar:
+///
+/// > Labels are formally case-sensitive, uppercase, and comprised of zero or more
+/// > characters; they do not contain consecutive spaces or hyphen-minuses,
+/// > nor do they contain spaces or hyphen-minuses at either end.
+///
+/// We apply a slightly stricter interpretation:
+/// - Labels MAY be empty
+/// - Non-empty labels MUST start with an upper-case letter: `'A'..='Z'`
+/// - The only allowable characters subsequently are `'A'..='Z'` or WSP.
+///   (NOTE: this is an overly strict initial implementation and should be relaxed)
+/// - Whitespace MUST NOT contain more than one consecutive WSP character
+// TODO(tarcieri): evaluate whether this is too strict; support '-'
+pub(crate) fn split_label(bytes: &[u8]) -> Option<(&str, &[u8])> {
+    let mut n = 0;
+
+    // TODO(tarcieri): handle hyphens in labels as well as spaces
+    let mut last_was_wsp = false;
+
+    for &char in bytes {
+        // Validate character
+        // TODO(tarcieri): unify with `is_labelchar`/`validate_label`
+        if matches!(char, b'A'..=b'Z') {
+            last_was_wsp = false;
+        } else if char == b'-' {
+            // Possible start of encapsulation boundary delimiter
+            break;
+        } else if n != 0 && is_wsp(char) {
+            // Repeated whitespace disallowed
+            if last_was_wsp {
+                return None;
+            }
+
+            last_was_wsp = true;
+        } else {
+            return None;
+        }
+
+        n += 1;
+    }
+
+    let (raw_label, rest) = bytes.split_at(n);
+    let label = str::from_utf8(raw_label).ok()?;
+
+    match rest {
+        [b'-', b'-', b'-', b'-', b'-', body @ ..] => Some((label, strip_leading_eol(body)?)),
+        _ => None,
+    }
+}
+
 /// Validate that the given bytes are allowed as a PEM type label, i.e. the
 /// label encoded in the `BEGIN` and `END` encapsulation boundaries.
 pub(crate) fn validate_label(label: &[u8]) -> Result<()> {