Skip to content
This repository has been archived by the owner on Aug 23, 2018. It is now read-only.

Commit

Permalink
Raw header optimisations: smallvec and tendril.
Browse files Browse the repository at this point in the history
I’m planning to use tendril for zero-copy header handling. While I’m on
the topic of optimisation, it also makes sense to optimise for the most
common case, a header occurring only once in a message, by storing the
raw values in a `SmallVec<[ByteTendril; 1]>`.

End result: the average raw header will (when combined with a
tendril-backed parser) require zero allocations instead of three (header
name, vector of values and value). This should be a comparatively large
performance win.
  • Loading branch information
chris-morgan committed Jul 19, 2015
1 parent adfed15 commit a583f14
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 31 deletions.
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,5 @@ mucell = "0.3.2"
phf = "*"
phf_macros = "*"
mopa = "*"
tendril = {version = "*", git = "https://github.com/chris-morgan/tendril", branch = "impl-borrow-for-tendril"}
smallvec = "*"
54 changes: 35 additions & 19 deletions src/headers/internals.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,24 @@ use std::fmt;
use std::mem;
use std::slice;

use tendril::{ByteTendril, StrTendril};
use smallvec::SmallVec;

use mucell::{MuCell, Ref};

use super::{ToHeader, Header, HeaderDisplayAdapter};

// A deliberately minimal `vec!`-style constructor for `SmallVec`.
//
// No capacity is computed from the number of items: the empty form calls
// `SmallVec::new()` and the list form pushes each expression in order onto a
// freshly created `SmallVec`.
macro_rules! smallvec {
    // `smallvec![]`: an empty container.
    () => {
        (SmallVec::new())
    };
    // `smallvec![a, b, c]`: comma-separated expressions, pushed left to right.
    ($($item:expr),*) => {{
        let mut _collected = SmallVec::new();
        $(
            _collected.push($item);
        )*
        _collected
    }};
}

/// All the header field values, raw or typed, with a shared field name.
///
/// Each item can contain a raw and a typed representation in a combination defined below; for
Expand Down Expand Up @@ -49,7 +63,7 @@ struct Inner {
/// which were equivalent. Each inner vector is opaque data with no restrictions except that CR
/// and LF may not appear, unless as part of an obs-fold rule (extremely much not recommended),
/// though it is also recommended by RFC 7230 that it be US-ASCII.
raw: Option<Vec<Vec<u8>>>,
raw: Option<SmallVec<[ByteTendril; 1]>>,

/// A strongly typed header which has been parsed from the raw value.
typed: Typed,
Expand Down Expand Up @@ -176,7 +190,7 @@ impl<T: Iterator> MyIteratorExt for T {

struct ValueListIter<'a> {
current_line: Option<&'a [u8]>,
lines: slice::Iter<'a, Vec<u8>>,
lines: slice::Iter<'a, ByteTendril>,
}

// Debug tracing helper: passes every token it receives straight through to
// `println!`, so it accepts exactly the same arguments as `println!`.
macro_rules! DEBUG {
    ($($args:tt)*) => {
        println!($($args)*)
    };
}
Expand Down Expand Up @@ -317,7 +331,7 @@ trait RawHeaderExt {
fn to_value_list_iter(&self) -> ValueListIter;
}

impl RawHeaderExt for [Vec<u8>] {
impl RawHeaderExt for [ByteTendril] {
fn to_value_list_iter(&self) -> ValueListIter {
ValueListIter {
current_line: None,
Expand All @@ -331,11 +345,13 @@ macro_rules! value_list_iter_tests {
#[cfg(test)]
mod value_list_iter_tests {
use super::RawHeaderExt;
use tendril::ByteTendril;
$(
#[test]
fn $name() {
let input: &[&[u8]] = &$input;
let input = input.iter().map(|x| x.to_vec()).collect::<Vec<_>>();
let input = input.iter().map(|&x| ByteTendril::from(x))
.collect::<Vec<ByteTendril>>();
let expected: &[&[u8]] = &$expected;
let computed = input.to_value_list_iter().collect::<Vec<_>>();
assert_eq!(&computed[..], expected);
Expand Down Expand Up @@ -365,19 +381,19 @@ value_list_iter_tests! {
}

impl Inner {
fn raw_mut(&mut self, invalidate_others: bool) -> &mut Vec<Vec<u8>> {
fn raw_mut(&mut self, invalidate_others: bool) -> &mut SmallVec<[ByteTendril; 1]> {
if self.raw.is_none() {
self.raw = Some(if invalidate_others {
match mem::replace(&mut self.typed, Typed::None) {
Typed::None => vec![],
Typed::Single(single) => vec![single.into_raw()],
Typed::List(list) => vec![list.to_raw()],
Typed::None => smallvec![],
Typed::Single(single) => smallvec![single.into_raw()],
Typed::List(list) => smallvec![list.to_raw()],
}
} else {
match self.typed {
Typed::None => vec![],
Typed::Single(ref single) => vec![single.to_raw()],
Typed::List(ref list) => vec![list.to_raw()],
Typed::None => smallvec![],
Typed::Single(ref single) => smallvec![single.to_raw()],
Typed::List(ref list) => smallvec![list.to_raw()],
}
});
}
Expand All @@ -388,7 +404,7 @@ impl Inner {
}

// Moo!
fn raw_cow(&self) -> Option<Cow<[Vec<u8>]>> {
fn raw_cow(&self) -> Option<Cow<[ByteTendril]>> {
match self.raw {
Some(ref vec) => Some(Cow::Borrowed(&vec[..])),
None => match self.typed {
Expand Down Expand Up @@ -531,7 +547,7 @@ impl<'a, H: ToHeader + Header + Clone> TypedListRef<'a, H> {

impl Item {
/// Construct a new Item from a raw representation.
pub fn from_raw(raw: Vec<Vec<u8>>) -> Item {
pub fn from_raw(raw: SmallVec<[ByteTendril; 1]>) -> Item {
assert!(raw.len() > 0);
Item {
inner: MuCell::new(Inner {
Expand Down Expand Up @@ -583,7 +599,7 @@ impl Item {
/// fashion, it will be parsed from the raw form.
///
/// Only use this if you need to mutate the raw form; if you don't, use `raw`.
pub fn raw_mut(&mut self) -> &mut Vec<Vec<u8>> {
pub fn raw_mut(&mut self) -> &mut SmallVec<[ByteTendril; 1]> {
self.inner.borrow_mut().raw_mut(true)
}

Expand All @@ -597,15 +613,15 @@ impl Item {
/// dereference to get your raw reference.
///
/// See also `raw_mut`, if you wish to mutate the raw representation.
pub fn raw(&self) -> Option<Ref<Cow<[Vec<u8>]>>> {
pub fn raw(&self) -> Option<Ref<Cow<[ByteTendril]>>> {
self.inner.try_mutate(|inner| { let _ = inner.raw_mut(false); });
Ref::filter_map(self.inner.borrow(), |inner| inner.raw_cow())
}

/// Set the raw form of the header.
///
/// This invalidates the typed representation.
pub fn set_raw(&mut self, raw: Vec<Vec<u8>>) {
pub fn set_raw(&mut self, raw: SmallVec<[ByteTendril; 1]>) {
let inner = self.inner.borrow_mut();
inner.raw = Some(raw);
inner.typed = Typed::None;
Expand Down Expand Up @@ -711,11 +727,11 @@ impl<'a, T: ToHeader + Header + Clone> Get<'a> for TypedListRef<'a, T> {

#[doc(hidden)]
pub trait GetMut<'a> {
fn get_mut(entry: hash_map::Entry<'a, Cow<'static, str>, Item>) -> Self;
fn get_mut(entry: hash_map::Entry<'a, StrTendril, Item>) -> Self;
}

impl<'a, T: ToHeader + Header + Clone> GetMut<'a> for Option<&'a mut T> {
fn get_mut(entry: hash_map::Entry<'a, Cow<'static, str>, Item>) -> Self {
fn get_mut(entry: hash_map::Entry<'a, StrTendril, Item>) -> Self {
match entry {
hash_map::Entry::Occupied(entry) => entry.into_mut().single_typed_mut(),
hash_map::Entry::Vacant(_) => None,
Expand All @@ -724,7 +740,7 @@ impl<'a, T: ToHeader + Header + Clone> GetMut<'a> for Option<&'a mut T> {
}

impl<'a, T: ToHeader + Header + Clone> GetMut<'a> for &'a mut Vec<T> {
fn get_mut(entry: hash_map::Entry<'a, Cow<'static, str>, Item>) -> Self {
fn get_mut(entry: hash_map::Entry<'a, StrTendril, Item>) -> Self {
entry.or_insert_with(|| Item::from_list_typed::<T>(vec![])).list_typed_mut()
}
}
Expand Down
32 changes: 20 additions & 12 deletions src/headers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ use std::mem;
use std::collections::hash_map::HashMap;
use std::collections::hash_map::Entry::{Occupied, Vacant};

use tendril::{ByteTendril, StrTendril};
use smallvec::SmallVec;

use self::internals::Item;
pub use mucell::Ref;
pub use self::internals::TypedListRef;
Expand Down Expand Up @@ -56,8 +59,13 @@ pub trait Header: Any + HeaderClone {
/// present that there may be cases where there is a better choice. It might be shifted out of
/// the trait later.
// unstable: might be removed from the trait
fn to_raw(&self) -> Vec<u8> {
format!("{}", HeaderDisplayAdapter(&*self)).into_bytes()
fn to_raw(&self) -> ByteTendril {
use std::io::Write;

let mut out = ByteTendril::new();
// Meh, nothing is allowed to go wrong here.
let _ = write!(out, "{}", HeaderDisplayAdapter(&*self));
out
}

/// Convert the header to its raw value, consuming self.
Expand All @@ -70,7 +78,7 @@ pub trait Header: Any + HeaderClone {
/// present that there may be cases where there is a better choice. It might be shifted out of
/// the trait later.
// unstable: might be removed from the trait
fn into_raw(self: Box<Self>) -> Vec<u8> {
fn into_raw(self: Box<Self>) -> ByteTendril {
self.to_raw()
}
}
Expand Down Expand Up @@ -450,7 +458,7 @@ impl Header for &'static Header {
/// item.
#[derive(PartialEq)]
pub struct Headers {
data: HashMap<Cow<'static, str>, Item>,
data: HashMap<StrTendril, Item>,
}

impl Headers {
Expand All @@ -465,7 +473,7 @@ impl Headers {
///
/// The interface is strongly typed; see TODO for a more detailed explanation of how it works.
pub fn get<'a, M: Marker<'a>>(&'a self, _marker: M) -> M::Get {
internals::Get::get(self.data.get(M::header_name()))
internals::Get::get(self.data.get(M::header_name().as_bytes()))
}

/// Get a mutable reference to a header value.
Expand Down Expand Up @@ -512,8 +520,8 @@ impl Headers {
///
/// The returned value is a slice of each header field value.
#[inline]
pub fn get_raw<'a, M: Marker<'a>>(&'a self, _marker: M) -> Option<Ref<Cow<[Vec<u8>]>>> {
self.data.get(M::header_name()).and_then(|item| item.raw())
pub fn get_raw<'a, M: Marker<'a>>(&'a self, _marker: M) -> Option<Ref<Cow<[ByteTendril]>>> {
self.data.get(M::header_name().as_bytes()).and_then(|item| item.raw())
}

/// Get a mutable reference to the raw values of a header, by name.
Expand All @@ -522,15 +530,15 @@ impl Headers {
#[inline]
pub fn get_raw_mut<'a, M: Marker<'a>>
(&'a mut self, _marker: M)
-> Option<&mut Vec<Vec<u8>>> {
self.data.get_mut(M::header_name()).map(|item| item.raw_mut())
-> Option<&mut SmallVec<[ByteTendril; 1]>> {
self.data.get_mut(M::header_name().as_bytes()).map(|item| item.raw_mut())
}

/// Set the raw value of a header, by name.
///
/// This invalidates the typed representation.
#[inline]
pub fn set_raw<'a, M: Marker<'a>>(&'a mut self, _marker: M, value: Vec<Vec<u8>>) {
pub fn set_raw<'a, M: Marker<'a>>(&'a mut self, _marker: M, value: SmallVec<[ByteTendril; 1]>) {
match self.data.entry(M::header_name().into()) {
Vacant(entry) => { let _ = entry.insert(Item::from_raw(value)); },
Occupied(entry) => entry.into_mut().set_raw(value),
Expand All @@ -540,12 +548,12 @@ impl Headers {
/// Remove a header from the collection.
/// Returns true if the named header was present.
pub fn remove<'a, M: Marker<'a>>(&'a mut self, _marker: M) -> bool {
self.data.remove(M::header_name()).is_some()
self.data.remove(M::header_name().as_bytes()).is_some()
}

/// Returns true if the named header exists in the collection.
pub fn contains<'a, M: Marker<'a>>(&'a self, _marker: M) -> bool {
match self.data.get(M::header_name()) {
match self.data.get(M::header_name().as_bytes()) {
Some(item) => item.is_valid(),
None => false,
}
Expand Down
3 changes: 3 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ extern crate mucell;
#[macro_use]
extern crate mopa;

extern crate tendril;
extern crate smallvec;

pub mod method;
pub mod status;
pub mod headers;
Expand Down

0 comments on commit a583f14

Please sign in to comment.