Skip to content

Commit

Permalink
[rust] Genericize flexbuffer reader (google#6450)
Browse files Browse the repository at this point in the history
* feature/rust-tokio-bytes added feature name for tokio-bytes

* Added flexbuffer implementation, TODO: typecast to avoid recurse

* Converted codebase to utilize FlexBuffer implementation, need to resolve deserialization issues

* Added todo for lifetime issue, may use &'de [u8] for deserializer instead of current method

* Added proper &[u8] implementation

* Removed unused struct

* Added experimental fix to get_slice

* Added experimental fix to get_slice

* Avoided lifetime issues via ref structs, need to check if this hurts performance

* Updated deserializer implementation to allow for borrowed data from Reader struct

* Fixed bug with str

* Removed unnecessary generic parameter

* Added unsafe to avoid lifetime complaints, current tests pass, need to review alternatives to unsafe

* Opinionated: Removed bytes crate as this implementation could be done in a separate crate

* Cleaned up flatbuffer

* Fixed sample / example

* Resolved PR feedback, need to resolve issues with tests

* Cleaned up FlexBuffer trait to be an auto impl

* Removed TODO

* Reverted Deserializer to only support &'de [u8]

* Cleaned up / renamed function for clarification

* Renamed FlexBuffer -> InternalBuffer for clarification on its purpose

* Fixed issue with key bytes

* resolved issues with broken tests, confirming this is a breaking change

* Removed FIXME that's solved by splitting String and Key variants

* Implemented associated types approach

* Fixed backward slice logic

* Fixed MapReader compile error

* Added from_buffer for deserialization, removed  function since it's only needed for deserialization

* Removed dead code

* Cleaned up buffer, removed AsRef in favor of Deref

* Renamed Buffer::as_str -> Buffer::buffer_str

* Minor cleanup

* Updated documentation, need to fix tests

* Removed unnecessary &

* Removed unused lifetime

* removed unnecessary as_ref

* Minor optimization wrap-up

* resolved issue with Clone

* Added test to verify no deep-copy

* Added  for optimization

* Updated to use empty fn instead of default

* Updated comments / test name - plus the 0.3.0 version bump

* comment
  • Loading branch information
colindjk authored Feb 16, 2021
1 parent a20f606 commit 4174c10
Show file tree
Hide file tree
Showing 14 changed files with 385 additions and 128 deletions.
3 changes: 1 addition & 2 deletions rust/flexbuffers/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "flexbuffers"
version = "0.2.2"
version = "0.3.0"
authors = ["Casper Neo <[email protected]>", "FlatBuffers Maintainers"]
edition = "2018"
license = "Apache-2.0"
Expand All @@ -20,7 +20,6 @@ serialize_human_readable = []
# You basically never need this to be true unless reading data from old binaries.
deserialize_human_readable = []


[dependencies]
serde = "1.0.119"
serde_derive = "1.0.119"
Expand Down
81 changes: 81 additions & 0 deletions rust/flexbuffers/src/buffer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
use std::ops::{Deref, Range};

/// The underlying buffer that is used by a flexbuffer Reader.
///
/// This allows for custom buffer implementations as long as they can be viewed as a `&[u8]`.
pub trait Buffer: Deref<Target = [u8]> + Sized {
    // `BufferString` lets a buffer hand out a string type that is tied to the buffer's own
    // storage. A plain `&str` obtained with `std::str::from_utf8` inside a `&self` method would
    // only borrow from that `&self`, so it could not be returned with the lifetime of the
    // underlying buffer.
    //
    // Example: during deserialization a `BufferString` is returned, allowing the deserializer
    // to "borrow" the given str, because the underlying buffer guarantees how long it lives.
    /// A string type which will live at least as long as the `Buffer` itself.
    ///
    /// Derefs to a UTF-8 `str`. Values are produced by a successful `buffer_str` call (or by
    /// `empty_str`).
    type BufferString: Deref<Target = str> + Sized;

    /// Returns an instance of type `Self` that views `range` of this buffer. This allows for
    /// lifetimes to be tracked in cases of deserialization.
    ///
    /// It also lets custom buffers manage reference counts.
    ///
    /// Returns `None` if:
    /// - range start is greater than end
    /// - range end is out of bounds
    ///
    /// This operation should be fast -> O(1), ideally with no heap allocations.
    fn slice(&self, range: Range<usize>) -> Option<Self>;

    /// Creates a shallow copy of the given buffer, similar to `slice`.
    ///
    /// This operation should be fast -> O(1), ideally with no heap allocations.
    ///
    /// # Panics
    ///
    /// Panics if `slice(0..self.len())` returns `None`. That range is neither inverted nor out
    /// of bounds, so an implementation following the `slice` contract never triggers this.
    #[inline]
    fn shallow_copy(&self) -> Self {
        self.slice(0..self.len())
            .expect("Buffer::slice must accept the full range `0..len`")
    }

    /// Creates an empty instance of a `Buffer`. This is different than `Default` b/c it
    /// guarantees that the buffer instance will have length zero.
    ///
    /// Most impls should be able to implement this via `Default`.
    fn empty() -> Self;

    /// Returns the empty string as a `BufferString`.
    ///
    /// The default goes through `empty` and `buffer_str`; implementations may override it for
    /// optimization purposes.
    ///
    /// # Panics
    ///
    /// Panics if `buffer_str` fails on the buffer returned by `empty`. A zero-length byte
    /// sequence is valid UTF-8, so this only happens when `empty` breaks its contract.
    #[inline]
    fn empty_str() -> Self::BufferString {
        Self::empty()
            .buffer_str()
            .expect("an empty buffer is always valid UTF-8")
    }

    /// Attempts to convert the given buffer to a custom string type.
    ///
    /// This should fail if the type does not have valid UTF-8 bytes, and must be zero copy.
    fn buffer_str(&self) -> Result<Self::BufferString, std::str::Utf8Error>;
}

/// `Buffer` implementation for plain borrowed byte slices.
///
/// Every method re-borrows from the original `&'de [u8]`, so the values handed out are ordinary
/// `&'de [u8]` / `&'de str` references into the same memory.
impl<'de> Buffer for &'de [u8] {
    type BufferString = &'de str;

    /// Returns the sub-slice covered by `range`, or `None` when the range is inverted or ends
    /// past the end of the slice.
    #[inline]
    fn slice(&self, range: Range<usize>) -> Option<Self> {
        let bytes: &'de [u8] = *self;
        bytes.get(range)
    }

    /// Returns a zero-length slice.
    #[inline]
    fn empty() -> Self {
        Default::default()
    }

    /// Returns the empty string directly instead of going through `empty` and `buffer_str`.
    #[inline]
    fn empty_str() -> Self::BufferString {
        ""
    }

    /// Validates the bytes as UTF-8 and returns them as a `&'de str` without copying.
    #[inline]
    fn buffer_str(&self) -> Result<Self::BufferString, std::str::Utf8Error> {
        let bytes: &'de [u8] = *self;
        std::str::from_utf8(bytes)
    }
}

9 changes: 5 additions & 4 deletions rust/flexbuffers/src/builder/push.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@

use crate::builder::Builder;
use crate::private::Sealed;
use crate::{Blob, IndirectFloat, IndirectInt, IndirectUInt};
use crate::{Blob, Buffer, IndirectFloat, IndirectInt, IndirectUInt};

impl<'a> Sealed for Blob<'a> {}
impl<B: Buffer> Sealed for Blob<B> {}
impl Sealed for () {}

// TODO: String interning
Expand Down Expand Up @@ -44,9 +44,10 @@ impl Pushable for () {
builder.push_null();
}
}
impl<'a> Pushable for Blob<'a> {

impl<B: Buffer> Pushable for Blob<B> {
fn push_to_builder(self, builder: &mut Builder) {
builder.push_blob(self.0);
builder.push_blob(&self.0);
}
}

Expand Down
22 changes: 20 additions & 2 deletions rust/flexbuffers/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,15 @@ mod bitwidth;
mod builder;
mod flexbuffer_type;
mod reader;
mod buffer;

pub use bitwidth::BitWidth;
pub use builder::Error as SerializationError;
pub use builder::{
singleton, Builder, BuilderOptions, FlexbufferSerializer, MapBuilder, Pushable, VectorBuilder,
};
pub use flexbuffer_type::FlexBufferType;
pub use buffer::Buffer;
pub use reader::Error as ReaderError;
pub use reader::{DeserializationError, MapReader, Reader, ReaderIterator, VectorReader};
use serde::{Deserialize, Serialize};
Expand All @@ -64,17 +67,32 @@ pub fn to_vec<T: Serialize>(x: T) -> Result<Vec<u8>, SerializationError> {
x.serialize(&mut s)?;
Ok(s.take_buffer())
}

/// Deserialize a type from a flexbuffer stored in a byte slice.
///
/// The root `Reader` is located in `buf` first; a failure there is converted into a
/// `DeserializationError`. The root reader is then handed to `T::deserialize`, so `T` may
/// borrow from `buf` for `'de`.
pub fn from_slice<'de, T: Deserialize<'de>>(buf: &'de [u8]) -> Result<T, DeserializationError> {
    T::deserialize(Reader::get_root(buf)?)
}

/// Deserialize a type from a flexbuffer held in any `Buffer` implementation.
///
/// The buffer is viewed as a `&'de [u8]` through its `Deref` impl and then read exactly like
/// a plain slice, so `T` may borrow from `buf` for `'de`.
pub fn from_buffer<'de, T: Deserialize<'de>, B: Buffer>(
    buf: &'de B,
) -> Result<T, DeserializationError> {
    // `*buf` is the `B`, `**buf` is its `[u8]` target; re-borrow that for the full `'de`.
    let bytes: &'de [u8] = &**buf;
    T::deserialize(Reader::get_root(bytes)?)
}

/// This struct, when pushed will be serialized as a `FlexBufferType::Blob`.
///
/// A `Blob` is a variable width `length` followed by that many bytes of data.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Blob<'a>(pub &'a [u8]);
#[derive(Debug, PartialEq, Eq)]
pub struct Blob<B>(pub B);

impl<B: Buffer> Clone for Blob<B> {
fn clone(&self) -> Self {
Blob(self.0.shallow_copy())
}
}

/// This struct, when pushed, will be serialized as a `FlexBufferType::IndirectUInt`.
///
Expand Down
31 changes: 24 additions & 7 deletions rust/flexbuffers/src/reader/de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ impl std::fmt::Display for DeserializationError {
}
}
}

impl serde::de::Error for DeserializationError {
fn custom<T>(msg: T) -> Self
where
Expand All @@ -43,14 +44,16 @@ impl serde::de::Error for DeserializationError {
Self::Serde(format!("{}", msg))
}
}

impl std::convert::From<super::Error> for DeserializationError {
fn from(e: super::Error) -> Self {
Self::Reader(e)
}
}

impl<'de> SeqAccess<'de> for ReaderIterator<'de> {
impl<'de> SeqAccess<'de> for ReaderIterator<&'de [u8]> {
type Error = DeserializationError;

fn next_element_seed<T>(
&mut self,
seed: T,
Expand All @@ -64,19 +67,21 @@ impl<'de> SeqAccess<'de> for ReaderIterator<'de> {
Ok(None)
}
}

// Reports the iterator's `len()` to serde as the number of elements still to be visited.
fn size_hint(&self) -> Option<usize> {
    let remaining = self.len();
    Some(remaining)
}
}

struct EnumReader<'de> {
variant: &'de str,
value: Option<Reader<'de>>,
value: Option<Reader<&'de [u8]>>,
}

impl<'de> EnumAccess<'de> for EnumReader<'de> {
type Error = DeserializationError;
type Variant = Reader<'de>;
type Variant = Reader<&'de [u8]>;

fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error>
where
V: DeserializeSeed<'de>,
Expand All @@ -87,9 +92,10 @@ impl<'de> EnumAccess<'de> for EnumReader<'de> {
}

struct MapAccessor<'de> {
keys: ReaderIterator<'de>,
vals: ReaderIterator<'de>,
keys: ReaderIterator<&'de [u8]>,
vals: ReaderIterator<&'de [u8]>,
}

impl<'de> MapAccess<'de> for MapAccessor<'de> {
type Error = DeserializationError;

Expand All @@ -103,6 +109,7 @@ impl<'de> MapAccess<'de> for MapAccessor<'de> {
Ok(None)
}
}

fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value, Self::Error>
where
V: DeserializeSeed<'de>,
Expand All @@ -112,17 +119,20 @@ impl<'de> MapAccess<'de> for MapAccessor<'de> {
}
}

impl<'de> VariantAccess<'de> for Reader<'de> {
impl<'de> VariantAccess<'de> for Reader<&'de [u8]> {
type Error = DeserializationError;

// A unit variant has no payload to read, so this always succeeds without touching the reader.
fn unit_variant(self) -> Result<(), Self::Error> {
Ok(())
}

// A newtype variant holds a single value: the reader itself is handed to the seed as the
// deserializer for that value.
fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value, Self::Error>
where
    T: DeserializeSeed<'de>,
{
    T::deserialize(seed, self)
}

// Tuple variants have an internally tagged representation. They are vectors where Index 0 is
// the discriminant and index N is field N-1.
fn tuple_variant<V>(self, _len: usize, visitor: V) -> Result<V::Value, Self::Error>
Expand All @@ -131,6 +141,7 @@ impl<'de> VariantAccess<'de> for Reader<'de> {
{
visitor.visit_seq(self.as_vector().iter())
}

// Struct variants have an internally tagged representation. They are vectors where Index 0 is
// the discriminant and index N is field N-1.
fn struct_variant<V>(
Expand All @@ -149,7 +160,7 @@ impl<'de> VariantAccess<'de> for Reader<'de> {
}
}

impl<'de> Deserializer<'de> for crate::Reader<'de> {
impl<'de> Deserializer<'de> for Reader<&'de [u8]> {
type Error = DeserializationError;
fn is_human_readable(&self) -> bool {
cfg!(deserialize_human_readable)
Expand Down Expand Up @@ -188,22 +199,26 @@ impl<'de> Deserializer<'de> for crate::Reader<'de> {
(ty, bw) => unreachable!("TODO deserialize_any {:?} {:?}.", ty, bw),
}
}

serde::forward_to_deserialize_any! {
bool i8 i16 i32 i64 u8 u16 u32 u64 f32 f64 str unit unit_struct bytes
ignored_any map identifier struct tuple tuple_struct seq string
}

// A char is read back through `as_u8` and widened with `as char` before being given to the
// visitor.
fn deserialize_char<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
    V: Visitor<'de>,
{
    let byte = self.as_u8();
    visitor.visit_char(byte as char)
}

// Reads the value as a blob (propagating the reader error if that fails) and gives the visitor
// an owned copy of its bytes.
fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
    V: Visitor<'de>,
{
    let blob = self.get_blob()?;
    let owned = blob.0.to_vec();
    visitor.visit_byte_buf(owned)
}

fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
Expand All @@ -214,6 +229,7 @@ impl<'de> Deserializer<'de> for crate::Reader<'de> {
visitor.visit_some(self)
}
}

fn deserialize_newtype_struct<V>(
self,
_name: &'static str,
Expand All @@ -224,6 +240,7 @@ impl<'de> Deserializer<'de> for crate::Reader<'de> {
{
visitor.visit_newtype_struct(self)
}

fn deserialize_enum<V>(
self,
_name: &'static str,
Expand Down
24 changes: 14 additions & 10 deletions rust/flexbuffers/src/reader/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,20 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use super::{Reader, VectorReader};
use crate::{Buffer, Reader, VectorReader};
use std::iter::{DoubleEndedIterator, ExactSizeIterator, FusedIterator, Iterator};

/// Iterates over a flexbuffer vector, typed vector, or map. Yields [Readers](struct.Reader.html).
///
/// If any error occurs, the Reader is defaulted to a Null flexbuffer Reader.
pub struct ReaderIterator<'de> {
pub(super) reader: VectorReader<'de>,
pub struct ReaderIterator<B> {
pub(super) reader: VectorReader<B>,
pub(super) front: usize,
end: usize,
}
impl<'de> ReaderIterator<'de> {
pub(super) fn new(reader: VectorReader<'de>) -> Self {

impl<B: Buffer> ReaderIterator<B> {
pub(super) fn new(reader: VectorReader<B>) -> Self {
let end = reader.len();
ReaderIterator {
reader,
Expand All @@ -33,8 +34,9 @@ impl<'de> ReaderIterator<'de> {
}
}
}
impl<'de> Iterator for ReaderIterator<'de> {
type Item = Reader<'de>;

impl<B: Buffer> Iterator for ReaderIterator<B> {
type Item = Reader<B>;
fn next(&mut self) -> Option<Self::Item> {
if self.front < self.end {
let r = self.reader.idx(self.front);
Expand All @@ -49,7 +51,8 @@ impl<'de> Iterator for ReaderIterator<'de> {
(remaining, Some(remaining))
}
}
impl<'de> DoubleEndedIterator for ReaderIterator<'de> {

impl<B: Buffer> DoubleEndedIterator for ReaderIterator<B> {
fn next_back(&mut self) -> Option<Self::Item> {
if self.front < self.end {
self.end -= 1;
Expand All @@ -59,5 +62,6 @@ impl<'de> DoubleEndedIterator for ReaderIterator<'de> {
}
}
}
impl<'de> ExactSizeIterator for ReaderIterator<'de> {}
impl<'de> FusedIterator for ReaderIterator<'de> {}

impl<B: Buffer> ExactSizeIterator for ReaderIterator<B> {}
impl<B: Buffer> FusedIterator for ReaderIterator<B> {}
Loading

0 comments on commit 4174c10

Please sign in to comment.