forked from facebook/hermes
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Summary: The various Hermes parsers require that their input be null terminated. That is an invariant that has proven difficult to maintain in practice because it is not explicit in the C++ interfaces. `NullTerminatedBuf` aims to solve this problem when Hermes parsers are exposed to Rust by making the invariant explicit. It provides a reference to a slice that is always explicitly null terminated. Internally it either just borrows a reference, or owns a copy of the original non-terminated input. `NullTerminatedBuf` can be created from `&[u8]`, `&str` and by reading from a file. The first two provide options to check whether the input is already null-terminated and just borrow it instead of copying. That can be useful if the input is already null terminated for another reason. The more interesting case from a performance perspective is creating a `NullTerminatedBuf` from a file. In terms of functionality it is equivalent to `llvm::MemoryBuffer::getFile()`. The current implementation is extremely primitive: it simply reads the entire file into a Vec and appends a 0. There are many possibilities for optimization, but applying them was not trivial since we weren't able to find portable ways of checking whether a file is a "regular file" (as opposed to a pipe, a character device, etc.) in Rust. Listing them here for posterity: - Obtain the size of the file in advance to avoid reallocations and to minimize the number of syscalls (hopefully down to one, if not interrupted). - If the file size is not a multiple of the page size, memory map it. The "filler space" past the end is guaranteed to be zeroes. - Expose `llvh::MemoryBuffer` through a couple of simple C wrappers and just use that. Reviewed By: avp Differential Revision: D30444136 fbshipit-source-id: 9b1d198a6de385ed075d89a1229313e14c56ad0a
- Loading branch information
1 parent
d5b1ff6
commit f1150ce
Showing
5 changed files
with
143 additions
and
33 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,3 +23,6 @@ pub mod hermes_utf; | |
|
||
#[allow(dead_code)] | ||
pub mod hparser; | ||
|
||
#[allow(dead_code)] | ||
pub mod nullbuf; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
/* | ||
* Copyright (c) Facebook, Inc. and its affiliates. | ||
* | ||
* This source code is licensed under the MIT license found in the | ||
* LICENSE file in the root directory of this source tree. | ||
*/ | ||
|
||
use std::io::{BufReader, Read}; | ||
use std::os::raw::c_char; | ||
|
||
/// A byte buffer whose last byte is always a null terminator.
///
/// The bytes are either owned (read from a file, or copied from input that was
/// not null-terminated) or borrowed from a slice that already ends in `0`.
pub struct NullTerminatedBuf<'a>(Inner<'a>);

// This enum must be separate because we can't make the variants private.
enum Inner<'a> {
    /// Owned storage; every constructor pushes the trailing `0` itself.
    Own(Vec<u8>),
    /// Borrowed storage whose final byte is already `0`.
    Ref(&'a [u8]),
}

impl NullTerminatedBuf<'_> {
    /// Create a borrowing view of an existing `NullTerminatedBuf`, which can
    /// then be passed around by value without copying the bytes.
    pub fn as_ref_buf<'a>(buf: &'a NullTerminatedBuf<'a>) -> NullTerminatedBuf<'a> {
        NullTerminatedBuf(Inner::Ref(buf.as_bytes()))
    }

    /// Read the whole file into an owned buffer and append a null terminator.
    ///
    /// The result owns its bytes, so it does not keep `f` borrowed.
    ///
    /// # Errors
    /// Returns any I/O error reported while reading the file.
    pub fn from_file(
        f: &mut std::fs::File,
    ) -> Result<NullTerminatedBuf<'static>, std::io::Error> {
        // TODO: this is an extremely naive implementation, it can be optimized in multiple ways:
        // - obtain the size of the file and perform a single allocation and few syscalls
        // - memory map the file
        // - just use LLVM's MemoryBuffer
        // One problem is that there isn't an obvious way in Rust to check portably whether
        // something has a fixed size and is memory mappable (i.e. is not a pipe).

        let mut reader = BufReader::new(f);
        let mut bytes = Vec::<u8>::new();
        reader.read_to_end(&mut bytes)?;
        bytes.push(0);

        Ok(NullTerminatedBuf(Inner::Own(bytes)))
    }

    /// Create by copying a slice and appending a null terminator.
    ///
    /// The result owns its bytes, so it does not keep `s` borrowed.
    pub fn from_slice_copy(s: &[u8]) -> NullTerminatedBuf<'static> {
        // Reserve room for the terminator up front to avoid a reallocation.
        let mut bytes = Vec::with_capacity(s.len() + 1);
        bytes.extend_from_slice(s);
        bytes.push(0);
        NullTerminatedBuf(Inner::Own(bytes))
    }

    /// Create from a slice that may already be null-terminated. If its last
    /// byte is `0`, borrow it; otherwise create a null-terminated copy.
    pub fn from_slice_check(s: &[u8]) -> NullTerminatedBuf<'_> {
        match s {
            [.., 0] => NullTerminatedBuf(Inner::Ref(s)),
            // Covers the empty slice as well as any slice not ending in 0.
            _ => Self::from_slice_copy(s),
        }
    }

    /// Create by copying a string and appending a null terminator.
    ///
    /// The result owns its bytes, so it does not keep `s` borrowed.
    pub fn from_str_copy(s: &str) -> NullTerminatedBuf<'static> {
        Self::from_slice_copy(s.as_bytes())
    }

    /// Create from a string that may already be null-terminated. If its last
    /// byte is `0`, borrow it; otherwise create a null-terminated copy.
    pub fn from_str_check(s: &str) -> NullTerminatedBuf<'_> {
        Self::from_slice_check(s.as_bytes())
    }

    /// Return the length of the data including the null terminator.
    pub fn len(&self) -> usize {
        self.as_bytes().len()
    }

    /// Just a placeholder always returning `false`, since there is always at
    /// least a null terminator.
    pub fn is_empty(&self) -> bool {
        false
    }

    /// A pointer to the start of the slice.
    /// # Safety
    /// It is not really unsafe, but is intended to be used in an unsafe context.
    pub unsafe fn as_ptr(&self) -> *const u8 {
        self.as_bytes().as_ptr()
    }

    /// Convenience wrapper returning C `const char *`.
    /// # Safety
    /// It is not really unsafe, but is intended to be used in an unsafe context.
    pub unsafe fn as_c_char_ptr(&self) -> *const c_char {
        self.as_bytes().as_ptr().cast::<c_char>()
    }

    /// The full contents, including the trailing null terminator.
    fn as_bytes(&self) -> &[u8] {
        match &self.0 {
            Inner::Own(v) => v.as_slice(),
            Inner::Ref(s) => s,
        }
    }
}

impl std::convert::AsRef<[u8]> for NullTerminatedBuf<'_> {
    /// Borrow the contents, including the trailing null terminator.
    fn as_ref(&self) -> &[u8] {
        self.as_bytes()
    }
}