Skip to content

Commit

Permalink
Bug 1845095: Bloom filter for fast-rejecting :has(). r=emilio
Browse files Browse the repository at this point in the history
  • Loading branch information
dshin-moz committed Jul 31, 2023
1 parent d2a6ec1 commit 604783a
Show file tree
Hide file tree
Showing 12 changed files with 241 additions and 20 deletions.
5 changes: 4 additions & 1 deletion servo/components/selectors/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use crate::bloom::BloomFilter;
use crate::nth_index_cache::{NthIndexCache, NthIndexCacheInner};
use crate::parser::{Selector, SelectorImpl};
use crate::relative_selector::cache::RelativeSelectorCache;
use crate::relative_selector::filter::RelativeSelectorFilterMap;
use crate::tree::{Element, OpaqueElement};

/// What kind of selector matching mode we should use.
Expand Down Expand Up @@ -142,13 +143,15 @@ impl RelativeSelectorMatchingState {
}
}

/// Set of caches that speed up expensive selector matches.
/// Set of caches (and cache-like structures) that speed up expensive selector matches.
///
/// Derives `Default`, so a new instance starts with every member in its default state.
#[derive(Default)]
pub struct SelectorCaches {
/// A cache to speed up nth-index-like selectors.
pub nth_index: NthIndexCache,
/// A cache to speed up relative selector matches. See the documentation of
/// the `relative_selector::cache` module.
pub relative_selector: RelativeSelectorCache,
/// A map of bloom filters to fast-reject relative selector matches.
/// Unlike the cache above, this is not keyed on a specific selector.
pub relative_selector_filter_map: RelativeSelectorFilterMap,
}

/// Data associated with the matching process for a element. This context is
Expand Down
19 changes: 19 additions & 0 deletions servo/components/selectors/matching.rs
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,25 @@ fn matches_relative_selectors<E: Element>(
// Did not match, continue on.
continue;
}
// See if we can fast-reject.
if context
.selector_caches
.relative_selector_filter_map
.fast_reject(
element,
relative_selector,
context.quirks_mode(),
)
{
// Alright, add as unmatched to cache.
context.selector_caches.relative_selector.add(
element.opaque(),
relative_selector,
RelativeSelectorCachedMatch::NotMatched,
);
// Then continue on.
continue;
}

let matched = matches_relative_selector(relative_selector, element, context, rightmost);
context.selector_caches.relative_selector.add(
Expand Down
44 changes: 30 additions & 14 deletions servo/components/selectors/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,9 +203,9 @@ macro_rules! with_all_bounds {
pub trait SelectorImpl: Clone + Debug + Sized + 'static {
type ExtraMatchingData<'a>: Sized + Default;
type AttrValue: $($InSelector)*;
type Identifier: $($InSelector)*;
type LocalName: $($InSelector)* + Borrow<Self::BorrowedLocalName>;
type NamespaceUrl: $($CommonBounds)* + Default + Borrow<Self::BorrowedNamespaceUrl>;
type Identifier: $($InSelector)* + PrecomputedHash;
type LocalName: $($InSelector)* + Borrow<Self::BorrowedLocalName> + PrecomputedHash;
type NamespaceUrl: $($CommonBounds)* + Default + Borrow<Self::BorrowedNamespaceUrl> + PrecomputedHash;
type NamespacePrefix: $($InSelector)* + Default;
type BorrowedNamespaceUrl: ?Sized + Eq;
type BorrowedLocalName: ?Sized + Eq;
Expand Down Expand Up @@ -523,18 +523,17 @@ pub struct AncestorHashes {
pub packed_hashes: [u32; 3],
}

fn collect_ancestor_hashes<Impl: SelectorImpl>(
iter: SelectorIter<Impl>,
pub(crate) fn collect_selector_hashes<'a, Impl: SelectorImpl, Iter>(
iter: Iter,
quirks_mode: QuirksMode,
hashes: &mut [u32; 4],
len: &mut usize,
create_inner_iterator: fn(&'a Selector<Impl>) -> Iter,
) -> bool
where
Impl::Identifier: PrecomputedHash,
Impl::LocalName: PrecomputedHash,
Impl::NamespaceUrl: PrecomputedHash,
Iter: Iterator<Item = &'a Component<Impl>>,
{
for component in AncestorIter::new(iter) {
for component in iter {
let hash = match *component {
Component::LocalName(LocalName {
ref name,
Expand Down Expand Up @@ -590,7 +589,13 @@ where
// in the filter if there's more than one selector, as that'd
// exclude elements that may match one of the other selectors.
if list.len() == 1 &&
!collect_ancestor_hashes(list.slice()[0].iter(), quirks_mode, hashes, len)
!collect_selector_hashes(
create_inner_iterator(&list.slice()[0]),
quirks_mode,
hashes,
len,
create_inner_iterator,
)
{
return false;
}
Expand All @@ -608,12 +613,17 @@ where
true
}

/// Collects bloom-filter hashes from the ancestor side of a selector.
///
/// Thin wrapper around `collect_selector_hashes`: `iter` is wrapped with
/// `AncestorIter::new`, and the callback handed over for nested selectors
/// builds an `AncestorIter` directly from the nested selector's iterator.
///
/// Hashes are written into `hashes`; `len` is the number of slots in use.
fn collect_ancestor_hashes<Impl: SelectorImpl>(
iter: SelectorIter<Impl>,
quirks_mode: QuirksMode,
hashes: &mut [u32; 4],
len: &mut usize,
) {
// The `bool` returned by `collect_selector_hashes` is discarded here.
collect_selector_hashes(AncestorIter::new(iter), quirks_mode, hashes, len, |s| AncestorIter(s.iter()));
}

impl AncestorHashes {
pub fn new<Impl: SelectorImpl>(selector: &Selector<Impl>, quirks_mode: QuirksMode) -> Self
where
Impl::Identifier: PrecomputedHash,
Impl::LocalName: PrecomputedHash,
Impl::NamespaceUrl: PrecomputedHash,
{
// Compute ancestor hashes for the bloom filter.
let mut hashes = [0u32; 4];
Expand Down Expand Up @@ -3455,6 +3465,12 @@ pub mod tests {
}
}

/// `PrecomputedHash` for the dummy atom type used by this module's tests.
impl PrecomputedHash for DummyAtom {
/// Returns the pointer obtained from the wrapped value, cast to `u32`.
fn precomputed_hash(&self) -> u32 {
// NOTE(review): `as u32` keeps only the low 32 bits of the address on
// targets with wider pointers; presumably acceptable for a test-only
// hash -- confirm.
self.0.as_ptr() as u32
}
}

impl<'i> Parser<'i> for DummyParser {
type Impl = DummySelectorImpl;
type Error = SelectorParseErrorKind<'i>;
Expand Down
158 changes: 158 additions & 0 deletions servo/components/selectors/relative_selector/filter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */

//! Bloom filter for relative selectors.
use fxhash::FxHashMap;

use crate::bloom::BloomFilter;
use crate::context::QuirksMode;
use crate::parser::{
collect_selector_hashes, RelativeSelector, RelativeSelectorMatchHint,
};
use crate::tree::{Element, OpaqueElement};
use crate::SelectorImpl;

/// State stored for each `Key` in `RelativeSelectorFilterMap`.
enum Entry {
/// Filter lookup happened once. Construction of the filter is expensive,
/// so the first lookup only records this marker; the filter itself is
/// built when the same key is looked up again.
Lookup,
/// Filter lookup happened more than once, and the filter for this element's
/// subtree traversal is constructed. Could use special handling for pseudo-classes
/// such as `:hover` and `:focus`, see Bug 1845503.
HasFilter(Box<BloomFilter>),
}

/// How much of an element's subtree a filter covers.
#[derive(Clone, Copy, Hash, Eq, PartialEq)]
enum TraversalKind {
/// Only the element's direct element children.
Children,
/// Every element descendant, at any depth.
Descendants,
}

/// Feeds the hashes of `element`'s element children into `filter`; with
/// `TraversalKind::Descendants`, deeper descendants are visited as well
/// (each child first, then that child's own subtree).
///
/// `element` itself is never added. The walk stops and returns `false` as soon
/// as any visited element's `add_element_unique_hashes` returns `false`; in
/// that case `filter` may already hold hashes from earlier elements. Returns
/// `true` once every element in scope has been visited.
fn add_to_filter<E: Element>(element: &E, filter: &mut BloomFilter, kind: TraversalKind) -> bool {
    let recurse = kind == TraversalKind::Descendants;
    let mut cursor = element.first_element_child();
    while let Some(current) = cursor {
        // Own hashes first, then (optionally) the subtree; either failing aborts.
        let accepted = current.add_element_unique_hashes(filter);
        if !accepted || (recurse && !add_to_filter(&current, filter, kind)) {
            return false;
        }
        cursor = current.next_sibling_element();
    }
    true
}

/// Map key for a filter: the element whose children/descendants the filter
/// covers, paired with the traversal kind used to build it.
#[derive(Clone, Copy, Hash, Eq, PartialEq)]
struct Key(OpaqueElement, TraversalKind);

/// Map of bloom filters for fast-rejecting relative selectors.
///
/// Entries are keyed by element and traversal kind rather than by selector.
#[derive(Default)]
pub struct RelativeSelectorFilterMap {
// Per-key state: either a "looked up once" marker or a built filter.
map: FxHashMap<Key, Entry>,
}

/// Checks `selector` against an already-built `filter`.
///
/// Returns `true` when at least one hash collected from the selector is
/// definitely absent from `filter`, i.e. the selector can be rejected without
/// traversing. Returns `false` when every collected hash might be present, or
/// when no hash was collected at all.
fn fast_reject<Impl: SelectorImpl>(
    selector: &RelativeSelector<Impl>,
    quirks_mode: QuirksMode,
    filter: &BloomFilter,
) -> bool {
    let mut collected = [0u32; 4];
    let mut count = 0;
    // Inner selectors contribute only their single rightmost compound. They
    // can break out of the anchor's subtree: `.anchor:has(:is(.a .b) .c)` can
    // match with `.a` being an ancestor of `.anchor`, so hashing `.a` could
    // reject a subtree that lacks `.a` even though the selector matches.
    // Strictly speaking, for a non-sibling subtree traversal the inner
    // selector could be followed up to its first descendant/child combinator
    // (in `.anchor:has(:is(.a ~ .b) .c)`, `.a` is guaranteed to be a
    // descendant of `.anchor`). That could be handled separately, but this
    // is simpler.
    collect_selector_hashes(
        selector.selector.iter(),
        quirks_mode,
        &mut collected,
        &mut count,
        |inner| inner.iter(),
    );
    // One hash that is certainly missing is enough for a definite rejection.
    (0..count).any(|slot| !filter.might_contain_hash(collected[slot]))
}

impl RelativeSelectorFilterMap {
    /// Returns the filter covering `element`'s children or descendants (as
    /// selected by `kind`), if one is available.
    ///
    /// Filters are built lazily. The first lookup for an element/kind pair only
    /// records that a lookup happened and yields `None`. A later lookup on the
    /// same pair attempts the construction; if `add_to_filter` reports failure
    /// the entry stays a plain lookup marker and `None` is returned again.
    fn get_filter<E: Element>(&mut self, element: &E, kind: TraversalKind) -> Option<&BloomFilter> {
        let slot = self
            .map
            .entry(Key(element.opaque(), kind))
            .and_modify(|existing| {
                // Only a bare lookup marker is upgraded; a built filter is reused as-is.
                if matches!(existing, Entry::Lookup) {
                    let mut built = BloomFilter::new();
                    // Walk all children/descendants of this element, adding their hashes.
                    if add_to_filter(element, &mut built, kind) {
                        *existing = Entry::HasFilter(Box::new(built));
                    }
                }
            })
            .or_insert(Entry::Lookup);
        match slot {
            Entry::HasFilter(boxed) => Some(&**boxed),
            Entry::Lookup => None,
        }
    }

    /// Potentially reject the given selector for this element.
    ///
    /// Returns `true` only when a filter is available and proves the selector
    /// cannot match; `false` means "not rejected", not "matches".
    ///
    /// This may look redundant next to the relative selector cache, but that
    /// cache is keyed on the selector-element pair, while this filter is keyed
    /// on the element and the traversal kind. It therefore helps when several
    /// selectors end up inspecting the same (or, for siblings, roughly the
    /// same) subtree.
    pub fn fast_reject<Impl: SelectorImpl, E: Element>(
        &mut self,
        element: &E,
        selector: &RelativeSelector<Impl>,
        quirks_mode: QuirksMode,
    ) -> bool {
        // A next-sibling-only match is not worth a filter.
        if matches!(selector.match_hint, RelativeSelectorMatchHint::InNextSibling) {
            return false;
        }
        let kind = if matches!(
            selector.match_hint,
            RelativeSelectorMatchHint::InSubtree |
                RelativeSelectorMatchHint::InNextSiblingSubtree |
                RelativeSelectorMatchHint::InSiblingSubtree
        ) {
            TraversalKind::Descendants
        } else {
            TraversalKind::Children
        };
        let targets_siblings = matches!(
            selector.match_hint,
            RelativeSelectorMatchHint::InSibling |
                RelativeSelectorMatchHint::InNextSiblingSubtree |
                RelativeSelectorMatchHint::InSiblingSubtree
        );
        let filter = if targets_siblings {
            // Use a filter holding all of the parent's children/subtree. It
            // rejects less often, especially for sibling subtree matches, but
            // costs less memory than keeping one filter per sibling.
            match element.parent_element() {
                Some(parent) => self.get_filter(&parent, kind),
                None => return false,
            }
        } else {
            self.get_filter(element, kind)
        };
        match filter {
            Some(filter) => fast_reject(selector, quirks_mode, filter),
            None => false,
        }
    }
}
1 change: 1 addition & 0 deletions servo/components/selectors/relative_selector/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */

pub mod cache;
pub mod filter;
5 changes: 5 additions & 0 deletions servo/components/selectors/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
//! between layout and style.
use crate::attr::{AttrSelectorOperation, CaseSensitivity, NamespaceConstraint};
use crate::bloom::BloomFilter;
use crate::matching::{ElementSelectorFlags, MatchingContext};
use crate::parser::SelectorImpl;
use std::fmt::Debug;
Expand Down Expand Up @@ -160,4 +161,8 @@ pub trait Element: Sized + Clone + Debug {
fn ignores_nth_child_selectors(&self) -> bool {
false
}

/// Add hashes unique to this element to the given filter, returning true
/// if any got added.
///
/// NOTE(review): the relative selector filter builder (`add_to_filter` in
/// `relative_selector/filter.rs`) stops and does not store a filter as soon as
/// this returns false for any visited element, so `false` effectively means
/// "a filter cannot be built from this element" -- confirm that this is the
/// intended contract for implementors.
fn add_element_unique_hashes(&self, filter: &mut BloomFilter) -> bool;
}
3 changes: 2 additions & 1 deletion servo/components/style/bloom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@ pub fn is_attr_name_excluded_from_filter(atom: &crate::Atom) -> bool {
*atom == atom!("class") || *atom == atom!("id") || *atom == atom!("style")
}

fn each_relevant_element_hash<E, F>(element: E, mut f: F)
/// Gather all relevant hashes for fast-reject filters from an element.
pub fn each_relevant_element_hash<E, F>(element: E, mut f: F)
where
E: TElement,
F: FnMut(u32),
Expand Down
2 changes: 1 addition & 1 deletion servo/components/style/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -651,7 +651,7 @@ pub struct ThreadLocalStyleContext<E: TElement> {
/// A checker used to ensure that parallel.rs does not recurse indefinitely
/// even on arbitrarily deep trees. See Gecko bug 1376883.
pub stack_limit_checker: StackLimitChecker,
/// Collection of caches for speeding up expensive selector matches.
/// Collection of caches (and cache-like structures) for speeding up expensive selector matches.
pub selector_caches: SelectorCaches,
}

Expand Down
8 changes: 6 additions & 2 deletions servo/components/style/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -312,8 +312,12 @@ impl ElementData {
return InvalidationResult::empty();
}

let mut processor =
StateAndAttrInvalidationProcessor::new(shared_context, element, self, selector_caches);
let mut processor = StateAndAttrInvalidationProcessor::new(
shared_context,
element,
self,
selector_caches,
);

let invalidator = TreeStyleInvalidator::new(element, stack_limit_checker, &mut processor);

Expand Down
7 changes: 7 additions & 0 deletions servo/components/style/gecko/wrapper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
//! the separation between the style system implementation and everything else.
use crate::applicable_declarations::ApplicableDeclarationBlock;
use crate::bloom::each_relevant_element_hash;
use crate::context::{PostAnimationTasks, QuirksMode, SharedStyleContext, UpdateAnimationsTasks};
use crate::data::ElementData;
use crate::dom::{LayoutIterator, NodeInfo, OpaqueNode, TDocument, TElement, TNode, TShadowRoot};
Expand Down Expand Up @@ -69,6 +70,7 @@ use dom::{DocumentState, ElementState};
use euclid::default::Size2D;
use fxhash::FxHashMap;
use selectors::attr::{AttrSelectorOperation, CaseSensitivity, NamespaceConstraint};
use selectors::bloom::{BloomFilter, BLOOM_HASH_MASK};
use selectors::matching::VisitedHandlingMode;
use selectors::matching::{ElementSelectorFlags, MatchingContext};
use selectors::sink::Push;
Expand Down Expand Up @@ -2090,4 +2092,9 @@ impl<'le> ::selectors::Element for GeckoElement<'le> {
fn ignores_nth_child_selectors(&self) -> bool {
self.is_root_of_native_anonymous_subtree()
}

/// Inserts every hash that `each_relevant_element_hash` reports for this
/// element into `filter`, masking each one with `BLOOM_HASH_MASK` first.
/// Always returns `true`.
fn add_element_unique_hashes(&self, filter: &mut BloomFilter) -> bool {
each_relevant_element_hash(*self, |hash| filter.insert_hash(hash & BLOOM_HASH_MASK));
true
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use crate::values::AtomIdent;
use crate::{CaseSensitivityExt, LocalName, Namespace, WeakAtom};
use dom::ElementState;
use selectors::attr::{AttrSelectorOperation, CaseSensitivity, NamespaceConstraint};
use selectors::bloom::BloomFilter;
use selectors::matching::{ElementSelectorFlags, MatchingContext};
use selectors::{Element, OpaqueElement};
use std::cell::Cell;
Expand Down Expand Up @@ -388,4 +389,9 @@ where
.assigned_slot()
.map(|e| ElementWrapper::new(e, self.snapshot_map))
}

/// Adds nothing to the filter and always returns `false`.
fn add_element_unique_hashes(&self, _filter: &mut BloomFilter) -> bool {
// Should not be relevant in the context of checking past elements in invalidation.
false
}
}
Loading

0 comments on commit 604783a

Please sign in to comment.