From 604783a77a77001d801888a15c452fad55e064d1 Mon Sep 17 00:00:00 2001 From: David Shin Date: Mon, 31 Jul 2023 12:21:24 +0000 Subject: [PATCH] Bug 1845095: Bloom filter for fast-rejecting `:has()`. r=emilio Differential Revision: https://phabricator.services.mozilla.com/D184525 --- servo/components/selectors/context.rs | 5 +- servo/components/selectors/matching.rs | 19 +++ servo/components/selectors/parser.rs | 44 +++-- .../selectors/relative_selector/filter.rs | 158 ++++++++++++++++++ .../selectors/relative_selector/mod.rs | 1 + servo/components/selectors/tree.rs | 5 + servo/components/style/bloom.rs | 3 +- servo/components/style/context.rs | 2 +- servo/components/style/data.rs | 8 +- servo/components/style/gecko/wrapper.rs | 7 + .../invalidation/element/element_wrapper.rs | 6 + servo/components/style/sharing/checks.rs | 3 +- 12 files changed, 241 insertions(+), 20 deletions(-) create mode 100644 servo/components/selectors/relative_selector/filter.rs diff --git a/servo/components/selectors/context.rs b/servo/components/selectors/context.rs index 2b61749d56f92..2ec37ce3fea45 100644 --- a/servo/components/selectors/context.rs +++ b/servo/components/selectors/context.rs @@ -7,6 +7,7 @@ use crate::bloom::BloomFilter; use crate::nth_index_cache::{NthIndexCache, NthIndexCacheInner}; use crate::parser::{Selector, SelectorImpl}; use crate::relative_selector::cache::RelativeSelectorCache; +use crate::relative_selector::filter::RelativeSelectorFilterMap; use crate::tree::{Element, OpaqueElement}; /// What kind of selector matching mode we should use. @@ -142,13 +143,15 @@ impl RelativeSelectorMatchingState { } } -/// Set of caches that speed up expensive selector matches. +/// Set of caches (And cache-likes) that speed up expensive selector matches. #[derive(Default)] pub struct SelectorCaches { /// A cache to speed up nth-index-like selectors. pub nth_index: NthIndexCache, /// A cache to speed up relative selector matches. See module documentation. pub relative_selector: RelativeSelectorCache, + /// A map of bloom filters to fast-reject relative selector matches. + pub relative_selector_filter_map: RelativeSelectorFilterMap, } /// Data associated with the matching process for a element. This context is diff --git a/servo/components/selectors/matching.rs b/servo/components/selectors/matching.rs index ba31bd44b5e70..51080a4c93a80 100644 --- a/servo/components/selectors/matching.rs +++ b/servo/components/selectors/matching.rs @@ -463,6 +463,25 @@ fn matches_relative_selectors( // Did not match, continue on. continue; } + // See if we can fast-reject. + if context + .selector_caches + .relative_selector_filter_map + .fast_reject( + element, + relative_selector, + context.quirks_mode(), + ) + { + // Alright, add as unmatched to cache. + context.selector_caches.relative_selector.add( + element.opaque(), + relative_selector, + RelativeSelectorCachedMatch::NotMatched, + ); + // Then continue on. + continue; + } let matched = matches_relative_selector(relative_selector, element, context, rightmost); context.selector_caches.relative_selector.add( diff --git a/servo/components/selectors/parser.rs b/servo/components/selectors/parser.rs index 7b0e0d83ac43e..da9f4aedb51d8 100644 --- a/servo/components/selectors/parser.rs +++ b/servo/components/selectors/parser.rs @@ -203,9 +203,9 @@ macro_rules! with_all_bounds { pub trait SelectorImpl: Clone + Debug + Sized + 'static { type ExtraMatchingData<'a>: Sized + Default; type AttrValue: $($InSelector)*; - type Identifier: $($InSelector)*; - type LocalName: $($InSelector)* + Borrow; - type NamespaceUrl: $($CommonBounds)* + Default + Borrow; + type Identifier: $($InSelector)* + PrecomputedHash; + type LocalName: $($InSelector)* + Borrow + PrecomputedHash; + type NamespaceUrl: $($CommonBounds)* + Default + Borrow + PrecomputedHash; type NamespacePrefix: $($InSelector)* + Default; type BorrowedNamespaceUrl: ?Sized + Eq; type BorrowedLocalName: ?Sized + Eq; @@ -523,18 +523,17 @@ pub struct AncestorHashes { pub packed_hashes: [u32; 3], } -fn collect_ancestor_hashes( - iter: SelectorIter, +pub(crate) fn collect_selector_hashes<'a, Impl: SelectorImpl, Iter>( + iter: Iter, quirks_mode: QuirksMode, hashes: &mut [u32; 4], len: &mut usize, + create_inner_iterator: fn(&'a Selector) -> Iter, ) -> bool where - Impl::Identifier: PrecomputedHash, - Impl::LocalName: PrecomputedHash, - Impl::NamespaceUrl: PrecomputedHash, + Iter: Iterator>, { - for component in AncestorIter::new(iter) { + for component in iter { let hash = match *component { Component::LocalName(LocalName { ref name, @@ -590,7 +589,13 @@ where // in the filter if there's more than one selector, as that'd // exclude elements that may match one of the other selectors. if list.len() == 1 && - !collect_ancestor_hashes(list.slice()[0].iter(), quirks_mode, hashes, len) + !collect_selector_hashes( + create_inner_iterator(&list.slice()[0]), + quirks_mode, + hashes, + len, + create_inner_iterator, + ) { return false; } @@ -608,12 +613,17 @@ where true } +fn collect_ancestor_hashes( + iter: SelectorIter, + quirks_mode: QuirksMode, + hashes: &mut [u32; 4], + len: &mut usize, +) { + collect_selector_hashes(AncestorIter::new(iter), quirks_mode, hashes, len, |s| AncestorIter(s.iter())); +} + impl AncestorHashes { pub fn new(selector: &Selector, quirks_mode: QuirksMode) -> Self - where - Impl::Identifier: PrecomputedHash, - Impl::LocalName: PrecomputedHash, - Impl::NamespaceUrl: PrecomputedHash, { // Compute ancestor hashes for the bloom filter. let mut hashes = [0u32; 4]; @@ -3455,6 +3465,12 @@ pub mod tests { } } + impl PrecomputedHash for DummyAtom { + fn precomputed_hash(&self) -> u32 { + self.0.as_ptr() as u32 + } + } + impl<'i> Parser<'i> for DummyParser { type Impl = DummySelectorImpl; type Error = SelectorParseErrorKind<'i>; diff --git a/servo/components/selectors/relative_selector/filter.rs b/servo/components/selectors/relative_selector/filter.rs new file mode 100644 index 0000000000000..234837a64d292 --- /dev/null +++ b/servo/components/selectors/relative_selector/filter.rs @@ -0,0 +1,158 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +/// Bloom filter for relative selectors. +use fxhash::FxHashMap; + +use crate::bloom::BloomFilter; +use crate::context::QuirksMode; +use crate::parser::{ + collect_selector_hashes, RelativeSelector, RelativeSelectorMatchHint, +}; +use crate::tree::{Element, OpaqueElement}; +use crate::SelectorImpl; + +enum Entry { + /// Filter lookup happened once. Construction of the filter is expensive, + /// so this is set when the element for subtree traversal is encountered. + Lookup, + /// Filter lookup happened more than once, and the filter for this element's + /// subtree traversal is constructed. Could use special handlings for pseudo-classes + /// such as `:hover` and `:focus`, see Bug 1845503. + HasFilter(Box), +} + +#[derive(Clone, Copy, Hash, Eq, PartialEq)] +enum TraversalKind { + Children, + Descendants, +} + +fn add_to_filter(element: &E, filter: &mut BloomFilter, kind: TraversalKind) -> bool { + let mut child = element.first_element_child(); + while let Some(e) = child { + if !e.add_element_unique_hashes(filter) { + return false; + } + if kind == TraversalKind::Descendants { + if !add_to_filter(&e, filter, kind) { + return false; + } + } + child = e.next_sibling_element(); + } + true +} + +#[derive(Clone, Copy, Hash, Eq, PartialEq)] +struct Key(OpaqueElement, TraversalKind); + +/// Map of bloom filters for fast-rejecting relative selectors. +#[derive(Default)] +pub struct RelativeSelectorFilterMap { + map: FxHashMap, +} + +fn fast_reject( + selector: &RelativeSelector, + quirks_mode: QuirksMode, + filter: &BloomFilter, +) -> bool { + let mut hashes = [0u32; 4]; + let mut len = 0; + // For inner selectors, we only collect from the single rightmost compound. + // This is because inner selectors can cause breakouts: e.g. `.anchor:has(:is(.a .b) .c)` + // can match when `.a` is the ancestor of `.anchor`. Including `.a` would possibly fast + // reject the subtree for not having `.a`, even if the selector would match. + // Technically, if the selector's traversal is non-sibling subtree, we can traverse + // inner selectors up to the point where a descendant/child combinator is encountered + // (e.g. In `.anchor:has(:is(.a ~ .b) .c)`, `.a` is guaranteed to be the a descendant + // of `.anchor`). While that can be separately handled, well, this is simpler. + collect_selector_hashes( + selector.selector.iter(), + quirks_mode, + &mut hashes, + &mut len, + |s| s.iter(), + ); + for i in 0..len { + if !filter.might_contain_hash(hashes[i]) { + // Definitely rejected. + return true; + } + } + false +} + +impl RelativeSelectorFilterMap { + fn get_filter(&mut self, element: &E, kind: TraversalKind) -> Option<&BloomFilter> { + // Insert flag to indicate that we looked up the filter once, and + // create the filter if and only if that flag is there. + let key = Key(element.opaque(), kind); + let entry = self + .map + .entry(key) + .and_modify(|entry| { + if !matches!(entry, Entry::Lookup) { + return; + } + let mut filter = BloomFilter::new(); + // Go through all children/descendants of this element and add their hashes. + if add_to_filter(element, &mut filter, kind) { + *entry = Entry::HasFilter(Box::new(filter)); + } + }) + .or_insert(Entry::Lookup); + match entry { + Entry::Lookup => None, + Entry::HasFilter(ref filter) => Some(filter.as_ref()), + } + } + + /// Potentially reject the given selector for this element. + /// This may seem redundant in presence of the cache, but the cache keys into the + /// selector-element pair specifically, while this filter keys to the element + /// and the traversal kind, so it is useful for handling multiple selectors + /// that effectively end up looking at the same(-ish, for siblings) subtree. + pub fn fast_reject( + &mut self, + element: &E, + selector: &RelativeSelector, + quirks_mode: QuirksMode, + ) -> bool { + if matches!(selector.match_hint, RelativeSelectorMatchHint::InNextSibling) { + // Don't bother. + return false; + } + let is_sibling = matches!( + selector.match_hint, + RelativeSelectorMatchHint::InSibling | + RelativeSelectorMatchHint::InNextSiblingSubtree | + RelativeSelectorMatchHint::InSiblingSubtree + ); + let is_subtree = matches!( + selector.match_hint, + RelativeSelectorMatchHint::InSubtree | + RelativeSelectorMatchHint::InNextSiblingSubtree | + RelativeSelectorMatchHint::InSiblingSubtree + ); + let kind = if is_subtree { + TraversalKind::Descendants + } else { + TraversalKind::Children + }; + if is_sibling { + // Contain the entirety of the parent's children/subtree in the filter, and use that. + // This is less likely to reject, especially for sibling subtree matches; however, it's less + // expensive memory-wise, compared to storing filters for each sibling. + element.parent_element().map_or(false, |parent| { + self.get_filter(&parent, kind) + .map_or(false, |filter| fast_reject(selector, quirks_mode, filter)) + }) + } else { + self.get_filter(element, kind) + .map_or(false, |filter| fast_reject(selector, quirks_mode, filter)) + } + } +} diff --git a/servo/components/selectors/relative_selector/mod.rs b/servo/components/selectors/relative_selector/mod.rs index a2fa380380497..6dd39f7327e6b 100644 --- a/servo/components/selectors/relative_selector/mod.rs +++ b/servo/components/selectors/relative_selector/mod.rs @@ -3,3 +3,4 @@ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ pub mod cache; +pub mod filter; diff --git a/servo/components/selectors/tree.rs b/servo/components/selectors/tree.rs index 560b1e61d8175..c1ea8ff5aeee0 100644 --- a/servo/components/selectors/tree.rs +++ b/servo/components/selectors/tree.rs @@ -6,6 +6,7 @@ //! between layout and style. use crate::attr::{AttrSelectorOperation, CaseSensitivity, NamespaceConstraint}; +use crate::bloom::BloomFilter; use crate::matching::{ElementSelectorFlags, MatchingContext}; use crate::parser::SelectorImpl; use std::fmt::Debug; @@ -160,4 +161,8 @@ pub trait Element: Sized + Clone + Debug { fn ignores_nth_child_selectors(&self) -> bool { false } + + /// Add hashes unique to this element to the given filter, returning true + /// if any got added. + fn add_element_unique_hashes(&self, filter: &mut BloomFilter) -> bool; } diff --git a/servo/components/style/bloom.rs b/servo/components/style/bloom.rs index c111454392e46..824acb7114106 100644 --- a/servo/components/style/bloom.rs +++ b/servo/components/style/bloom.rs @@ -111,7 +111,8 @@ pub fn is_attr_name_excluded_from_filter(atom: &crate::Atom) -> bool { *atom == atom!("class") || *atom == atom!("id") || *atom == atom!("style") } -fn each_relevant_element_hash(element: E, mut f: F) +/// Gather all relevant hash for fast-reject filters from an element. +pub fn each_relevant_element_hash(element: E, mut f: F) where E: TElement, F: FnMut(u32), diff --git a/servo/components/style/context.rs b/servo/components/style/context.rs index a7e1e7414a7e2..a2c020475bcc7 100644 --- a/servo/components/style/context.rs +++ b/servo/components/style/context.rs @@ -651,7 +651,7 @@ pub struct ThreadLocalStyleContext { /// A checker used to ensure that parallel.rs does not recurse indefinitely /// even on arbitrarily deep trees. See Gecko bug 1376883. pub stack_limit_checker: StackLimitChecker, - /// Collection of caches for speeding up expensive selector matches. + /// Collection of caches (And cache-likes) for speeding up expensive selector matches. pub selector_caches: SelectorCaches, } diff --git a/servo/components/style/data.rs b/servo/components/style/data.rs index ffcfae60ea1eb..44fcc34b4ee9a 100644 --- a/servo/components/style/data.rs +++ b/servo/components/style/data.rs @@ -312,8 +312,12 @@ impl ElementData { return InvalidationResult::empty(); } - let mut processor = - StateAndAttrInvalidationProcessor::new(shared_context, element, self, selector_caches); + let mut processor = StateAndAttrInvalidationProcessor::new( + shared_context, + element, + self, + selector_caches, + ); let invalidator = TreeStyleInvalidator::new(element, stack_limit_checker, &mut processor); diff --git a/servo/components/style/gecko/wrapper.rs b/servo/components/style/gecko/wrapper.rs index dccf8e7905b71..bfae9e39b45e0 100644 --- a/servo/components/style/gecko/wrapper.rs +++ b/servo/components/style/gecko/wrapper.rs @@ -15,6 +15,7 @@ //! the separation between the style system implementation and everything else. use crate::applicable_declarations::ApplicableDeclarationBlock; +use crate::bloom::each_relevant_element_hash; use crate::context::{PostAnimationTasks, QuirksMode, SharedStyleContext, UpdateAnimationsTasks}; use crate::data::ElementData; use crate::dom::{LayoutIterator, NodeInfo, OpaqueNode, TDocument, TElement, TNode, TShadowRoot}; @@ -69,6 +70,7 @@ use dom::{DocumentState, ElementState}; use euclid::default::Size2D; use fxhash::FxHashMap; use selectors::attr::{AttrSelectorOperation, CaseSensitivity, NamespaceConstraint}; +use selectors::bloom::{BloomFilter, BLOOM_HASH_MASK}; use selectors::matching::VisitedHandlingMode; use selectors::matching::{ElementSelectorFlags, MatchingContext}; use selectors::sink::Push; @@ -2090,4 +2092,9 @@ impl<'le> ::selectors::Element for GeckoElement<'le> { fn ignores_nth_child_selectors(&self) -> bool { self.is_root_of_native_anonymous_subtree() } + + fn add_element_unique_hashes(&self, filter: &mut BloomFilter) -> bool { + each_relevant_element_hash(*self, |hash| filter.insert_hash(hash & BLOOM_HASH_MASK)); + true + } } diff --git a/servo/components/style/invalidation/element/element_wrapper.rs b/servo/components/style/invalidation/element/element_wrapper.rs index 71c3c94dafc5f..eeb068accfd56 100644 --- a/servo/components/style/invalidation/element/element_wrapper.rs +++ b/servo/components/style/invalidation/element/element_wrapper.rs @@ -12,6 +12,7 @@ use crate::values::AtomIdent; use crate::{CaseSensitivityExt, LocalName, Namespace, WeakAtom}; use dom::ElementState; use selectors::attr::{AttrSelectorOperation, CaseSensitivity, NamespaceConstraint}; +use selectors::bloom::BloomFilter; use selectors::matching::{ElementSelectorFlags, MatchingContext}; use selectors::{Element, OpaqueElement}; use std::cell::Cell; @@ -388,4 +389,9 @@ where .assigned_slot() .map(|e| ElementWrapper::new(e, self.snapshot_map)) } + + fn add_element_unique_hashes(&self, _filter: &mut BloomFilter) -> bool { + // Should not be relevant in the context of checking past elements in invalidation. + false + } } diff --git a/servo/components/style/sharing/checks.rs b/servo/components/style/sharing/checks.rs index ddd3e379cd168..8413ab17302c2 100644 --- a/servo/components/style/sharing/checks.rs +++ b/servo/components/style/sharing/checks.rs @@ -137,7 +137,8 @@ where let for_element = target.revalidation_match_results(stylist, bloom, selector_caches); - let for_candidate = candidate.revalidation_match_results(stylist, bloom, selector_caches); + let for_candidate = + candidate.revalidation_match_results(stylist, bloom, selector_caches); // This assert "ensures", to some extent, that the two candidates have // matched the same rulehash buckets, and as such, that the bits we're