Skip to content

brpandey/radix_trie

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

36 Commits
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

Radix Trie

A simple space-optimized trie written in Rust

flowchart TB
    id1(m) --> id2 & id3 & id6 & id7
    id2(andala)
    id3(u) --> id4 & id5
    id4(ave eraser)
    id5(scle cars)
    id6(exican sombrero)
    id7(o)
    id7 --> id8 & id9 & id10
    id8(bile)
    id9(n) --> id11 & id12
    id10(u) --> id13 & id14
    id11(eypot)
    id12(itor)
    id13(s) --> id15 & id16
    id14(thguard)
    id15(epad)
    id16(y brown hair dye)
Loading
use radix_trie::trie::{Trie, LabelsIter};
use std::collections::BTreeSet;

fn main() {
    // Radix Trie Example
    println!("Search suggestions");

    // Search terms is a trie,
    // K is AsRef<[u8]> which &str is and u16 fits into any generic V
    let search_terms: Trie<&str, u16> =
        [("mobile", 10),("mandala", 67),("mousy brown hair dye", 23),("moneypot", 45),
         ("mexican sombrero", 27), ("muscle cars", 11), ("mouthguard", 8),
         ("monitor", 7),("mousepad", 2361), ("muave eraser", 98)]
        .iter().cloned().collect();

    let set = labels_helper(search_terms.labels());

    // Check the label of the static tree (see picture diagram)
    assert_eq!(set, BTreeSet::from(["andala", "ave eraser", "bile", "epad", "exican sombrero", "eypot",
                                    "itor", "m", "n", "o", "s", "scle cars", "thguard", "u", "y brown hair dye"]));

    let mut keys = search_terms.all_keys("me");
    assert_eq!(vec!["mexican sombrero"], flatten_keys(keys.as_ref()));

    keys = search_terms.all_keys("mu");
    assert_eq!(vec!["muave eraser", "muscle cars"], flatten_keys(keys.as_ref()));

    let searched_word = "mouse".bytes();
    let mut user_typed = vec![];

    // Simulate user typing in a search query and
    // viewing the dropdown box of potentially matching results
    // as user is typing in the searched_word
    for (i, b) in searched_word.enumerate() {
        user_typed.push(b); // add each character (byte in this case) to user_typed

        // retrieve list of matching keys from trie
        keys = search_terms.all_keys(std::str::from_utf8(&user_typed).unwrap_or_default());

        let v: Vec<&str> = flatten_keys(keys.as_ref());
        println!("Search results, for typed text: {:?} ---> {:?}",
                 std::str::from_utf8(&user_typed).unwrap_or_default(),
                 &v);

        match i {
            0 => assert_eq!(v, vec!["mandala", "mexican sombrero", "mobile", "moneypot",
                                    "monitor", "mousepad", "mousy brown hair dye",
                                    "mouthguard", "muave eraser", "muscle cars"]),
            1 => assert_eq!(v, vec!["mobile", "moneypot", "monitor", "mousepad",
                                    "mousy brown hair dye", "mouthguard"]),
            2 => assert_eq!(v, vec!["mousepad", "mousy brown hair dye",
                                    "mouthguard"]),
            3 => assert_eq!(v, vec!["mousepad", "mousy brown hair dye"]),
            4 => assert_eq!(v, vec!["mousepad"]),
            _ => (),
        }
    }
}

pub fn flatten_keys<'a>(keys: Option<&'a Vec<Vec<u8>>>) -> Vec<&'a str> {
    keys.map_or(vec![], |k| {
        let mut v = k.iter()
            .map(|bytes| std::str::from_utf8(bytes).unwrap_or_default())
            .collect::<Vec<_>>();
        v.sort_unstable();
        v
    })
}

fn labels_helper<'a, K: 'a, V: 'a>(labels: LabelsIter<'a, K, V>) -> BTreeSet<&'a str> {
    labels.map(|bytes| std::str::from_utf8(bytes).unwrap()).collect::<BTreeSet<&str>>()
}
Search suggestions

Search results, for typed text: "m" ---> 
["mandala", "mexican sombrero", "mobile", "moneypot", "monitor", "mousepad", 
"mousy brown hair dye", "mouthguard", "muave eraser", "muscle cars"]

Search results, for typed text: "mo" ---> 
["mobile", "moneypot", "monitor", "mousepad", "mousy brown hair dye", "mouthguard"]

Search results, for typed text: "mou" ---> 
["mousepad", "mousy brown hair dye", "mouthguard"]

Search results, for typed text: "mous" ---> 
["mousepad", "mousy brown hair dye"]

Search results, for typed text: "mouse" ---> 
["mousepad"]

Upon deletion handles a combination of unmarking, pruning, and compression Picture taken from Advanced Algorithms and Data Structures by Marcello La Rocca

Thanks! Bibek

PS (with love)

Some tests ;)

flowchart LR;
    id1(an) ---> id2 & id3
    id2(t) --> id4 & id5
    id3(d)
    id4(hem) --> id6
    id5(i)
    id6(ion)
Loading
    #[test]
    fn search_basic() {
        let trie: Trie<_, _> = [("anthem", 1), ("anti", 2), ("anthemion", 7), ("and", 77)].iter().cloned().collect();
        assert_eq!(&1, trie.search("anthem").unwrap());
        assert_eq!(None, trie.search("ant"))
    }

    #[test]
    fn search_with_remove() {
        let mut trie: Trie<_, _> = [("anthem", "one"), ("anti", "two"), ("anthemion", "seven"), ("and", "seventy-seven")].iter().cloned().collect();
        assert_eq!(&"two", trie.search("anti").unwrap());
        assert_eq!(Some("two"), trie.remove("anti"));
        assert_eq!(None, trie.search("anti"));
    }

    #[test]
    fn search_with_replace_insert() {
        let mut trie: Trie<_, _> = [("anthem", 1), ("anti", 2), ("anthemion", 7), ("and", 77)].iter().cloned().collect();
        assert_eq!(&1, trie.search("anthem").unwrap());
        assert_eq!(Some(1), trie.insert("anthem", 98));
        assert_eq!(&98, trie.search("anthem").unwrap());
    }

    #[test]
    fn search_with_replace_insert_w_reference_values() {
        let important: u16 = 42;
        let i = &important;
        let vip: u16 = 98;

        let mut trie: Trie<_, _> = [("anthem", i), ("anti", i), ("anthemion", i), ("and", i)].iter().cloned().collect();
        assert_eq!(&i, trie.search("anthem").unwrap());
        assert_eq!(Some(i), trie.insert("anthem", &vip));
        assert_eq!(&&vip, trie.search("anthem").unwrap());
    }

    #[test]
    fn check_all_keys() {
        let trie: Trie<_, _> = [("anthem", 1), ("anti", 2), ("anthemion", 7), ("and", 77)].iter().cloned().collect();

        let mut keys = trie.all_keys("ant").unwrap();
        keys.sort();

        let nested = vec![
            vec![97, 110, 116, 104, 101, 109],
            vec![97, 110, 116, 104, 101, 109, 105, 111, 110],
            vec![97, 110, 116, 105]
        ];

        let flattened: Vec<u8> = nested.iter().flat_map(|v| v.iter()).cloned().collect();
        let keys_flattened: Vec<u8> = keys.iter().flat_map(|v| v.iter()).cloned().collect();

        assert_eq!(flattened, keys_flattened);

        let result = keys_helper(Some(keys.as_ref()));

        assert_eq!(vec!["anthem", "anthemion", "anti"], result);
    }

    #[test]
    fn check_longest_prefix() {
        let trie: Trie<_, _> = [("anthem", 1), ("anti", 2), ("anthemion", 7), ("and", 77)].iter().cloned().collect();

        let result = trie.longest_prefix("anthemio").unwrap().cloned().collect::<Vec<_>>();

        assert_eq!("anthem", std::str::from_utf8(&result).unwrap());
    }


    #[test]
    fn passthru_removes() {
        let mut trie: Trie<_, _> = [("anthem", 1), ("anti", 2), ("anthemion", 7), ("and", 77)].iter().cloned().collect();

        let keys = trie.all_keys("an");
        let result = keys_helper(keys.as_ref());

        assert_eq!(vec!["and", "anthem", "anthemion", "anti"], result);

        // remove passthru that is followed by a pruned edge
        assert_eq!(2, trie.remove("anti").unwrap());
        assert_eq!(None, trie.search("anti"));
        assert_eq!(&1, trie.search("anthem").unwrap());

        // remove passthrough that has a child
        assert_eq!(1, trie.remove("anthem").unwrap());

        let keys = trie.all_keys("an");
        let result = keys_helper(keys.as_ref());

        assert_eq!(vec!["and", "anthemion"], result);
    }


    #[test]
    fn delete_all() {
        let mut trie: Trie<_, _> = [("anthem", 1), ("anti", 2), ("anthemion", 7), ("and", 77)].iter().cloned().collect();

        let keys = trie.all_keys("an");
        let result = keys_helper(keys.as_ref());

        assert_eq!(vec!["and", "anthem", "anthemion", "anti"], result);

        // skip the first &str "and" then delete it after the loop 
        for (i, k) in result.iter().skip(1).enumerate() {
            trie.remove(k);

            let keys = trie.all_keys("an");
            let v: Vec<&str> = keys_helper(keys.as_ref());

            match i {
                0 => assert_eq!(v, vec!["and", "anthemion", "anti"]),
                1 => assert_eq!(v, vec!["and", "anti"]),
                2 => assert_eq!(v, vec!["and"]),
                _ => (),
            }

            assert_eq!(trie.remove("nonexistent1"), None);
        }

        assert_eq!(trie.remove("and").unwrap(), 77);
        assert_eq!(trie.all_keys("an"), None);
        assert_eq!(trie.remove("nonexistent2"), None);
        assert_eq!(trie.is_empty(), true);
    }


    #[test]
    fn check_compressed_labels() {
        let mut trie: Trie<_, _> = [("anthem", 1), ("anti", 2), ("anthemion", 7), ("and", 77)].iter().cloned().collect();

        let keys = trie.all_keys("an");
        let keys_vec = keys_helper(keys.as_ref());

        assert_eq!(vec!["and", "anthem", "anthemion", "anti"], keys_vec);

        // skip the first &str "and" then delete it after the loop 
        for (i, k) in keys_vec.iter().enumerate() {

            let set = labels_helper(trie.labels());

            // Through each delete iteration, check the labels of the trees
            // Since the deletion causes labels to be compressed/deleted expect number of labels to reduce
            match i {
                0 => assert_eq!(set, BTreeSet::from(["an", "d", "hem", "i", "ion", "t"])),
                1 => assert_eq!(set, BTreeSet::from(["ant", "hem", "i", "ion"])),
                2 => assert_eq!(set, BTreeSet::from(["ant", "hemion", "i"])),
                3 => assert_eq!(set, BTreeSet::from(["anti"])),
                _ => (),
            }

            trie.remove(k);
        }
    }


    #[test]
    fn check_values_iter() {
        let mut trie: Trie<_, _> = [("anthem", 1), ("anti", 2), ("anthemion", 7), ("and", 77)].iter().cloned().collect();

        let _ = trie.values_mut().map(|v| { *v = *v * 5; v } ).collect::<BTreeSet<&mut i32>>();
        assert_eq!(5, trie.remove("anthem").unwrap());

        let set2 = trie.values().collect::<BTreeSet<&i32>>();
        assert_eq!(BTreeSet::from([&10, &35, &385]), set2)
    }

    #[test]
    fn check_values_into_iter() {
        let trie: Trie<_, _> = [("anthem", 1), ("anti", 2), ("anthemion", 7), ("and", 77)].iter().cloned().collect();
        let vec1 = trie.into_iter().map(|mut v| { v = v + 1; v } ).collect::<BTreeSet<i32>>();
        assert_eq!(vec1, BTreeSet::from([2, 3, 8, 78]));
    }

    #[test]
    fn check_leafpairs_iter() {
        let trie: Trie<_, _> = [("anthem", 1), ("anti", 2), ("anthemion", 7), ("and", 77)].iter().cloned().collect();
        let set = trie.iter().collect::<BTreeSet<(&[u8], &i32)>>();
        assert_eq!(BTreeSet::from([("d".as_bytes(), &77), ("hem".as_bytes(), &1), ("i".as_bytes(), &2), ("ion".as_bytes(), &7)]), set)
    }

About

A simple space-optimized trie written in Rust

Resources

License

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published

Languages