initial working version

Cargo.lock (generated, new file, 70 lines)
@@ -0,0 +1,70 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3

[[package]]
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"

[[package]]
name = "levtree"
version = "0.1.0"
dependencies = [
 "sealed",
 "trait-group",
]

[[package]]
name = "proc-macro2"
version = "1.0.70"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b"
dependencies = [
 "unicode-ident",
]

[[package]]
name = "quote"
version = "1.0.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
dependencies = [
 "proc-macro2",
]

[[package]]
name = "sealed"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4a8caec23b7800fb97971a1c6ae365b6239aaeddfb934d6265f8505e795699d"
dependencies = [
 "heck",
 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
name = "syn"
version = "2.0.39"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a"
dependencies = [
 "proc-macro2",
 "quote",
 "unicode-ident",
]

[[package]]
name = "trait-group"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1b362975c6f0f21a41fbb9ca91fe5dcb7e01e12331360374347476b45f5cb9c"

[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"

Cargo.toml (20 lines changed)
@@ -2,14 +2,28 @@
name = "levtree"
version = "0.1.0"
authors = ["Walter Oggioni <oggioni.walter@gmail.com>"]
license = "MIT"
rust-version = "1.60"

[dependencies]
trait-group = "0.1.0"
sealed = "0.5"

[lib]
name = "levtree"
crate-type = ["lib"]
bench = false
path = "src/levtree/mod.rs"

[[bin]]
name = "linked_list"
path = "src/linked_list.rs"

[[example]]
name = "levtree_example"
path = "examples/levtree_example.rs"

[[bin]]
name = "levtree"
#path = "src/test.rs"
path = "src/main.rs"
name = "test"
path = "src/test.rs"

examples/cracklib-small (new file, 54763 lines)
File diff suppressed because it is too large.

examples/levtree_example.rs (new file, 50 lines)
@@ -0,0 +1,50 @@
extern crate levtree;

use levtree::LevTrie;
use levtree::Trie;
use levtree::CaseSensitiveKeyChecker;
use levtree::CaseSensitiveLevTrie;
use levtree::DamerauLevenshteinDistanceCalculator;

use std::io::BufReader;
use std::io::BufRead;

trait IntoCharSlice {
    fn into_char_slice(&self) -> Vec<char>;
}

impl IntoCharSlice for str {
    fn into_char_slice(&self) -> Vec<char> {
        self.chars().into_iter().collect::<Vec<_>>()
    }
}

fn main() {
    let bytes = include_bytes!("cracklib-small");
    let reader = BufReader::new(&bytes[..]);
    let mut trie : CaseSensitiveLevTrie = LevTrie::new();
    reader.lines()
        .map(|line| line.unwrap())
        .for_each(|word : String| {
            trie.add(word.chars());
        });

    let keys = ["camel", "coriolis", "mattel", "cruzer", "cpoper", "roublesoot"];

    for _ in 0..50 {
        for key in keys {
            let word = &key.into_char_slice()[..];
            trie.fuzzy_search::<DamerauLevenshteinDistanceCalculator>(word, 6);
        }
    }

    for key in keys {
        let word = &key.into_char_slice()[..];
        let results = trie.fuzzy_search::<DamerauLevenshteinDistanceCalculator>(word, 6);
        for result in results {
            let word : String = trie.lineal_descendant(result.word).into_iter().collect();
            println!("distance: {}, wordkey: {}", result.distance, word);
        }
        println!("")
    }
}

src/levtree.rs (deleted, 145 lines)
@@ -1,145 +0,0 @@
use std::collections::HashMap;

struct Node {
    key: char,
    row: Vec<u32>,
    prev: usize,
    next: usize,
    child: usize,
    parent: usize,
    processed: bool
}

impl Node {
    fn new(key: char) -> Node {
        Node {
            key,
            row: Vec::new(),
            prev: 0,
            next: 0,
            child: 0,
            parent: 0,
            processed: false,
        }
    }
}

pub struct Result {
    word: std::rc::Rc<String>,
    distance: usize,
}

impl PartialOrd for Result {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.distance.cmp(&other.distance))
    }
}

impl PartialEq for Result {
    fn eq(&self, other: &Self) -> bool {
        self.distance == other.distance
    }
}

impl Eq for Result {}

impl Ord for Result {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.distance.cmp(&other.distance)
    }
}
//struct Standing {
// size: usize,
// results: Vec<Result>,
//}
//
//impl Standing {
// pub fn new(size: usize) -> Standing {
// Standing {
// size,
// results: BTreeSet::new(),
// }
// }
//
// pub fn addResult(&mut self, res: Result) {
// self.results.push(res)
// }
//}

pub struct Tree {
    maxsize: usize,
    allocated: bool,
    torealloc: bool,
    nodes : Vec<Node>,
    checker: fn(c1: char, c2: char) -> bool,
    distance_calculator: fn() -> usize,
    words : HashMap<usize, String>
}

impl Tree {
    pub fn new(words: Vec<String>) -> Tree {
        let mut result = Tree {
            maxsize: words.len(),
            allocated: false,
            torealloc: false,
            nodes : vec!(Node::new('\0')),
            checker: |c1: char, c2: char| c1 == c2,
            distance_calculator: || -> usize { 0 },
            words : HashMap::new()
        };
        result.build_tree(words);
        result
    }

    fn root(&mut self) -> &mut Node {
        &mut self.nodes[0]
    }

    fn add_child(&mut self, parent_index : usize, key : char) -> usize {
        let mut new_child = Node::new(key);
        new_child.parent = parent_index;
        let mut child = self.nodes[parent_index].child;
        let mut prev = child;
        while child != 0 {
            prev = child;
            child = self.nodes[child].next
        }
        new_child.prev = prev;
        self.nodes.push(new_child);
        let new_child_index = self.nodes.len();
        if prev != 0 {
            self.nodes[prev].next = new_child_index;
        }
        if self.nodes[parent_index].child == 0 {
            self.nodes[parent_index].child = new_child_index
        }
        self.nodes.len() - 1
    }

    fn build_tree(&mut self, words : Vec<String>) {
        for word in words {
            let mut nindex = 0;
            for c in word.chars() {
                if self.nodes[nindex].child == 0 {
                    nindex = self.add_child(nindex, c);
                } else {
                    nindex = self.nodes[nindex].child;
                    loop {
                        if self.nodes[nindex].key == c {
                            break
                        } else {
                            if self.nodes[nindex].next == 0 {
                                nindex = self.add_child(nindex, c);
                                break
                            } else {
                                nindex = self.nodes[nindex].next;
                            }
                        }
                    }
                }
            }
            let leaf = self.add_child(nindex, '\0');
            self.words.insert(leaf, word);
        }
    }
}

src/levtree/keychecker.rs (new file, 27 lines)
@@ -0,0 +1,27 @@
use super::trienode::TrieKey;

pub trait KeyChecker<KEY>
    where KEY : TrieKey {
    fn check(k1 : Option<KEY>, k2 : Option<KEY>) -> bool;
}

pub struct CaseInsensitiveKeyChecker {}

impl KeyChecker<char> for CaseInsensitiveKeyChecker {
    fn check(k1 : Option<char>, k2 : Option<char>) -> bool {
        k1.zip(k2)
            .map(| (v1, v2) | {
                v1.to_lowercase().next() == v2.to_lowercase().next()
            })
            .unwrap_or_else(|| { k1 == k2} )
    }
}

pub struct CaseSensitiveKeyChecker {}

impl KeyChecker<char> for CaseSensitiveKeyChecker {
    fn check(k1 : Option<char>, k2 : Option<char>) -> bool {
        k1 == k2
    }
}
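
A minimal usage sketch of the two checkers above (illustrative only, not part of the commit; it assumes the crate builds as published here and is consumed through the re-exports in src/levtree/mod.rs):

use levtree::{CaseInsensitiveKeyChecker, CaseSensitiveKeyChecker, KeyChecker};

fn main() {
    // The case-insensitive checker lowercases both keys before comparing them.
    assert!(CaseInsensitiveKeyChecker::check(Some('A'), Some('a')));
    // The case-sensitive checker is plain equality of the two Options.
    assert!(!CaseSensitiveKeyChecker::check(Some('A'), Some('a')));
    // When either side is None, both checkers end up comparing the Options directly.
    assert!(CaseSensitiveKeyChecker::check(None::<char>, None));
}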

src/levtree/levtrie.rs (new file, 191 lines)
@@ -0,0 +1,191 @@
extern crate sealed;
use std::collections::BTreeSet;
use self::sealed::sealed;

use super::trie::Trie;
use super::trie::VisitOutcome;
use super::trienode::TrieNode;
use super::trienode::TrieKey;
use super::keychecker::KeyChecker;
use super::result::Result;

pub type LevTrie<KEY, KEYCHECKER,> = Trie<KEY, KEYCHECKER, ()>;
pub type LevTrieNode<KEY> = TrieNode<KEY, ()>;


#[sealed]
pub trait DistanceCalculator<KEY, KEYCHECKER>
    where KEY : TrieKey,
          KEYCHECKER : KeyChecker<KEY>,
{
    fn compute<>(
        workspace: &mut Vec<Vec<usize>>,
        nodes: &Vec<LevTrieNode<KEY>>,
        stack: &Vec<usize>,
        wordkey : &[KEY],
        worst_case : Option<usize>) -> VisitOutcome;
}

impl <KEY, KEYCHECKER> LevTrie<KEY, KEYCHECKER>
    where KEY : TrieKey, KEYCHECKER : KeyChecker<KEY> {
    pub fn new() -> LevTrie<KEY, KEYCHECKER> {
        Trie::empty(|| {})
    }

    pub fn from_words<T : IntoIterator, U : IntoIterator>(wordlist : U) -> LevTrie<KEY, KEYCHECKER>
        where T : IntoIterator<Item=KEY>, U : IntoIterator<Item=T>
    {
        let mut result = LevTrie::new();
        for word in wordlist {
            result.add(word);
        }
        result
    }

    pub fn fuzzy_search<DC>(&mut self, word: &[KEY], max_result: usize) -> BTreeSet<Result>
        where DC : DistanceCalculator<KEY, KEYCHECKER> {
        let word_len = word.into_iter().count();
        let mut workspace : &mut Vec<Vec<usize>> = &mut (0..self.nodes()).map(|_| { Vec::new() }).collect();
        let mut results = BTreeSet::new();
        let required_size= word_len + 1;
        let visit_pre = |stack : &Vec<usize>| -> VisitOutcome {
            let stack_size = stack.len();
            let current_node_id = *stack.last().unwrap();
            let payload = &mut workspace[current_node_id];
            payload.resize(required_size, usize::default());
            if stack_size == 1 {
                for i in 0..required_size {
                    payload[i] = i;
                }
            } else {
                for i in 0..required_size {
                    payload[i] = if i == 0 {
                        stack_size - 1
                    } else {
                        0
                    }
                }
            }
            if stack_size > 1 {
                let current_node = &mut self.get_node(current_node_id);
                if current_node.key.is_none() {
                    let distance = workspace[stack[stack_size - 2]][word_len];
                    results.insert(Result {
                        distance: distance,
                        word: current_node_id
                    });
                    if results.len() > max_result {
                        results.pop_last();
                    }
                    VisitOutcome::Skip
                } else {
                    let worst_case = results.last()
                        .filter(|_| { results.len() == max_result })
                        .map(|it| { it.distance });
                    DC::compute(&mut workspace, &self.nodes, stack, word, worst_case)
                }
            } else {
                VisitOutcome::Continue
            }
        };
        let visit_post = |_ : &Vec<usize>| {
        };
        self.walk(visit_pre, visit_post);
        results
    }
}


pub struct LevenshteinDistanceCalculator {}

#[sealed]
impl <KEY, KEYCHECKER> DistanceCalculator<KEY, KEYCHECKER> for LevenshteinDistanceCalculator
    where
        KEY : TrieKey,
        KEYCHECKER : KeyChecker<KEY>
{

    fn compute<>(
        workspace: &mut Vec<Vec<usize>>,
        nodes: &Vec<LevTrieNode<KEY>>,
        stack: &Vec<usize>,
        wordkey : &[KEY],
        worst_case : Option<usize>) -> VisitOutcome {
        let sz = stack.len();
        let key_size = wordkey.into_iter().count();
        for i in 1..=key_size {
            if KEYCHECKER::check(Some(wordkey[i - 1]), nodes[stack[sz - 1]].key) {
                workspace[stack[sz - 1]][i] = workspace[stack[sz - 2]][i - 1];
            } else {
                workspace[stack[sz - 1]][i] = std::cmp::min(
                    std::cmp::min(
                        workspace[stack[sz - 1]][i - 1],
                        workspace[stack[sz - 2]][i -1]
                    ),
                    workspace[stack[sz - 2]][i]
                ) + 1;
            }
        }
        let condition = worst_case.map(
            |wv| {
                wv <= *workspace[stack[sz - 1]][..].into_iter().min().unwrap()
            }).unwrap_or(false);
        if condition {
            VisitOutcome::Skip
        } else {
            VisitOutcome::Continue
        }
    }
}

pub struct DamerauLevenshteinDistanceCalculator {}

#[sealed]
impl <KEY, KEYCHECKER> DistanceCalculator<KEY, KEYCHECKER> for DamerauLevenshteinDistanceCalculator
    where
        KEY : TrieKey,
        KEYCHECKER : KeyChecker<KEY>
{

    fn compute<>(
        workspace: &mut Vec<Vec<usize>>,
        nodes: &Vec<LevTrieNode<KEY>>,
        stack: &Vec<usize>,
        wordkey : &[KEY],
        worst_case : Option<usize>) -> VisitOutcome {
        let sz = stack.len();
        let key_size = wordkey.into_iter().count();
        for i in 1..=key_size {
            if KEYCHECKER::check(Some(wordkey[i - 1]),
                stack.last().and_then(|it| {nodes[*it].key})) {
                workspace[stack[sz - 1]][i] = workspace[stack[sz - 2]][i - 1];
            } else {
                workspace[stack[sz - 1]][i] = std::cmp::min(
                    std::cmp::min(
                        workspace[stack[sz - 1]][i - 1],
                        workspace[stack[sz - 2]][i - 1]
                    ),
                    workspace[stack[sz - 2]][i]
                ) + 1;
            }
            if sz > 2 &&
                i > 1 &&
                KEYCHECKER::check(Some(wordkey[i - 2]), nodes[stack[sz - 1]].key) &&
                KEYCHECKER::check(Some(wordkey[i - 1]), nodes[stack[sz - 2]].key) {
                workspace[stack[sz - 1]][i] = std::cmp::min(
                    workspace[stack[sz - 1]][i],
                    workspace[stack[sz - 3]][i - 2] + 1,
                );
            }
        }
        let condition = worst_case.map(
            |wv| {
                wv <= *workspace[stack[sz - 2]][..].into_iter().min().unwrap()
            }).unwrap_or(false);
        if condition {
            VisitOutcome::Skip
        } else {
            VisitOutcome::Continue
        }
    }
}
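
Both calculators above fill one dynamic-programming row per trie node, with the parent node's row feeding the child's. For reference, a minimal sketch of the same Levenshtein recurrence applied to two plain words, without the trie bookkeeping or the worst_case cutoff (illustrative only; this standalone levenshtein helper is not part of the commit):

// Plain two-word Levenshtein distance with the classic row-by-row recurrence.
fn levenshtein(a: &[char], b: &[char]) -> usize {
    // prev[j] = distance between the processed prefix of `a` and the first j chars of `b`.
    let mut prev: Vec<usize> = (0..=b.len()).collect();
    for (i, &ca) in a.iter().enumerate() {
        let mut cur = vec![i + 1; b.len() + 1];
        for (j, &cb) in b.iter().enumerate() {
            cur[j + 1] = if ca == cb {
                prev[j] // keys match: copy the diagonal, no extra cost
            } else {
                // substitution (diagonal), deletion (above) or insertion (left), plus one
                1 + prev[j].min(prev[j + 1]).min(cur[j])
            };
        }
        prev = cur;
    }
    prev[b.len()]
}

fn main() {
    let a: Vec<char> = "kitten".chars().collect();
    let b: Vec<char> = "sitting".chars().collect();
    assert_eq!(levenshtein(&a, &b), 3);
}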

src/levtree/mod.rs (new file, 24 lines)
@@ -0,0 +1,24 @@
#[macro_use] extern crate trait_group;
mod levtrie;
// pub use self::levtrie::LevTrieNode as LevTrieNode;
pub use self::levtrie::LevTrie;
pub use self::levtrie::DistanceCalculator;
pub use self::levtrie::DamerauLevenshteinDistanceCalculator;
pub use self::levtrie::LevenshteinDistanceCalculator;

mod trie;
pub use self::trie::Trie as Trie;

mod trienode;
//use self::trienode::TrieNode as TrieNode;

mod keychecker;
pub use self::keychecker::KeyChecker;
pub use self::keychecker::CaseSensitiveKeyChecker;
pub use self::keychecker::CaseInsensitiveKeyChecker;

pub type CaseSensitiveLevTrie = LevTrie<char, CaseSensitiveKeyChecker>;
pub type CaseInSensitiveLevTrie = LevTrie<char, CaseInsensitiveKeyChecker>;

mod result;
pub use self::result::Result;

src/levtree/result.rs (new file, 59 lines)
@@ -0,0 +1,59 @@
use std::cmp::Ordering;

pub struct Result {
    pub word: usize,
    pub distance: usize,
}

impl PartialOrd for Result {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.distance.cmp(&other.distance))
            .filter(|it| { it != &Ordering::Equal})
            .or_else(|| { Some(self.word.cmp(&other.word)) })
    }
}

impl PartialEq for Result {
    fn eq(&self, other: &Self) -> bool {
        self.distance == other.distance && self.word == other.word
    }
}

impl Eq for Result {
}

impl Ord for Result {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        match self.distance.cmp(&other.distance) {
            std::cmp::Ordering::Equal => {
                self.word.cmp(&other.word)
            }
            std::cmp::Ordering::Greater => {
                std::cmp::Ordering::Greater
            }
            std::cmp::Ordering::Less => {
                std::cmp::Ordering::Less
            }
        }
    }
}



//struct Standing {
// size: usize,
// results: Vec<Result>,
//}
//
//impl Standing {
// pub fn new(size: usize) -> Standing {
// Standing {
// size,
// results: BTreeSet::new(),
// }
// }
//
// pub fn addResult(&mut self, res: Result) {
// self.results.push(res)
// }
//}
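
fuzzy_search collects these Result values into a BTreeSet, so the Ord impl above (distance first, then the tail-node index as tie-breaker) is what orders the candidates. A minimal sketch of that ordering (illustrative only, not part of the commit):

use std::collections::BTreeSet;
use levtree::Result;

fn main() {
    let mut results = BTreeSet::new();
    results.insert(Result { word: 7, distance: 2 });
    results.insert(Result { word: 3, distance: 1 });
    results.insert(Result { word: 9, distance: 1 });
    // Iteration is ascending: by distance first, then by the node index.
    let order: Vec<(usize, usize)> = results.iter().map(|r| (r.distance, r.word)).collect();
    assert_eq!(order, vec![(1, 3), (1, 9), (2, 7)]);
}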

src/levtree/trie.rs (new file, 243 lines)
@@ -0,0 +1,243 @@
use std::collections::BTreeSet;
use std::collections::HashMap;
use std::marker::PhantomData;

use super::trienode::TrieNode;
use super::trienode::TrieKey;
use super::levtrie::DistanceCalculator;
use super::keychecker::KeyChecker;
use super::result::Result;

pub enum VisitOutcome {
    Continue,
    Skip,
    EarlyExit,
}

pub struct Trie<KEY, KEYCHECKER, PAYLOAD>
    where KEY : TrieKey, KEYCHECKER : KeyChecker<KEY> {
    pub nodes : Vec<TrieNode<KEY, PAYLOAD>>,
    payload_initializer : fn() -> PAYLOAD,
    tails : BTreeSet<usize>,
    checker : PhantomData<KEYCHECKER>
}

impl <KEY, KEYCHECKER, PAYLOAD> Trie<KEY, KEYCHECKER, PAYLOAD>
    where KEY : TrieKey, KEYCHECKER : KeyChecker<KEY> {
    pub fn empty(initializer : fn() -> PAYLOAD) -> Trie<KEY, KEYCHECKER, PAYLOAD> {
        Trie {
            nodes: vec!(TrieNode::new0(None, initializer)),
            payload_initializer: initializer,
            tails : BTreeSet::new(),
            checker : PhantomData::default()
        }
    }

    pub fn trie_from_words<T : IntoIterator, U : IntoIterator>(initializer : fn() -> PAYLOAD, wordlist : U) -> Trie<KEY, KEYCHECKER, PAYLOAD>
        where T : IntoIterator<Item=KEY>, U : IntoIterator<Item=T>
    {
        let mut result = Trie::empty(initializer);
        for word in wordlist {
            result.add(word);
        }
        result
    }

    pub fn get_node_mut(&mut self, index : usize) -> &mut TrieNode<KEY, PAYLOAD> {
        &mut self.nodes[index]
    }

    pub fn get_node(&self, index : usize) -> &TrieNode<KEY, PAYLOAD> {
        &self.nodes[index]
    }

    pub fn nodes(&self) -> usize {
        self.nodes.len()
    }

    fn add_node(&mut self, key : Option<KEY>, parent : usize, prev : Option<usize>) -> usize {
        let mut result = TrieNode::new0(key, self.payload_initializer);
        let result_index = self.nodes();
        result.parent = Some(parent);
        match prev {
            Some(prev_node) => {
                self.get_node_mut(prev_node).next = Some(result_index);
                result.prev = prev;
            }
            None => {
                let parent_node = self.get_node_mut(parent);
                match parent_node.child {
                    None => {
                        parent_node.child = Some(result_index);
                    }
                    Some(parent_child) => {
                        let mut node = parent_child;
                        loop {
                            let next = self.get_node(node).next;
                            match next {
                                Some(next_node) => {
                                    node = next_node;
                                }
                                None => {
                                    break;
                                }
                            }
                        }
                        self.get_node_mut(node).next = Some(result_index);
                        result.prev = Some(node)
                    }
                }
            }
        }
        self.nodes.push(result);
        result_index
    }

    pub fn add<T : IntoIterator>(&mut self, path : T) -> (bool, usize)
        where T: IntoIterator<Item = KEY>
    {
        let mut result = false;
        let mut pnode = 0;
        'wordLoop:
        for key in path {
            let mut cnode = self.get_node(pnode).child;
            loop {
                match cnode {
                    Some(cnode_index) => {
                        let cnode_node = self.get_node(cnode_index);
                        if KEYCHECKER::check(cnode_node.key, Some(key)) {
                            pnode = cnode_index;
                            continue 'wordLoop;
                        } else if self.get_node(cnode_index).next.is_none() {
                            break;
                        } else {
                            cnode = self.get_node(cnode_index).next;
                        }
                    }
                    None => {
                        break;
                    }
                }
            }
            pnode = self.add_node(Some(key), pnode, cnode);
            result = true;
        }
        if result {
            let tail = self.add_node(None, pnode, None);
            self.tails.insert(tail);
            let mut node = Some(tail);
            loop {
                match node {
                    Some(n) => {
                        let current_node = self.get_node_mut(n);
                        current_node.ref_count += 1;
                        node = current_node.parent;
                    }
                    None => {
                        break;
                    }
                }
            }
            (true, tail)
        } else {
            (false, pnode)
        }
    }

    pub fn search(&mut self, path : Vec<KEY>) -> Option<usize>
    {
        let mut result : Option<usize> = None;
        let visit_pre = |stack : &Vec<usize>| -> VisitOutcome {
            if stack.len() == 1 {
                VisitOutcome::Continue
            } else {
                let last = *stack.last().expect("");
                let index= stack.len() - 2;
                let node = self.get_node(last);
                if index < path.len() {
                    if KEYCHECKER::check(node.key, Some(path[index])) {
                        VisitOutcome::Continue
                    } else {
                        VisitOutcome::Skip
                    }
                } else {
                    if node.key.is_none() {
                        result = Some(last);
                    }
                    VisitOutcome::EarlyExit
                }
            }
        };
        let visit_post = |stack : &Vec<usize>| {};
        self.walk(visit_pre, visit_post);
        result
    }

    pub fn lineal_descendant(&self, start : usize) -> Vec<&KEY> {
        let mut chars : Vec<&KEY> = vec!();
        let mut node_option = Some(start);
        loop {
            match node_option {
                Some(node) => {
                    let key = &self.get_node(node).key;
                    match key {
                        Some(key) => {
                            chars.push(key);
                        }
                        None => {
                        }
                    }
                    node_option = self.get_node(node).parent;
                }
                None => {
                    break;
                }
            }
        }
        chars.reverse();
        chars
    }

    pub fn walk<CB1, CB2>(&self, mut visit_pre : CB1, mut visit_post : CB2)
        where CB1: FnMut(&Vec<usize>) -> VisitOutcome,
              CB2: FnMut(&Vec<usize>) {
        let mut stack : Vec<(usize, Option<usize>)> = vec!();
        let mut public_stack : Vec<usize> = vec!();
        let root_node = self.get_node(0);
        stack.push((0, root_node.child));
        public_stack.push(0);
        visit_pre(&public_stack);
        while !stack.is_empty() {
            let last = &mut stack.last_mut().unwrap();
            match last.1 {
                Some(child_node_id) => {
                    let child_node = self.get_node(child_node_id);
                    last.1 = child_node.next;
                    public_stack.push(child_node_id);
                    let visit_pre_outcome = visit_pre(&public_stack);
                    match visit_pre_outcome {
                        VisitOutcome::Continue => {
                            stack.push((child_node_id, child_node.child));
                        }
                        VisitOutcome::Skip => {
                            stack.push((child_node_id, None));
                        }
                        VisitOutcome::EarlyExit => {
                            return
                        }
                    }
                }
                None => {
                    visit_post(&public_stack);
                    stack.pop();
                    public_stack.pop();
                }
            }
        }
    }

    pub fn tails(&self) -> &BTreeSet<usize> {
        &self.tails
    }
}
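
A minimal sketch of the add / lineal_descendant round trip through the public API above, using the CaseSensitiveLevTrie alias from src/levtree/mod.rs (illustrative only, not part of the commit; it assumes the crate builds as published here):

use levtree::{CaseSensitiveLevTrie, LevTrie};

fn main() {
    let mut trie: CaseSensitiveLevTrie = LevTrie::new();
    // add() returns (inserted, node index); the index is the tail node that marks the end of the word.
    let (inserted, tail) = trie.add("rust".chars());
    assert!(inserted);
    // lineal_descendant() follows parent links from a node back to the root,
    // recovering the key sequence stored along that path.
    let word: String = trie.lineal_descendant(tail).into_iter().collect();
    assert_eq!(word, "rust");
}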

src/levtree/trienode.rs (new file, 37 lines)
@@ -0,0 +1,37 @@

trait_group! {
    pub trait TrieKey : std::marker::Copy + std::fmt::Display + Sized
}

// pub trait KeyPath<KEY: TrieKey> : std::ops::Index<usize> + IntoIterator<Item = KEY> {}


pub struct TrieNode<KEY, PAYLOAD> where KEY : TrieKey {
    pub key: Option<KEY>,
    pub payload: PAYLOAD,
    pub prev: Option<usize>,
    pub next: Option<usize>,
    pub child: Option<usize>,
    pub parent: Option<usize>,
    pub ref_count: usize
}

impl <KEY, PAYLOAD> TrieNode<KEY, PAYLOAD> where KEY : TrieKey {
    fn new<U>(key: Option<KEY>, payload_initializer : U, prev: Option<usize>, next: Option<usize>, parent: Option<usize>, child : Option<usize>) -> TrieNode<KEY, PAYLOAD>
        where U : Fn() -> PAYLOAD {
        TrieNode {
            key,
            payload: payload_initializer(),
            prev: prev,
            next: next,
            child: child,
            parent: parent,
            ref_count: 0
        }
    }

    pub fn new0<U>(key: Option<KEY>, payload_initializer : U) -> TrieNode<KEY, PAYLOAD>
        where U : Fn() -> PAYLOAD {
        TrieNode::new(key, payload_initializer, None, None, None, None)
    }
}
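
TrieNode stores its links (parent, first child, next and previous sibling) as indices into the flat Vec owned by Trie rather than as references. A stripped-down sketch of that first-child / next-sibling layout (illustrative only; MiniNode and the walk below are invented for the sketch and are not part of the commit):

#[derive(Debug)]
struct MiniNode {
    key: Option<char>,
    next: Option<usize>,   // next sibling under the same parent
    child: Option<usize>,  // first child
    parent: Option<usize>,
}

fn main() {
    // Root at index 0, then 'a' and 'b' as siblings under the root.
    let mut nodes = vec![MiniNode { key: None, next: None, child: None, parent: None }];
    nodes.push(MiniNode { key: Some('a'), next: None, child: None, parent: Some(0) });
    nodes[0].child = Some(1);
    nodes.push(MiniNode { key: Some('b'), next: None, child: None, parent: Some(0) });
    nodes[1].next = Some(2);

    // Walking the root's children follows the `next` chain: 'a', then 'b'.
    let mut cursor = nodes[0].child;
    while let Some(i) = cursor {
        println!("{:?}", nodes[i].key);
        cursor = nodes[i].next;
    }
}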

src/main.rs (deleted, 20 lines)
@@ -1,20 +0,0 @@
mod levtree;

use std::fs::File;
//use std::io::prelude::*;
use std::io::BufReader;
use std::io::BufRead;

fn main() {
    //let mut words : Vec<String> = Vec::new();
    //let infile : File = try!(File::open("/usr/share/dict/cracklib-small"));
    let filename = "/usr/share/dict/cracklib-small";
    let infile : File = File::open(filename).expect(
        &format!("Error opening {}:", &filename)
    );
    let reader = BufReader::new(infile);
    let tree = levtree::Tree::new(
        reader.lines()
            .map(|line| line.unwrap())
            .collect());
}

src/test.rs (55 lines changed)
@@ -1,6 +1,55 @@


fn main(){
    let mut ints = vec!(1,2,3,4,5,6);
    ints[4] = 2;
use std::collections::HashMap;

// This struct has one lifetime parameter, 'src. The name is only used within the struct's definition.
#[derive(Debug)]
struct Config<'src> {
    hostname: &'src str,
    username: &'src str,
}

// This function also has a lifetime parameter, 'cfg. 'cfg is attached to the "config" parameter, which
// establishes that the data in "config" lives at least as long as the 'cfg lifetime.
// The returned struct also uses 'cfg for its lifetime, so it can live at most as long as 'cfg.
fn parse_config<'cfg>(config: &'cfg str) -> Config<'cfg> {
    let key_values: HashMap<_, _> = config
        .lines()
        .filter(|line| !line.starts_with('#'))
        .filter_map(|line| line.split_once('='))
        .map(|(key, value)| (key.trim(), value.trim()))
        .collect();
    Config {
        hostname: key_values["hostname"],
        username: key_values["username"],
    }
}

// fn main() {
// let config = parse_config(
// r#"hostname = foobar
// username=barfoo"#,
// );
// println!("Parsed config: {:#?}", config);
// }


struct Foo {}

fn foo<T>(v : &[T]) {
    for el in v.into_iter() {

    }
}

// fn foo2(v : &mut Vec<Vec<isize>>) {
// let first = &v[0];
// let second = &mut v[1];

// second[0] = first[1] + second[2];
// }
fn main() {
    let s = String::from("🤑😂🤩🤬");
    let c= s.chars().nth(1).unwrap();
    println!("{}", c);
}