module refactor

This commit is contained in:
2023-12-18 11:40:03 +08:00
parent 9a522ccb07
commit 9291180936
14 changed files with 640 additions and 591 deletions

View File

@@ -13,17 +13,14 @@ sealed = "0.5"
name = "levtree"
crate-type = ["lib"]
bench = false
path = "src/levtree/mod.rs"
[[bin]]
name = "linked_list"
path = "src/linked_list.rs"
[[example]]
name = "levtree_example"
path = "examples/levtree_example.rs"
[[bin]]
name = "test"
path = "src/test.rs"
name = "levtree_benchmark"
path = "examples/benchmark.rs"
[profile.release]
strip = true
lto = true
debug-assertions = false
codegen-units = 1

59
examples/benchmark.rs Normal file
View File

@@ -0,0 +1,59 @@
extern crate levtree;
use levtree::CaseSensitiveKeyChecker;
use levtree::CaseSensitiveLevTrie;
use levtree::DamerauLevenshteinDistanceCalculator;
use levtree::LevTrie;
use levtree::Trie;
use std::io::BufRead;
use std::io::BufReader;
/// Helper for turning string data into an owned sequence of `char`s.
trait IntoCharSlice {
    /// Returns every character of `self`, in order, as a newly allocated vector.
    fn into_char_slice(&self) -> Vec<char>;
}

impl IntoCharSlice for str {
    fn into_char_slice(&self) -> Vec<char> {
        let mut buffer = Vec::new();
        buffer.extend(self.chars());
        buffer
    }
}
/// Loads the embedded `cracklib-small` word list into a case-sensitive trie,
/// repeats a fixed set of fuzzy searches fifty times, then runs them once more
/// and prints every match together with its distance.
fn main() {
    let dictionary = include_bytes!("cracklib-small");
    let mut trie: CaseSensitiveLevTrie = LevTrie::new();
    for line in BufReader::new(&dictionary[..]).lines() {
        let entry: String = line.unwrap();
        trie.add(entry.chars());
    }
    let keys = [
        "camel",
        "coriolis",
        "mattel",
        "cruzer",
        "cpoper",
        "roublesoot",
    ];
    // Repeated searches whose results are discarded.
    for _ in 0..50 {
        for key in keys {
            let query = key.into_char_slice();
            trie.fuzzy_search::<DamerauLevenshteinDistanceCalculator>(&query[..], 6);
        }
    }
    // Final pass: report the matches found for each query.
    for key in keys {
        let query = key.into_char_slice();
        let matches = trie.fuzzy_search::<DamerauLevenshteinDistanceCalculator>(&query[..], 6);
        for hit in matches {
            let text: String = trie.lineal_descendant(hit.word).into_iter().collect();
            println!("distance: {}, wordkey: {}", hit.distance, text);
        }
        println!("")
    }
}

View File

@@ -1,50 +0,0 @@
extern crate levtree;
use levtree::LevTrie;
use levtree::Trie;
use levtree::CaseSensitiveKeyChecker;
use levtree::CaseSensitiveLevTrie;
use levtree::DamerauLevenshteinDistanceCalculator;
use std::io::BufReader;
use std::io::BufRead;
/// Helper for turning string data into an owned sequence of `char`s.
trait IntoCharSlice {
    /// Returns every character of `self`, in order, as a newly allocated vector.
    fn into_char_slice(&self) -> Vec<char>;
}

impl IntoCharSlice for str {
    fn into_char_slice(&self) -> Vec<char> {
        let mut buffer = Vec::new();
        buffer.extend(self.chars());
        buffer
    }
}
/// Loads the embedded `cracklib-small` word list into a case-sensitive trie,
/// repeats a fixed set of fuzzy searches fifty times, then runs them once more
/// and prints every match together with its distance.
fn main() {
    let dictionary = include_bytes!("cracklib-small");
    let mut trie: CaseSensitiveLevTrie = LevTrie::new();
    for line in BufReader::new(&dictionary[..]).lines() {
        let entry: String = line.unwrap();
        trie.add(entry.chars());
    }
    let keys = ["camel", "coriolis", "mattel", "cruzer", "cpoper", "roublesoot"];
    // Repeated searches whose results are discarded.
    for _ in 0..50 {
        for key in keys {
            let query = key.into_char_slice();
            trie.fuzzy_search::<DamerauLevenshteinDistanceCalculator>(&query[..], 6);
        }
    }
    // Final pass: report the matches found for each query.
    for key in keys {
        let query = key.into_char_slice();
        let matches = trie.fuzzy_search::<DamerauLevenshteinDistanceCalculator>(&query[..], 6);
        for hit in matches {
            let text: String = trie.lineal_descendant(hit.word).into_iter().collect();
            println!("distance: {}, wordkey: {}", hit.distance, text);
        }
        println!("")
    }
}

26
src/keychecker.rs Normal file
View File

@@ -0,0 +1,26 @@
use super::trienode::TrieKey;
/// Strategy that decides whether two optional trie keys count as the same key.
///
/// The check is an associated function (no `self`), so implementors are used
/// purely as type-level markers.
pub trait KeyChecker<KEY>
where
KEY: TrieKey,
{
/// Returns `true` when `k1` and `k2` are to be treated as matching keys.
fn check(k1: Option<KEY>, k2: Option<KEY>) -> bool;
}
/// [`KeyChecker`] for `char` keys that ignores letter case.
pub struct CaseInsensitiveKeyChecker {}

impl KeyChecker<char> for CaseInsensitiveKeyChecker {
    /// Compares two optional characters by their lowercase forms.
    ///
    /// Two present characters match when their complete lowercase mappings are
    /// identical. If either side is absent, the operands match only when both
    /// are `None`.
    fn check(k1: Option<char>, k2: Option<char>) -> bool {
        match (k1, k2) {
            // `char::to_lowercase` may yield more than one `char`, so the whole
            // mapping is compared rather than only its first element.
            (Some(v1), Some(v2)) => v1.to_lowercase().eq(v2.to_lowercase()),
            (None, None) => true,
            _ => false,
        }
    }
}
/// [`KeyChecker`] for `char` keys that requires the characters to be identical.
pub struct CaseSensitiveKeyChecker {}
impl KeyChecker<char> for CaseSensitiveKeyChecker {
/// Plain `Option<char>` equality: two `None`s match, and two `Some` values
/// match when they hold the same character.
fn check(k1: Option<char>, k2: Option<char>) -> bool {
k1 == k2
}
}

View File

@@ -1,27 +0,0 @@
use super::trienode::TrieKey;
/// Strategy that decides whether two optional trie keys count as the same key.
///
/// The check is an associated function (no `self`), so implementors are used
/// purely as type-level markers.
pub trait KeyChecker<KEY>
where KEY : TrieKey {
/// Returns `true` when `k1` and `k2` are to be treated as matching keys.
fn check(k1 : Option<KEY>, k2 : Option<KEY>) -> bool;
}
/// [`KeyChecker`] for `char` keys that ignores letter case.
pub struct CaseInsensitiveKeyChecker {}

impl KeyChecker<char> for CaseInsensitiveKeyChecker {
    /// Compares two optional characters by their lowercase forms.
    ///
    /// Two present characters match when their complete lowercase mappings are
    /// identical. If either side is absent, the operands match only when both
    /// are `None`.
    fn check(k1: Option<char>, k2: Option<char>) -> bool {
        match (k1, k2) {
            // `char::to_lowercase` may yield more than one `char`, so the whole
            // mapping is compared rather than only its first element.
            (Some(v1), Some(v2)) => v1.to_lowercase().eq(v2.to_lowercase()),
            (None, None) => true,
            _ => false,
        }
    }
}
/// [`KeyChecker`] for `char` keys that requires the characters to be identical.
pub struct CaseSensitiveKeyChecker {}
impl KeyChecker<char> for CaseSensitiveKeyChecker {
/// Plain `Option<char>` equality: two `None`s match, and two `Some` values
/// match when they hold the same character.
fn check(k1 : Option<char>, k2 : Option<char>) -> bool {
k1 == k2
}
}

View File

@@ -1,191 +0,0 @@
extern crate sealed;
use std::collections::BTreeSet;
use self::sealed::sealed;
use super::trie::Trie;
use super::trie::VisitOutcome;
use super::trienode::TrieNode;
use super::trienode::TrieKey;
use super::keychecker::KeyChecker;
use super::result::Result;
/// A [`Trie`] whose nodes carry no payload (`()`); the fuzzy-search methods are defined on it.
pub type LevTrie<KEY, KEYCHECKER,> = Trie<KEY, KEYCHECKER, ()>;
/// Node type stored by a [`LevTrie`]: a [`TrieNode`] with a `()` payload.
pub type LevTrieNode<KEY> = TrieNode<KEY, ()>;
/// Edit-distance strategy plugged into `LevTrie::fuzzy_search`.
///
/// `compute` is called from the pre-visit callback for nodes below the root
/// that carry a key; it fills in the row of the node on top of `stack`.
#[sealed]
pub trait DistanceCalculator<KEY, KEYCHECKER>
where KEY : TrieKey,
KEYCHECKER : KeyChecker<KEY>,
{
/// Updates the distance row of the current node and tells the walker what to do next.
///
/// * `workspace` - one row per trie node, indexed by node id; `fuzzy_search`
///   sizes the visited rows to `wordkey.len() + 1` entries.
/// * `nodes` - the trie's node storage, indexed by node id.
/// * `stack` - node ids on the path from the root to the current node (last element).
/// * `wordkey` - the word being searched for.
/// * `worst_case` - distance of the worst retained result once the result set
///   is full, otherwise `None`.
fn compute<>(
workspace: &mut Vec<Vec<usize>>,
nodes: &Vec<LevTrieNode<KEY>>,
stack: &Vec<usize>,
wordkey : &[KEY],
worst_case : Option<usize>) -> VisitOutcome;
}
impl<KEY, KEYCHECKER> LevTrie<KEY, KEYCHECKER>
where
    KEY: TrieKey,
    KEYCHECKER: KeyChecker<KEY>,
{
    /// Creates an empty trie whose nodes carry a `()` payload.
    pub fn new() -> LevTrie<KEY, KEYCHECKER> {
        Trie::empty(|| {})
    }

    /// Builds a trie by adding every word of `wordlist` in iteration order.
    pub fn from_words<T: IntoIterator, U: IntoIterator>(wordlist: U) -> LevTrie<KEY, KEYCHECKER>
    where
        T: IntoIterator<Item = KEY>,
        U: IntoIterator<Item = T>,
    {
        let mut trie = LevTrie::new();
        for entry in wordlist {
            trie.add(entry);
        }
        trie
    }

    /// Walks the whole trie and returns at most `max_result` hits for `word`.
    ///
    /// The distance rows are filled by the calculator `DC`. Hits are ordered
    /// by distance first and node id second; whenever the set grows beyond
    /// `max_result`, its last (worst) element is dropped.
    pub fn fuzzy_search<DC>(&mut self, word: &[KEY], max_result: usize) -> BTreeSet<Result>
    where
        DC: DistanceCalculator<KEY, KEYCHECKER>,
    {
        let word_len = word.len();
        let row_len = word_len + 1;
        // One distance row per node, indexed by node id; rows are sized on first visit.
        let mut workspace: Vec<Vec<usize>> = vec![Vec::new(); self.nodes()];
        let mut results: BTreeSet<Result> = BTreeSet::new();
        let visit_pre = |stack: &Vec<usize>| -> VisitOutcome {
            let depth = stack.len();
            let node_id = *stack.last().unwrap();
            let row = &mut workspace[node_id];
            row.clear();
            if depth == 1 {
                // Root row: 0, 1, 2, ..., word_len.
                row.extend(0..row_len);
                return VisitOutcome::Continue;
            }
            // Any other row starts as [depth - 1, 0, 0, ...].
            row.resize(row_len, 0);
            row[0] = depth - 1;
            if self.get_node(node_id).key.is_some() {
                // Only a full result set provides a bound for the calculator.
                let worst_case = if results.len() == max_result {
                    results.last().map(|it| it.distance)
                } else {
                    None
                };
                return DC::compute(&mut workspace, &self.nodes, stack, word, worst_case);
            }
            // A key-less node ends a stored word: its distance is the last
            // cell of the parent's row.
            let distance = workspace[stack[depth - 2]][word_len];
            results.insert(Result {
                distance,
                word: node_id,
            });
            if results.len() > max_result {
                results.pop_last();
            }
            VisitOutcome::Skip
        };
        self.walk(visit_pre, |_: &Vec<usize>| {});
        results
    }
}
/// [`DistanceCalculator`] that fills each row from the parent row and the
/// cell to its left (match, or minimum of three neighbours plus one).
pub struct LevenshteinDistanceCalculator {}

#[sealed]
impl<KEY, KEYCHECKER> DistanceCalculator<KEY, KEYCHECKER> for LevenshteinDistanceCalculator
where
    KEY: TrieKey,
    KEYCHECKER: KeyChecker<KEY>,
{
    /// Fills columns `1..=wordkey.len()` of the current node's row.
    ///
    /// Returns `Skip` when `worst_case` is set and is not greater than the
    /// smallest value in the current row, otherwise `Continue`.
    fn compute(
        workspace: &mut Vec<Vec<usize>>,
        nodes: &Vec<LevTrieNode<KEY>>,
        stack: &Vec<usize>,
        wordkey: &[KEY],
        worst_case: Option<usize>,
    ) -> VisitOutcome {
        let depth = stack.len();
        for (offset, &symbol) in wordkey.iter().enumerate() {
            let col = offset + 1;
            let current = stack[depth - 1];
            let parent = stack[depth - 2];
            let cell = if KEYCHECKER::check(Some(symbol), nodes[current].key) {
                // Keys match: take the diagonal cell unchanged.
                workspace[parent][offset]
            } else {
                let left = workspace[current][offset];
                let diagonal = workspace[parent][offset];
                let above = workspace[parent][col];
                left.min(diagonal).min(above) + 1
            };
            workspace[current][col] = cell;
        }
        match worst_case {
            Some(worst) if worst <= *workspace[stack[depth - 1]].iter().min().unwrap() => {
                VisitOutcome::Skip
            }
            _ => VisitOutcome::Continue,
        }
    }
}
/// [`DistanceCalculator`] that extends the three-neighbour recurrence with a
/// swap of two adjacent keys, read from the grandparent row.
pub struct DamerauLevenshteinDistanceCalculator {}

#[sealed]
impl<KEY, KEYCHECKER> DistanceCalculator<KEY, KEYCHECKER> for DamerauLevenshteinDistanceCalculator
where
    KEY: TrieKey,
    KEYCHECKER: KeyChecker<KEY>,
{
    /// Fills columns `1..=wordkey.len()` of the current node's row.
    ///
    /// Returns `Skip` when `worst_case` is set and is not greater than the
    /// smallest value in the parent's row, otherwise `Continue`.
    fn compute(
        workspace: &mut Vec<Vec<usize>>,
        nodes: &Vec<LevTrieNode<KEY>>,
        stack: &Vec<usize>,
        wordkey: &[KEY],
        worst_case: Option<usize>,
    ) -> VisitOutcome {
        let depth = stack.len();
        for (offset, &symbol) in wordkey.iter().enumerate() {
            let col = offset + 1;
            let current = stack[depth - 1];
            let parent = stack[depth - 2];
            let mut cell = if KEYCHECKER::check(Some(symbol), nodes[current].key) {
                // Keys match: take the diagonal cell unchanged.
                workspace[parent][offset]
            } else {
                let left = workspace[current][offset];
                let diagonal = workspace[parent][offset];
                let above = workspace[parent][col];
                left.min(diagonal).min(above) + 1
            };
            // Adjacent swap: the previous word key matches the current node
            // and the current word key matches the parent node.
            let swapped = depth > 2
                && col > 1
                && KEYCHECKER::check(Some(wordkey[offset - 1]), nodes[current].key)
                && KEYCHECKER::check(Some(symbol), nodes[parent].key);
            if swapped {
                cell = cell.min(workspace[stack[depth - 3]][offset - 1] + 1);
            }
            workspace[current][col] = cell;
        }
        match worst_case {
            // The bound is checked against the parent's row here.
            Some(worst) if worst <= *workspace[stack[depth - 2]].iter().min().unwrap() => {
                VisitOutcome::Skip
            }
            _ => VisitOutcome::Continue,
        }
    }
}

View File

@@ -1,59 +0,0 @@
use std::cmp::Ordering;
/// One hit produced by a fuzzy search.
///
/// Results are ordered by ascending `distance`; ties are broken by ascending
/// `word`, so a `BTreeSet<Result>` keeps the best hits first.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Result {
    /// Id of the trie node the hit refers to.
    pub word: usize,
    /// Distance computed for this hit.
    pub distance: usize,
}

impl PartialOrd for Result {
    /// Delegates to [`Ord::cmp`] so both orderings can never disagree.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for Result {
    /// Compares by `distance` first and by `word` when the distances are equal.
    fn cmp(&self, other: &Self) -> Ordering {
        self.distance
            .cmp(&other.distance)
            .then_with(|| self.word.cmp(&other.word))
    }
}
//struct Standing {
// size: usize,
// results: Vec<Result>,
//}
//
//impl Standing {
// pub fn new(size: usize) -> Standing {
// Standing {
// size,
// results: BTreeSet::new(),
// }
// }
//
// pub fn addResult(&mut self, res: Result) {
// self.results.push(res)
// }
//}

View File

@@ -1,243 +0,0 @@
use std::collections::BTreeSet;
use std::collections::HashMap;
use std::marker::PhantomData;
use super::trienode::TrieNode;
use super::trienode::TrieKey;
use super::levtrie::DistanceCalculator;
use super::keychecker::KeyChecker;
use super::result::Result;
/// Instruction returned by the pre-visit callback of a trie walk.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VisitOutcome {
    /// Descend into the children of the node that was just visited.
    Continue,
    /// Do not descend into the node's children; go on with its siblings.
    Skip,
    /// Stop the whole traversal immediately.
    EarlyExit,
}
/// Trie stored as a flat vector of nodes that reference each other by index.
///
/// `KEYCHECKER` decides when two keys match and `PAYLOAD` is the per-node
/// user data type.
pub struct Trie<KEY, KEYCHECKER, PAYLOAD>
where KEY : TrieKey, KEYCHECKER : KeyChecker<KEY> {
// Node storage; index 0 is the root node created by `empty`.
pub nodes : Vec<TrieNode<KEY, PAYLOAD>>,
// Called to build the payload of every node that gets created.
payload_initializer : fn() -> PAYLOAD,
// Indices of the key-less terminator nodes appended by `add`.
tails : BTreeSet<usize>,
// Zero-sized marker that ties the trie to its key checker type.
checker : PhantomData<KEYCHECKER>
}
impl<KEY, KEYCHECKER, PAYLOAD> Trie<KEY, KEYCHECKER, PAYLOAD>
where
    KEY: TrieKey,
    KEYCHECKER: KeyChecker<KEY>,
{
    /// Creates a trie that contains only the key-less root node (index 0).
    ///
    /// `initializer` is stored and used to build the payload of every node.
    pub fn empty(initializer: fn() -> PAYLOAD) -> Trie<KEY, KEYCHECKER, PAYLOAD> {
        Trie {
            nodes: vec![TrieNode::new0(None, initializer)],
            payload_initializer: initializer,
            tails: BTreeSet::new(),
            checker: PhantomData,
        }
    }

    /// Builds a trie by adding every word of `wordlist` in iteration order.
    pub fn trie_from_words<T: IntoIterator, U: IntoIterator>(
        initializer: fn() -> PAYLOAD,
        wordlist: U,
    ) -> Trie<KEY, KEYCHECKER, PAYLOAD>
    where
        T: IntoIterator<Item = KEY>,
        U: IntoIterator<Item = T>,
    {
        let mut trie = Trie::empty(initializer);
        for word in wordlist {
            trie.add(word);
        }
        trie
    }

    /// Mutable access to the node stored at `index`; panics when out of range.
    pub fn get_node_mut(&mut self, index: usize) -> &mut TrieNode<KEY, PAYLOAD> {
        &mut self.nodes[index]
    }

    /// Shared access to the node stored at `index`; panics when out of range.
    pub fn get_node(&self, index: usize) -> &TrieNode<KEY, PAYLOAD> {
        &self.nodes[index]
    }

    /// Number of nodes currently stored, root and terminators included.
    pub fn nodes(&self) -> usize {
        self.nodes.len()
    }

    /// Appends a new node under `parent` and returns its index.
    ///
    /// When `prev` is given the node is linked right after that sibling;
    /// otherwise it is appended after the parent's last child, or becomes the
    /// first child when the parent has none.
    fn add_node(&mut self, key: Option<KEY>, parent: usize, prev: Option<usize>) -> usize {
        let index = self.nodes.len();
        let mut node = TrieNode::new0(key, self.payload_initializer);
        node.parent = Some(parent);
        // Without an explicit predecessor, look for the parent's last child.
        let predecessor = prev.or_else(|| {
            let mut last = self.nodes[parent].child?;
            while let Some(next) = self.nodes[last].next {
                last = next;
            }
            Some(last)
        });
        match predecessor {
            Some(sibling) => {
                self.nodes[sibling].next = Some(index);
                node.prev = Some(sibling);
            }
            None => self.nodes[parent].child = Some(index),
        }
        self.nodes.push(node);
        index
    }

    /// Scans the children of `parent` for one whose key matches `key`.
    ///
    /// Returns `(matching child, last sibling inspected)`; the second element
    /// is the parent's last child whenever no child matched.
    fn locate_child(&self, parent: usize, key: KEY) -> (Option<usize>, Option<usize>) {
        let mut last_seen = None;
        let mut cursor = self.nodes[parent].child;
        while let Some(index) = cursor {
            let node = &self.nodes[index];
            if KEYCHECKER::check(node.key, Some(key)) {
                return (Some(index), last_seen);
            }
            last_seen = cursor;
            cursor = node.next;
        }
        (None, last_seen)
    }

    /// Returns the terminator child (a child without a key) of `node`, if any.
    fn find_tail(&self, node: usize) -> Option<usize> {
        let mut cursor = self.nodes[node].child;
        while let Some(index) = cursor {
            let child = &self.nodes[index];
            if child.key.is_none() {
                return Some(index);
            }
            cursor = child.next;
        }
        None
    }

    /// Inserts the word described by `path`.
    ///
    /// Returns `(true, tail)` with the index of the new terminator node when
    /// the word was inserted. Returns `(false, node)` when nothing changed,
    /// i.e. the word was already stored or `path` was empty; `node` is then
    /// the node reached by the last key (the root for an empty path).
    ///
    /// A word that is a strict prefix of an already stored word is inserted
    /// as well: it needs no new key nodes, but it does get its own terminator.
    pub fn add<T: IntoIterator>(&mut self, path: T) -> (bool, usize)
    where
        T: IntoIterator<Item = KEY>,
    {
        let mut pnode = 0;
        let mut path_is_empty = true;
        let mut created = false;
        for key in path {
            path_is_empty = false;
            pnode = match self.locate_child(pnode, key) {
                (Some(existing), _) => existing,
                (None, last_sibling) => {
                    created = true;
                    self.add_node(Some(key), pnode, last_sibling)
                }
            };
        }
        // No new key node and a terminator already present means a duplicate.
        if path_is_empty || (!created && self.find_tail(pnode).is_some()) {
            return (false, pnode);
        }
        let tail = self.add_node(None, pnode, None);
        self.tails.insert(tail);
        // Bump the reference count of the terminator and all its ancestors.
        let mut cursor = Some(tail);
        while let Some(index) = cursor {
            let node = &mut self.nodes[index];
            node.ref_count += 1;
            cursor = node.parent;
        }
        (true, tail)
    }

    /// Looks up the exact word `path` and returns its terminator node index.
    ///
    /// Keys are compared with `KEYCHECKER`. The terminator is searched among
    /// all children of the last matched node, so the word is found regardless
    /// of the order in which it and longer words were added. The receiver
    /// stays `&mut self` for compatibility with existing callers.
    pub fn search(&mut self, path: Vec<KEY>) -> Option<usize> {
        let mut node = 0;
        for key in path {
            node = self.locate_child(node, key).0?;
        }
        self.find_tail(node)
    }

    /// Collects the keys found on the way from `start` up to the root and
    /// returns them in root-to-`start` order; key-less nodes contribute nothing.
    pub fn lineal_descendant(&self, start: usize) -> Vec<&KEY> {
        let mut keys: Vec<&KEY> =
            std::iter::successors(Some(start), |&index| self.nodes[index].parent)
                .filter_map(|index| self.nodes[index].key.as_ref())
                .collect();
        keys.reverse();
        keys
    }

    /// Depth-first traversal driven by two callbacks.
    ///
    /// `visit_pre` receives the path of node ids from the root to the node
    /// being entered and decides how to proceed (its result for the root is
    /// ignored). `visit_post` receives the same path right before a node is
    /// left. `EarlyExit` returns immediately without further callbacks.
    pub fn walk<CB1, CB2>(&self, mut visit_pre: CB1, mut visit_post: CB2)
    where
        CB1: FnMut(&Vec<usize>) -> VisitOutcome,
        CB2: FnMut(&Vec<usize>),
    {
        // Each frame is (node id, next child of that node still to visit).
        let mut frames: Vec<(usize, Option<usize>)> = vec![(0, self.get_node(0).child)];
        let mut path: Vec<usize> = vec![0];
        visit_pre(&path);
        while let Some(frame) = frames.last_mut() {
            match frame.1 {
                Some(child_id) => {
                    let child = self.get_node(child_id);
                    frame.1 = child.next;
                    path.push(child_id);
                    let first_child = match visit_pre(&path) {
                        VisitOutcome::Continue => child.child,
                        VisitOutcome::Skip => None,
                        VisitOutcome::EarlyExit => return,
                    };
                    frames.push((child_id, first_child));
                }
                None => {
                    visit_post(&path);
                    frames.pop();
                    path.pop();
                }
            }
        }
    }

    /// Indices of all terminator nodes, one per stored word.
    pub fn tails(&self) -> &BTreeSet<usize> {
        &self.tails
    }
}

196
src/levtrie.rs Normal file
View File

@@ -0,0 +1,196 @@
extern crate sealed;
use self::sealed::sealed;
use std::collections::BTreeSet;
use super::keychecker::KeyChecker;
use super::result::Result;
use super::trie::Trie;
use super::trie::VisitOutcome;
use super::trienode::TrieKey;
use super::trienode::TrieNode;
/// A [`Trie`] whose nodes carry no payload (`()`); the fuzzy-search methods are defined on it.
pub type LevTrie<KEY, KEYCHECKER> = Trie<KEY, KEYCHECKER, ()>;
/// Node type stored by a [`LevTrie`]: a [`TrieNode`] with a `()` payload.
pub type LevTrieNode<KEY> = TrieNode<KEY, ()>;
/// Edit-distance strategy plugged into `LevTrie::fuzzy_search`.
///
/// `compute` is called from the pre-visit callback for nodes below the root
/// that carry a key; it fills in the row of the node on top of `stack`.
#[sealed]
pub trait DistanceCalculator<KEY, KEYCHECKER>
where
KEY: TrieKey,
KEYCHECKER: KeyChecker<KEY>,
{
/// Updates the distance row of the current node and tells the walker what to do next.
///
/// * `workspace` - one row per trie node, indexed by node id; `fuzzy_search`
///   sizes the visited rows to `wordkey.len() + 1` entries.
/// * `nodes` - the trie's node storage, indexed by node id.
/// * `stack` - node ids on the path from the root to the current node (last element).
/// * `wordkey` - the word being searched for.
/// * `worst_case` - distance of the worst retained result once the result set
///   is full, otherwise `None`.
fn compute(
workspace: &mut Vec<Vec<usize>>,
nodes: &Vec<LevTrieNode<KEY>>,
stack: &Vec<usize>,
wordkey: &[KEY],
worst_case: Option<usize>,
) -> VisitOutcome;
}
impl<KEY, KEYCHECKER> LevTrie<KEY, KEYCHECKER>
where
    KEY: TrieKey,
    KEYCHECKER: KeyChecker<KEY>,
{
    /// Creates an empty trie whose nodes carry a `()` payload.
    pub fn new() -> LevTrie<KEY, KEYCHECKER> {
        Trie::empty(|| {})
    }

    /// Builds a trie by adding every word of `wordlist` in iteration order.
    pub fn from_words<T: IntoIterator, U: IntoIterator>(wordlist: U) -> LevTrie<KEY, KEYCHECKER>
    where
        T: IntoIterator<Item = KEY>,
        U: IntoIterator<Item = T>,
    {
        let mut trie = LevTrie::new();
        for entry in wordlist {
            trie.add(entry);
        }
        trie
    }

    /// Walks the whole trie and returns at most `max_result` hits for `word`.
    ///
    /// The distance rows are filled by the calculator `DC`. Hits are ordered
    /// by distance first and node id second; whenever the set grows beyond
    /// `max_result`, its last (worst) element is dropped.
    pub fn fuzzy_search<DC>(&mut self, word: &[KEY], max_result: usize) -> BTreeSet<Result>
    where
        DC: DistanceCalculator<KEY, KEYCHECKER>,
    {
        let word_len = word.len();
        let row_len = word_len + 1;
        // One distance row per node, indexed by node id; rows are sized on first visit.
        let mut workspace: Vec<Vec<usize>> = vec![Vec::new(); self.nodes()];
        let mut results: BTreeSet<Result> = BTreeSet::new();
        let visit_pre = |stack: &Vec<usize>| -> VisitOutcome {
            let depth = stack.len();
            let node_id = *stack.last().unwrap();
            let row = &mut workspace[node_id];
            row.clear();
            if depth == 1 {
                // Root row: 0, 1, 2, ..., word_len.
                row.extend(0..row_len);
                return VisitOutcome::Continue;
            }
            // Any other row starts as [depth - 1, 0, 0, ...].
            row.resize(row_len, 0);
            row[0] = depth - 1;
            if self.get_node(node_id).key.is_some() {
                // Only a full result set provides a bound for the calculator.
                let worst_case = if results.len() == max_result {
                    results.last().map(|it| it.distance)
                } else {
                    None
                };
                return DC::compute(&mut workspace, &self.nodes, stack, word, worst_case);
            }
            // A key-less node ends a stored word: its distance is the last
            // cell of the parent's row.
            let distance = workspace[stack[depth - 2]][word_len];
            results.insert(Result {
                distance,
                word: node_id,
            });
            if results.len() > max_result {
                results.pop_last();
            }
            VisitOutcome::Skip
        };
        self.walk(visit_pre, |_: &Vec<usize>| {});
        results
    }
}
/// [`DistanceCalculator`] that fills each row from the parent row and the
/// cell to its left (match, or minimum of three neighbours plus one).
pub struct LevenshteinDistanceCalculator {}

#[sealed]
impl<KEY, KEYCHECKER> DistanceCalculator<KEY, KEYCHECKER> for LevenshteinDistanceCalculator
where
    KEY: TrieKey,
    KEYCHECKER: KeyChecker<KEY>,
{
    /// Fills columns `1..=wordkey.len()` of the current node's row.
    ///
    /// Returns `Skip` when `worst_case` is set and is not greater than the
    /// smallest value in the current row, otherwise `Continue`.
    fn compute(
        workspace: &mut Vec<Vec<usize>>,
        nodes: &Vec<LevTrieNode<KEY>>,
        stack: &Vec<usize>,
        wordkey: &[KEY],
        worst_case: Option<usize>,
    ) -> VisitOutcome {
        let depth = stack.len();
        for (offset, &symbol) in wordkey.iter().enumerate() {
            let col = offset + 1;
            let current = stack[depth - 1];
            let parent = stack[depth - 2];
            let cell = if KEYCHECKER::check(Some(symbol), nodes[current].key) {
                // Keys match: take the diagonal cell unchanged.
                workspace[parent][offset]
            } else {
                let left = workspace[current][offset];
                let diagonal = workspace[parent][offset];
                let above = workspace[parent][col];
                left.min(diagonal).min(above) + 1
            };
            workspace[current][col] = cell;
        }
        match worst_case {
            Some(worst) if worst <= *workspace[stack[depth - 1]].iter().min().unwrap() => {
                VisitOutcome::Skip
            }
            _ => VisitOutcome::Continue,
        }
    }
}
/// [`DistanceCalculator`] that extends the three-neighbour recurrence with a
/// swap of two adjacent keys, read from the grandparent row.
pub struct DamerauLevenshteinDistanceCalculator {}

#[sealed]
impl<KEY, KEYCHECKER> DistanceCalculator<KEY, KEYCHECKER> for DamerauLevenshteinDistanceCalculator
where
    KEY: TrieKey,
    KEYCHECKER: KeyChecker<KEY>,
{
    /// Fills columns `1..=wordkey.len()` of the current node's row.
    ///
    /// Returns `Skip` when `worst_case` is set and is not greater than the
    /// smallest value in the parent's row, otherwise `Continue`.
    fn compute(
        workspace: &mut Vec<Vec<usize>>,
        nodes: &Vec<LevTrieNode<KEY>>,
        stack: &Vec<usize>,
        wordkey: &[KEY],
        worst_case: Option<usize>,
    ) -> VisitOutcome {
        let depth = stack.len();
        for (offset, &symbol) in wordkey.iter().enumerate() {
            let col = offset + 1;
            let current = stack[depth - 1];
            let parent = stack[depth - 2];
            let mut cell = if KEYCHECKER::check(Some(symbol), nodes[current].key) {
                // Keys match: take the diagonal cell unchanged.
                workspace[parent][offset]
            } else {
                let left = workspace[current][offset];
                let diagonal = workspace[parent][offset];
                let above = workspace[parent][col];
                left.min(diagonal).min(above) + 1
            };
            // Adjacent swap: the previous word key matches the current node
            // and the current word key matches the parent node.
            let swapped = depth > 2
                && col > 1
                && KEYCHECKER::check(Some(wordkey[offset - 1]), nodes[current].key)
                && KEYCHECKER::check(Some(symbol), nodes[parent].key);
            if swapped {
                cell = cell.min(workspace[stack[depth - 3]][offset - 1] + 1);
            }
            workspace[current][col] = cell;
        }
        match worst_case {
            // The bound is checked against the parent's row here.
            Some(worst) if worst <= *workspace[stack[depth - 2]].iter().min().unwrap() => {
                VisitOutcome::Skip
            }
            _ => VisitOutcome::Continue,
        }
    }
}

25
src/lib.rs Normal file
View File

@@ -0,0 +1,25 @@
#[macro_use]
extern crate trait_group;
mod levtrie;
// pub use self::levtrie::LevTrieNode as LevTrieNode;
pub use self::levtrie::DamerauLevenshteinDistanceCalculator;
pub use self::levtrie::DistanceCalculator;
pub use self::levtrie::LevTrie;
pub use self::levtrie::LevenshteinDistanceCalculator;
mod trie;
pub use self::trie::Trie;
mod trienode;
//use self::trienode::TrieNode as TrieNode;
mod keychecker;
pub use self::keychecker::CaseInsensitiveKeyChecker;
pub use self::keychecker::CaseSensitiveKeyChecker;
pub use self::keychecker::KeyChecker;
/// [`LevTrie`] over `char` keys compared with [`CaseSensitiveKeyChecker`].
pub type CaseSensitiveLevTrie = LevTrie<char, CaseSensitiveKeyChecker>;
/// [`LevTrie`] over `char` keys compared with [`CaseInsensitiveKeyChecker`].
pub type CaseInSensitiveLevTrie = LevTrie<char, CaseInsensitiveKeyChecker>;
mod result;
pub use self::result::Result;

50
src/result.rs Normal file
View File

@@ -0,0 +1,50 @@
use std::cmp::Ordering;
/// One hit produced by a fuzzy search.
///
/// Results are ordered by ascending `distance`; ties are broken by ascending
/// `word`, so a `BTreeSet<Result>` keeps the best hits first.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Result {
    /// Id of the trie node the hit refers to.
    pub word: usize,
    /// Distance computed for this hit.
    pub distance: usize,
}

impl PartialOrd for Result {
    /// Delegates to [`Ord::cmp`] so both orderings can never disagree.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for Result {
    /// Compares by `distance` first and by `word` when the distances are equal.
    fn cmp(&self, other: &Self) -> Ordering {
        self.distance
            .cmp(&other.distance)
            .then_with(|| self.word.cmp(&other.word))
    }
}
//struct Standing {
// size: usize,
// results: Vec<Result>,
//}
//
//impl Standing {
// pub fn new(size: usize) -> Standing {
// Standing {
// size,
// results: BTreeSet::new(),
// }
// }
//
// pub fn addResult(&mut self, res: Result) {
// self.results.push(res)
// }
//}

251
src/trie.rs Normal file
View File

@@ -0,0 +1,251 @@
use std::collections::BTreeSet;
use std::collections::HashMap;
use std::marker::PhantomData;
use super::keychecker::KeyChecker;
use super::levtrie::DistanceCalculator;
use super::result::Result;
use super::trienode::TrieKey;
use super::trienode::TrieNode;
/// Instruction returned by the pre-visit callback of a trie walk.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VisitOutcome {
    /// Descend into the children of the node that was just visited.
    Continue,
    /// Do not descend into the node's children; go on with its siblings.
    Skip,
    /// Stop the whole traversal immediately.
    EarlyExit,
}
/// Trie stored as a flat vector of nodes that reference each other by index.
///
/// `KEYCHECKER` decides when two keys match and `PAYLOAD` is the per-node
/// user data type.
pub struct Trie<KEY, KEYCHECKER, PAYLOAD>
where
KEY: TrieKey,
KEYCHECKER: KeyChecker<KEY>,
{
// Node storage; index 0 is the root node created by `empty`.
pub nodes: Vec<TrieNode<KEY, PAYLOAD>>,
// Called to build the payload of every node that gets created.
payload_initializer: fn() -> PAYLOAD,
// Indices of the key-less terminator nodes appended by `add`.
tails: BTreeSet<usize>,
// Zero-sized marker that ties the trie to its key checker type.
checker: PhantomData<KEYCHECKER>,
}
impl<KEY, KEYCHECKER, PAYLOAD> Trie<KEY, KEYCHECKER, PAYLOAD>
where
    KEY: TrieKey,
    KEYCHECKER: KeyChecker<KEY>,
{
    /// Creates a trie that contains only the key-less root node (index 0).
    ///
    /// `initializer` is stored and used to build the payload of every node.
    pub fn empty(initializer: fn() -> PAYLOAD) -> Trie<KEY, KEYCHECKER, PAYLOAD> {
        Trie {
            nodes: vec![TrieNode::new0(None, initializer)],
            payload_initializer: initializer,
            tails: BTreeSet::new(),
            checker: PhantomData,
        }
    }

    /// Builds a trie by adding every word of `wordlist` in iteration order.
    pub fn trie_from_words<T: IntoIterator, U: IntoIterator>(
        initializer: fn() -> PAYLOAD,
        wordlist: U,
    ) -> Trie<KEY, KEYCHECKER, PAYLOAD>
    where
        T: IntoIterator<Item = KEY>,
        U: IntoIterator<Item = T>,
    {
        let mut trie = Trie::empty(initializer);
        for word in wordlist {
            trie.add(word);
        }
        trie
    }

    /// Mutable access to the node stored at `index`; panics when out of range.
    pub fn get_node_mut(&mut self, index: usize) -> &mut TrieNode<KEY, PAYLOAD> {
        &mut self.nodes[index]
    }

    /// Shared access to the node stored at `index`; panics when out of range.
    pub fn get_node(&self, index: usize) -> &TrieNode<KEY, PAYLOAD> {
        &self.nodes[index]
    }

    /// Number of nodes currently stored, root and terminators included.
    pub fn nodes(&self) -> usize {
        self.nodes.len()
    }

    /// Appends a new node under `parent` and returns its index.
    ///
    /// When `prev` is given the node is linked right after that sibling;
    /// otherwise it is appended after the parent's last child, or becomes the
    /// first child when the parent has none.
    fn add_node(&mut self, key: Option<KEY>, parent: usize, prev: Option<usize>) -> usize {
        let index = self.nodes.len();
        let mut node = TrieNode::new0(key, self.payload_initializer);
        node.parent = Some(parent);
        // Without an explicit predecessor, look for the parent's last child.
        let predecessor = prev.or_else(|| {
            let mut last = self.nodes[parent].child?;
            while let Some(next) = self.nodes[last].next {
                last = next;
            }
            Some(last)
        });
        match predecessor {
            Some(sibling) => {
                self.nodes[sibling].next = Some(index);
                node.prev = Some(sibling);
            }
            None => self.nodes[parent].child = Some(index),
        }
        self.nodes.push(node);
        index
    }

    /// Scans the children of `parent` for one whose key matches `key`.
    ///
    /// Returns `(matching child, last sibling inspected)`; the second element
    /// is the parent's last child whenever no child matched.
    fn locate_child(&self, parent: usize, key: KEY) -> (Option<usize>, Option<usize>) {
        let mut last_seen = None;
        let mut cursor = self.nodes[parent].child;
        while let Some(index) = cursor {
            let node = &self.nodes[index];
            if KEYCHECKER::check(node.key, Some(key)) {
                return (Some(index), last_seen);
            }
            last_seen = cursor;
            cursor = node.next;
        }
        (None, last_seen)
    }

    /// Returns the terminator child (a child without a key) of `node`, if any.
    fn find_tail(&self, node: usize) -> Option<usize> {
        let mut cursor = self.nodes[node].child;
        while let Some(index) = cursor {
            let child = &self.nodes[index];
            if child.key.is_none() {
                return Some(index);
            }
            cursor = child.next;
        }
        None
    }

    /// Inserts the word described by `path`.
    ///
    /// Returns `(true, tail)` with the index of the new terminator node when
    /// the word was inserted. Returns `(false, node)` when nothing changed,
    /// i.e. the word was already stored or `path` was empty; `node` is then
    /// the node reached by the last key (the root for an empty path).
    ///
    /// A word that is a strict prefix of an already stored word is inserted
    /// as well: it needs no new key nodes, but it does get its own terminator.
    pub fn add<T: IntoIterator>(&mut self, path: T) -> (bool, usize)
    where
        T: IntoIterator<Item = KEY>,
    {
        let mut pnode = 0;
        let mut path_is_empty = true;
        let mut created = false;
        for key in path {
            path_is_empty = false;
            pnode = match self.locate_child(pnode, key) {
                (Some(existing), _) => existing,
                (None, last_sibling) => {
                    created = true;
                    self.add_node(Some(key), pnode, last_sibling)
                }
            };
        }
        // No new key node and a terminator already present means a duplicate.
        if path_is_empty || (!created && self.find_tail(pnode).is_some()) {
            return (false, pnode);
        }
        let tail = self.add_node(None, pnode, None);
        self.tails.insert(tail);
        // Bump the reference count of the terminator and all its ancestors.
        let mut cursor = Some(tail);
        while let Some(index) = cursor {
            let node = &mut self.nodes[index];
            node.ref_count += 1;
            cursor = node.parent;
        }
        (true, tail)
    }

    /// Looks up the exact word `path` and returns its terminator node index.
    ///
    /// Keys are compared with `KEYCHECKER`. The terminator is searched among
    /// all children of the last matched node, so the word is found regardless
    /// of the order in which it and longer words were added. The receiver
    /// stays `&mut self` for compatibility with existing callers.
    pub fn search(&mut self, path: Vec<KEY>) -> Option<usize> {
        let mut node = 0;
        for key in path {
            node = self.locate_child(node, key).0?;
        }
        self.find_tail(node)
    }

    /// Collects the keys found on the way from `start` up to the root and
    /// returns them in root-to-`start` order; key-less nodes contribute nothing.
    pub fn lineal_descendant(&self, start: usize) -> Vec<&KEY> {
        let mut keys: Vec<&KEY> =
            std::iter::successors(Some(start), |&index| self.nodes[index].parent)
                .filter_map(|index| self.nodes[index].key.as_ref())
                .collect();
        keys.reverse();
        keys
    }

    /// Depth-first traversal driven by two callbacks.
    ///
    /// `visit_pre` receives the path of node ids from the root to the node
    /// being entered and decides how to proceed (its result for the root is
    /// ignored). `visit_post` receives the same path right before a node is
    /// left. `EarlyExit` returns immediately without further callbacks.
    pub fn walk<CB1, CB2>(&self, mut visit_pre: CB1, mut visit_post: CB2)
    where
        CB1: FnMut(&Vec<usize>) -> VisitOutcome,
        CB2: FnMut(&Vec<usize>),
    {
        // Each frame is (node id, next child of that node still to visit).
        let mut frames: Vec<(usize, Option<usize>)> = vec![(0, self.get_node(0).child)];
        let mut path: Vec<usize> = vec![0];
        visit_pre(&path);
        while let Some(frame) = frames.last_mut() {
            match frame.1 {
                Some(child_id) => {
                    let child = self.get_node(child_id);
                    frame.1 = child.next;
                    path.push(child_id);
                    let first_child = match visit_pre(&path) {
                        VisitOutcome::Continue => child.child,
                        VisitOutcome::Skip => None,
                        VisitOutcome::EarlyExit => return,
                    };
                    frames.push((child_id, first_child));
                }
                None => {
                    visit_post(&path);
                    frames.pop();
                    path.pop();
                }
            }
        }
    }

    /// Indices of all terminator nodes, one per stored word.
    pub fn tails(&self) -> &BTreeSet<usize> {
        &self.tails
    }
}

View File

@@ -1,24 +1,37 @@
trait_group! {
pub trait TrieKey : std::marker::Copy + std::fmt::Display + Sized
}
// pub trait KeyPath<KEY: TrieKey> : std::ops::Index<usize> + IntoIterator<Item = KEY> {}
pub struct TrieNode<KEY, PAYLOAD> where KEY : TrieKey {
pub struct TrieNode<KEY, PAYLOAD>
where
KEY: TrieKey,
{
pub key: Option<KEY>,
pub payload: PAYLOAD,
pub prev: Option<usize>,
pub next: Option<usize>,
pub child: Option<usize>,
pub parent: Option<usize>,
pub ref_count: usize
pub ref_count: usize,
}
impl <KEY, PAYLOAD> TrieNode<KEY, PAYLOAD> where KEY : TrieKey {
fn new<U>(key: Option<KEY>, payload_initializer : U, prev: Option<usize>, next: Option<usize>, parent: Option<usize>, child : Option<usize>) -> TrieNode<KEY, PAYLOAD>
where U : Fn() -> PAYLOAD {
impl<KEY, PAYLOAD> TrieNode<KEY, PAYLOAD>
where
KEY: TrieKey,
{
fn new<U>(
key: Option<KEY>,
payload_initializer: U,
prev: Option<usize>,
next: Option<usize>,
parent: Option<usize>,
child: Option<usize>,
) -> TrieNode<KEY, PAYLOAD>
where
U: Fn() -> PAYLOAD,
{
TrieNode {
key,
payload: payload_initializer(),
@@ -26,12 +39,14 @@ impl <KEY, PAYLOAD> TrieNode<KEY, PAYLOAD> where KEY : TrieKey {
next: next,
child: child,
parent: parent,
ref_count: 0
ref_count: 0,
}
}
pub fn new0<U>(key: Option<KEY>, payload_initializer : U) -> TrieNode<KEY, PAYLOAD>
where U : Fn() -> PAYLOAD {
pub fn new0<U>(key: Option<KEY>, payload_initializer: U) -> TrieNode<KEY, PAYLOAD>
where
U: Fn() -> PAYLOAD,
{
TrieNode::new(key, payload_initializer, None, None, None, None)
}
}
}