Compare commits

..

1 Commits

Author SHA1 Message Date
woggioni a8a5ad9491 Initial commit 2025-07-30 06:49:20 +02:00
18 changed files with 2 additions and 522673 deletions
-7
View File
@@ -1,7 +0,0 @@
[registry]
default = "gitea"
[registries.gitea]
global-credential-providers = ["cargo:token"]
index = "sparse+https://gitea.woggioni.net/api/packages/woggioni/cargo/"
-18
View File
@@ -1,18 +0,0 @@
name: CI
on:
push:
branches: [ master ]
jobs:
build:
runs-on: woryzen
steps:
- name: Checkout sources
uses: actions/checkout@v4
- name: Run unit tests
run: |
cargo test
- name: Publish artifacts
env:
CARGO_REGISTRIES_GITEA_TOKEN: Bearer ${{ secrets.PUBLISHER_TOKEN }}
run: |
cargo publish --registry=gitea
-2
View File
@@ -1,2 +0,0 @@
/target
**/*.rs.bk
Generated
-309
View File
@@ -1,309 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "anstream"
version = "0.6.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"is_terminal_polyfill",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd"
[[package]]
name = "anstyle-parse"
version = "0.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9"
dependencies = [
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882"
dependencies = [
"anstyle",
"once_cell_polyfill",
"windows-sys",
]
[[package]]
name = "autocfg"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
[[package]]
name = "clap"
version = "4.5.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed87a9d530bb41a67537289bafcac159cb3ee28460e0a4571123d2a778a6a882"
dependencies = [
"clap_builder",
"clap_derive",
]
[[package]]
name = "clap_builder"
version = "4.5.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64f4f3f3c77c94aff3c7e9aac9a2ca1974a5adf392a8bb751e827d6d127ab966"
dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
]
[[package]]
name = "clap_derive"
version = "4.5.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491"
dependencies = [
"heck 0.5.0",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "clap_lex"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"
[[package]]
name = "colorchoice"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
[[package]]
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "is_terminal_polyfill"
version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "levtree"
version = "0.1.2"
dependencies = [
"clap",
"rmath",
"sealed",
"trait-group",
]
[[package]]
name = "num-traits"
version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
dependencies = [
"autocfg",
]
[[package]]
name = "once_cell_polyfill"
version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
[[package]]
name = "opimps"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "857dabe64a7afe2e51ac9962dc3c008e74ae050dd47e21a7e7b1fc69a67a0229"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "proc-macro2"
version = "1.0.95"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rmath"
version = "0.1.0"
source = "sparse+https://gitea.woggioni.net/api/packages/woggioni/cargo/"
checksum = "73da6144552f77474e00a800955098b34da9bd10fde4c1570290978c2c48da27"
dependencies = [
"num-traits",
"opimps",
"sealed",
"trait-group",
]
[[package]]
name = "sealed"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4a8caec23b7800fb97971a1c6ae365b6239aaeddfb934d6265f8505e795699d"
dependencies = [
"heck 0.4.1",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "syn"
version = "2.0.104"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "trait-group"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1b362975c6f0f21a41fbb9ca91fe5dcb7e01e12331360374347476b45f5cb9c"
[[package]]
name = "unicode-ident"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
[[package]]
name = "utf8parse"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "windows-sys"
version = "0.59.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
-32
View File
@@ -1,32 +0,0 @@
[package]
name = "levtree"
version = "0.1.2"
authors = ["Walter Oggioni <oggioni.walter@gmail.com>"]
license = "MIT"
edition = "2024"
[dependencies]
trait-group = "0.1"
sealed = "0.5"
[dev-dependencies]
clap = { version = "4.5", features = ["derive"] }
rmath = { version = "0.1", registry = "gitea" }
[lib]
name = "levtree"
crate-type = ["lib"]
bench = false
[[example]]
name = "benchmark"
path = "examples/benchmark.rs"
[[example]]
name = "searcher"
path = "examples/searcher.rs"
[[example]]
name = "lcs"
path = "examples/lcs.rs"
+2
View File
@@ -0,0 +1,2 @@
# rlevtree
-50
View File
@@ -1,50 +0,0 @@
extern crate levtree;
use levtree::CaseSensitiveLevTrie;
use levtree::DamerauLevenshteinDistanceCalculator;
use levtree::LevTrie;
use std::io::BufRead;
use std::io::BufReader;
/// Benchmark driver: loads the embedded `cracklib-small` word list into a
/// case-sensitive trie, runs a fixed set of fuzzy searches 50 times to
/// exercise the search path, then runs them once more printing the matches.
fn main() {
    let dictionary = BufReader::new(&include_bytes!("cracklib-small")[..]);
    let mut trie: CaseSensitiveLevTrie = LevTrie::new();
    for line in dictionary.lines() {
        trie.add(line.unwrap().chars());
    }

    let needles: Vec<Vec<char>> = [
        "camel",
        "coriolis",
        "mattel",
        "cruzer",
        "cpoper",
        "roublesoot",
    ]
    .iter()
    .map(|needle| needle.chars().collect::<Vec<char>>())
    .collect();

    // Timed section: results are intentionally discarded.
    for _ in 0..50 {
        for needle in &needles {
            trie.fuzzy_search::<DamerauLevenshteinDistanceCalculator>(needle, 6);
        }
    }

    // Reporting section: one more pass, this time printing every hit.
    for needle in needles {
        let hits = trie.fuzzy_search::<DamerauLevenshteinDistanceCalculator>(&needle, 6);
        println!("needle: {}", needle.iter().collect::<String>());
        for hit in hits {
            let text: String = trie.lineal_descendant(hit.word).into_iter().collect();
            println!("distance: {}, wordkey: {}", hit.distance, text);
        }
        println!()
    }
}
File diff suppressed because it is too large Load Diff
-69
View File
@@ -1,69 +0,0 @@
use clap::Parser;
use rmath::HMatrix;
// Command-line arguments: the two words that `main` compares.
// Plain `//` comments are used here on purpose: `///` doc comments on a
// `clap` derive struct are picked up as help text and would change the output.
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
struct CliArgs {
// First word to compare.
word1: String,
// Second word to compare.
word2: String,
}
/// Returns the length of the longest common subsequence of `s1` and `s2`.
///
/// Cell `(i, j)` of the work matrix holds the LCS length of `s1[..=i]` and
/// `s2[..=j]`; the answer is the bottom-right cell. When either slice is
/// empty the result is `0`.
fn lcs_len(s1: &[char], s2: &[char]) -> usize {
    let m = s1.len();
    let n = s2.len();
    // An empty input shares nothing with the other one. Returning early also
    // avoids the `m - 1` / `n - 1` underflow in the final lookup.
    if m == 0 || n == 0 {
        return 0;
    }
    let mut workspace = HMatrix::<usize>::new(m, n, |(_, _)| 0);
    for i in 0..m {
        for j in 0..n {
            workspace[(i, j)] = if s1[i] == s2[j] {
                // A match extends the best subsequence of the two shorter prefixes.
                if i == 0 || j == 0 {
                    1
                } else {
                    workspace[(i - 1, j - 1)] + 1
                }
            } else {
                // No match: inherit the better of the neighbouring cells
                // (cells outside the matrix count as 0).
                let left = if j > 0 { workspace[(i, j - 1)] } else { 0 };
                let up = if i > 0 { workspace[(i - 1, j)] } else { 0 };
                usize::max(left, up)
            };
        }
    }
    workspace[(m - 1, n - 1)]
}
/// Returns `s1.len()` minus the length of the longest common subsequence of
/// `s1` and `s2`, i.e. how many characters of `s1` are left unmatched.
///
/// Every cell starts from `s1.len()` (the distance when nothing matches) and
/// is decremented along the diagonal on each match. When either slice is
/// empty nothing can match, so the result is `s1.len()`.
fn lcs_distance(s1: &[char], s2: &[char]) -> usize {
    let m = s1.len();
    let n = s2.len();
    let max_distance = m;
    // Without this guard the final `workspace[(m - 1, n - 1)]` lookup
    // underflows for empty input.
    if m == 0 || n == 0 {
        return max_distance;
    }
    let mut workspace = HMatrix::<usize>::new(m, n, |(_, _)| 0);
    for i in 0..m {
        for j in 0..n {
            workspace[(i, j)] = if s1[i] == s2[j] {
                // A match removes one unmatched character relative to the
                // diagonal predecessor (or to the maximum on the border).
                if i == 0 || j == 0 {
                    max_distance - 1
                } else {
                    workspace[(i - 1, j - 1)] - 1
                }
            } else {
                // No match: keep the smaller neighbouring distance
                // (cells outside the matrix count as `max_distance`).
                let left = if j > 0 {
                    workspace[(i, j - 1)]
                } else {
                    max_distance
                };
                let up = if i > 0 {
                    workspace[(i - 1, j)]
                } else {
                    max_distance
                };
                usize::min(left, up)
            };
        }
    }
    workspace[(m - 1, n - 1)]
}
fn main() {
let args = CliArgs::parse();
let s1 = args.word1.chars().collect::<Vec<char>>();
let s2 = args.word2.chars().collect::<Vec<char>>();
println!("{}", lcs_distance(&s1, &s2));
}
-96
View File
@@ -1,96 +0,0 @@
extern crate levtree;
use levtree::{CaseInsensitiveKeyChecker, CaseSensitiveKeyChecker};
use levtree::{
DamerauLevenshteinDistanceCalculator, DistanceCalculator, KeyChecker, LcsDistanceCalculator,
LevTrie, LevenshteinDistanceCalculator, LevenshteinNoSubDistanceCalculator,
};
use std::io::BufRead;
use std::io::BufReader;
use clap::{Parser, ValueEnum};
// Distance algorithms selectable from the command line; `main` maps each
// variant to the `levtree` distance calculator of the same name.
// (`//` rather than `///` so the clap-generated help text stays unchanged.)
#[derive(Debug, Clone, ValueEnum)]
enum Algorithm {
Lcs,
Levenshtein,
LevenshteinNoSub,
DamerauLevenshtein,
}
/// Look up the dictionary words closest to a given word
// The doc comments in this struct are consumed by `clap` and shown in
// `--help`, so they describe the program and each option from the user's
// point of view.
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
struct CliArgs {
    /// Word to look up
    #[arg(short, long)]
    wordkey: String,

    /// Number of results returned
    #[arg(short, long, default_value_t = 10)]
    result_size: usize,

    /// Specify distance algorithm
    #[arg(short, long, required = false, default_value = "damerau-levenshtein")]
    distance: Algorithm,

    /// Toggle case sensitivity
    #[arg(short, long, required = false, default_value_t = false)]
    case_sensitive: bool,
}
// Empty inherent impl: `CliArgs` defines no methods of its own here.
impl CliArgs {}
/// Builds a trie from the embedded `words.txt` dictionary using key checker
/// `KC`, searches it for `args.wordkey` with distance calculator `DC`, and
/// prints up to `args.result_size` matches together with their distance.
fn run<KC: KeyChecker<char>, DC: DistanceCalculator<char, KC>>(args: &CliArgs) {
    let mut trie = LevTrie::<char, KC>::new();
    let dictionary = BufReader::new(&include_bytes!("words.txt")[..]);
    for line in dictionary.lines() {
        let entry: String = line.unwrap();
        trie.add(entry.chars());
    }

    let needle: Vec<char> = args.wordkey.chars().collect();
    for hit in trie.fuzzy_search::<DC>(&needle, args.result_size) {
        let text: String = trie.lineal_descendant(hit.word).into_iter().collect();
        println!("distance: {}, wordkey: {}", hit.distance, text);
    }
}
/// Parses the command line and dispatches to the `run` instantiation that
/// matches the requested case sensitivity and distance algorithm.
fn main() {
    let args = CliArgs::parse();
    match (args.case_sensitive, &args.distance) {
        (true, Algorithm::Lcs) => run::<CaseSensitiveKeyChecker, LcsDistanceCalculator>(&args),
        (true, Algorithm::Levenshtein) => {
            run::<CaseSensitiveKeyChecker, LevenshteinDistanceCalculator>(&args)
        }
        (true, Algorithm::LevenshteinNoSub) => {
            run::<CaseSensitiveKeyChecker, LevenshteinNoSubDistanceCalculator>(&args)
        }
        (true, Algorithm::DamerauLevenshtein) => {
            run::<CaseSensitiveKeyChecker, DamerauLevenshteinDistanceCalculator>(&args)
        }
        (false, Algorithm::Lcs) => run::<CaseInsensitiveKeyChecker, LcsDistanceCalculator>(&args),
        (false, Algorithm::Levenshtein) => {
            run::<CaseInsensitiveKeyChecker, LevenshteinDistanceCalculator>(&args)
        }
        (false, Algorithm::LevenshteinNoSub) => {
            run::<CaseInsensitiveKeyChecker, LevenshteinNoSubDistanceCalculator>(&args)
        }
        (false, Algorithm::DamerauLevenshtein) => {
            run::<CaseInsensitiveKeyChecker, DamerauLevenshteinDistanceCalculator>(&args)
        }
    }
}
-466550
View File
File diff suppressed because it is too large Load Diff
-26
View File
@@ -1,26 +0,0 @@
use super::trienode::TrieKey;
/// Strategy deciding whether two optional trie keys are considered equal.
///
/// The trie calls `check` both when inserting words and when searching, so
/// the implementation chosen for a trie defines its notion of key equality.
pub trait KeyChecker<KEY>
where
KEY: TrieKey,
{
/// Returns `true` when `k1` and `k2` should be treated as the same key.
/// Either argument may be `None` (nodes without a key carry `None`).
fn check(k1: Option<KEY>, k2: Option<KEY>) -> bool;
}
/// [`KeyChecker`] for `char` keys that ignores letter case.
pub struct CaseInsensitiveKeyChecker {}

impl KeyChecker<char> for CaseInsensitiveKeyChecker {
    /// Returns `true` when both keys are present and have the same lowercase
    /// form, or when both are absent.
    fn check(k1: Option<char>, k2: Option<char>) -> bool {
        match (k1, k2) {
            // `to_lowercase` may yield more than one `char` (e.g. 'İ'), so the
            // whole expansions are compared rather than only their first char.
            (Some(c1), Some(c2)) => c1.to_lowercase().eq(c2.to_lowercase()),
            (None, None) => true,
            _ => false,
        }
    }
}
/// [`KeyChecker`] for `char` keys that compares them exactly.
pub struct CaseSensitiveKeyChecker {}

impl KeyChecker<char> for CaseSensitiveKeyChecker {
    /// Returns `true` when both keys are absent or both hold the same `char`.
    fn check(k1: Option<char>, k2: Option<char>) -> bool {
        match (k1, k2) {
            (Some(left), Some(right)) => left == right,
            (None, None) => true,
            _ => false,
        }
    }
}
-286
View File
@@ -1,286 +0,0 @@
extern crate sealed;
use self::sealed::sealed;
use std::collections::BinaryHeap;
use super::keychecker::KeyChecker;
use super::search_result::SearchResult;
use super::trie::Trie;
use super::trie::VisitOutcome;
use super::trienode::TrieKey;
use super::trienode::TrieNode;
/// A [`Trie`] that additionally offers `fuzzy_search` (see the `impl` below).
pub type LevTrie<KEY, KEYCHECKER> = Trie<KEY, KEYCHECKER>;
/// Node type stored in a [`LevTrie`].
pub type LevTrieNode<KEY> = TrieNode<KEY>;
/// Distance metric plugged into `LevTrie::fuzzy_search`.
///
/// `fuzzy_search` calls `compute` once for every keyed node it enters and
/// uses the returned [`VisitOutcome`] to decide whether to descend further.
#[sealed]
pub trait DistanceCalculator<KEY, KEYCHECKER>
where
KEY: TrieKey,
KEYCHECKER: KeyChecker<KEY>,
{
/// Fills the distance row of the node on top of `stack`.
///
/// * `workspace` - one row per trie node, indexed by node id; each row has
///   `wordkey.len() + 1` cells and has been initialised by the caller.
/// * `nodes` - the trie's node storage, indexed by node id.
/// * `stack` - node ids on the path from the root to the current node.
/// * `wordkey` - the word being searched for.
/// * `worst_case` - distance of the worst result currently retained, set
///   only once the result set is full.
fn compute(
workspace: &mut Vec<Vec<usize>>,
nodes: &[LevTrieNode<KEY>],
stack: &[usize],
wordkey: &[KEY],
worst_case: Option<usize>,
) -> VisitOutcome;
}
impl<KEY, KEYCHECKER> LevTrie<KEY, KEYCHECKER>
where
    KEY: TrieKey,
    KEYCHECKER: KeyChecker<KEY>,
{
    /// Creates a trie that contains only the root node.
    pub fn new() -> LevTrie<KEY, KEYCHECKER> {
        Trie::default()
    }

    /// Creates a trie and adds every word of `wordlist` to it, in order.
    pub fn from_words<T, U>(wordlist: U) -> LevTrie<KEY, KEYCHECKER>
    where
        T: IntoIterator<Item = KEY>,
        U: IntoIterator<Item = T>,
    {
        let mut trie = Self::new();
        wordlist.into_iter().for_each(|word| {
            trie.add(word);
        });
        trie
    }

    /// Returns up to `max_result` stored words closest to `word` according to
    /// the distance calculator `DC`, sorted by ascending distance (ties are
    /// ordered by node id).
    ///
    /// Each result's `word` field is the id of the word's tail node; pass it
    /// to `lineal_descendant` to recover the keys.
    pub fn fuzzy_search<DC>(&mut self, word: &[KEY], max_result: usize) -> Vec<SearchResult>
    where
        DC: DistanceCalculator<KEY, KEYCHECKER>,
    {
        let word_len = word.len();
        let row_len = word_len + 1;
        // One distance row per trie node, indexed by node id.
        let mut workspace: Vec<Vec<usize>> = vec![Vec::new(); self.nodes()];
        // Max-heap: the worst retained result sits on top and is evicted first.
        let mut best = BinaryHeap::<SearchResult>::with_capacity(max_result + 1);

        let on_enter = |stack: &Vec<usize>| -> VisitOutcome {
            let depth = stack.len();
            let node_id = *stack.last().unwrap();

            // Initialise this node's row: the root row is 0, 1, 2, ...; any
            // other row starts with its depth below the root, followed by zeros.
            let row = &mut workspace[node_id];
            row.resize(row_len, usize::default());
            for (col, cell) in row.iter_mut().enumerate() {
                *cell = match (depth, col) {
                    (1, _) => col,
                    (_, 0) => depth - 1,
                    _ => 0,
                };
            }

            if depth <= 1 {
                return VisitOutcome::Continue;
            }

            if self.get_node(node_id).key.is_some() {
                // Keyed node: let the calculator fill the row and decide
                // whether the subtree is still worth visiting.
                let worst_case = if best.len() == max_result {
                    best.peek().map(|it| it.distance)
                } else {
                    None
                };
                return DC::compute(&mut workspace, &self.nodes, stack, word, worst_case);
            }

            // Key-less node below the root marks the end of a word: its
            // distance is the last cell of the parent's row.
            let distance = workspace[stack[depth - 2]][word_len];
            best.push(SearchResult {
                distance,
                word: node_id,
            });
            if best.len() > max_result {
                best.pop();
            }
            VisitOutcome::Skip
        };

        self.walk(on_enter, |_: &Vec<usize>| {});
        best.into_sorted_vec()
    }
}
/// Levenshtein distance: insertions, deletions and substitutions each cost 1.
pub struct LevenshteinDistanceCalculator {}

#[sealed]
impl<KEY, KEYCHECKER> DistanceCalculator<KEY, KEYCHECKER> for LevenshteinDistanceCalculator
where
    KEY: TrieKey,
    KEYCHECKER: KeyChecker<KEY>,
{
    /// Fills the row of the node on top of `stack` from its parent's row and
    /// prunes the subtree when no cell of the row beats `worst_case`.
    fn compute(
        workspace: &mut Vec<Vec<usize>>,
        nodes: &[LevTrieNode<KEY>],
        stack: &[usize],
        wordkey: &[KEY],
        worst_case: Option<usize>,
    ) -> VisitOutcome {
        let sz = stack.len();
        let key_size = wordkey.len();
        for i in 1..=key_size {
            let current = stack[sz - 1];
            let parent = stack[sz - 2];
            let cost = if KEYCHECKER::check(Some(wordkey[i - 1]), nodes[current].key) {
                // Keys match: carry the diagonal value over unchanged.
                workspace[parent][i - 1]
            } else {
                // Cheapest of insertion, substitution and deletion, plus one.
                workspace[current][i - 1]
                    .min(workspace[parent][i - 1])
                    .min(workspace[parent][i])
                    + 1
            };
            workspace[current][i] = cost;
        }
        match worst_case {
            Some(worst) if worst <= *workspace[stack[sz - 1]].iter().min().unwrap() => {
                VisitOutcome::Skip
            }
            _ => VisitOutcome::Continue,
        }
    }
}
/// Levenshtein distance extended with transposition of two adjacent keys,
/// which also costs 1.
pub struct DamerauLevenshteinDistanceCalculator {}

#[sealed]
impl<KEY, KEYCHECKER> DistanceCalculator<KEY, KEYCHECKER> for DamerauLevenshteinDistanceCalculator
where
    KEY: TrieKey,
    KEYCHECKER: KeyChecker<KEY>,
{
    /// Fills the row of the node on top of `stack` from the rows of its
    /// parent and grandparent, and prunes the subtree when no cell of the
    /// parent's row beats `worst_case`.
    fn compute(
        workspace: &mut Vec<Vec<usize>>,
        nodes: &[LevTrieNode<KEY>],
        stack: &[usize],
        wordkey: &[KEY],
        worst_case: Option<usize>,
    ) -> VisitOutcome {
        let sz = stack.len();
        let key_size = wordkey.len();
        for i in 1..=key_size {
            let current = stack[sz - 1];
            let parent = stack[sz - 2];
            let current_key = nodes[current].key;
            let mut cost = if KEYCHECKER::check(Some(wordkey[i - 1]), current_key) {
                // Keys match: carry the diagonal value over unchanged.
                workspace[parent][i - 1]
            } else {
                // Cheapest of insertion, substitution and deletion, plus one.
                workspace[current][i - 1]
                    .min(workspace[parent][i - 1])
                    .min(workspace[parent][i])
                    + 1
            };
            // Transposition: the last two keys of the word equal the last two
            // keys of the trie path, in swapped order.
            if sz > 2
                && i > 1
                && KEYCHECKER::check(Some(wordkey[i - 2]), current_key)
                && KEYCHECKER::check(Some(wordkey[i - 1]), nodes[parent].key)
            {
                cost = cost.min(workspace[stack[sz - 3]][i - 2] + 1);
            }
            workspace[current][i] = cost;
        }
        // The bound is taken from the parent's row, not the current one.
        match worst_case {
            Some(worst) if worst <= *workspace[stack[sz - 2]].iter().min().unwrap() => {
                VisitOutcome::Skip
            }
            _ => VisitOutcome::Continue,
        }
    }
}
/// Levenshtein variant in which a substitution costs 2 (the same as a
/// deletion plus an insertion), while insertions and deletions cost 1.
pub struct LevenshteinNoSubDistanceCalculator {}

#[sealed]
impl<KEY, KEYCHECKER> DistanceCalculator<KEY, KEYCHECKER> for LevenshteinNoSubDistanceCalculator
where
    KEY: TrieKey,
    KEYCHECKER: KeyChecker<KEY>,
{
    /// Fills the row of the node on top of `stack` from its parent's row and
    /// prunes the subtree when no cell of the row beats `worst_case`.
    fn compute(
        workspace: &mut Vec<Vec<usize>>,
        nodes: &[LevTrieNode<KEY>],
        stack: &[usize],
        wordkey: &[KEY],
        worst_case: Option<usize>,
    ) -> VisitOutcome {
        let sz = stack.len();
        let key_size = wordkey.len();
        for i in 1..=key_size {
            let current = stack[sz - 1];
            let parent = stack[sz - 2];
            let cost = if KEYCHECKER::check(Some(wordkey[i - 1]), nodes[current].key) {
                // Keys match: carry the diagonal value over unchanged.
                workspace[parent][i - 1]
            } else {
                let insertion = workspace[current][i - 1] + 1;
                let substitution = workspace[parent][i - 1] + 2;
                let deletion = workspace[parent][i] + 1;
                insertion.min(substitution).min(deletion)
            };
            workspace[current][i] = cost;
        }
        match worst_case {
            Some(worst) if worst <= *workspace[stack[sz - 1]].iter().min().unwrap() => {
                VisitOutcome::Skip
            }
            _ => VisitOutcome::Continue,
        }
    }
}
/// Longest-common-subsequence distance: the length of the searched word
/// minus the length of the longest subsequence it shares with the trie path.
pub struct LcsDistanceCalculator {}

#[sealed]
impl<KEY, KEYCHECKER> DistanceCalculator<KEY, KEYCHECKER> for LcsDistanceCalculator
where
    KEY: TrieKey,
    KEYCHECKER: KeyChecker<KEY>,
{
    /// Fills the row of the node on top of `stack` and always lets the walk
    /// continue.
    ///
    /// The subtree is never pruned: the end of a word is a child of its last
    /// keyed node, so skipping the children of a node whose distance reached
    /// 0 would drop the exact match itself (and every longer word that
    /// contains the searched word as a subsequence) from the results.
    /// `worst_case` is unused because this distance never grows along a path
    /// and therefore provides no lower bound to prune with.
    fn compute(
        workspace: &mut Vec<Vec<usize>>,
        nodes: &[LevTrieNode<KEY>],
        stack: &[usize],
        wordkey: &[KEY],
        _: Option<usize>,
    ) -> VisitOutcome {
        let max_distance = wordkey.len();
        let sz = stack.len();
        let key_size = wordkey.len();
        for i in 1..=key_size {
            if KEYCHECKER::check(Some(wordkey[i - 1]), nodes[stack[sz - 1]].key) {
                // A match removes one unmatched key relative to the diagonal
                // predecessor (or to the maximum on the border).
                workspace[stack[sz - 1]][i] = if sz == 2 || i == 1 {
                    max_distance - 1
                } else {
                    workspace[stack[sz - 2]][i - 1] - 1
                };
            } else {
                // No match: keep the smaller neighbouring distance; cells
                // outside the table count as `max_distance`.
                let up = if sz == 2 {
                    max_distance
                } else {
                    workspace[stack[sz - 2]][i]
                };
                let left = if i == 1 {
                    max_distance
                } else {
                    workspace[stack[sz - 1]][i - 1]
                };
                workspace[stack[sz - 1]][i] = std::cmp::min(up, left);
            }
        }
        VisitOutcome::Continue
    }
}
-30
View File
@@ -1,30 +0,0 @@
#[macro_use]
extern crate trait_group;
mod levtrie;
// pub use self::levtrie::LevTrieNode as LevTrieNode;
pub use self::levtrie::DamerauLevenshteinDistanceCalculator;
pub use self::levtrie::DistanceCalculator;
pub use self::levtrie::LcsDistanceCalculator;
pub use self::levtrie::LevTrie;
pub use self::levtrie::LevenshteinDistanceCalculator;
pub use self::levtrie::LevenshteinNoSubDistanceCalculator;
mod trie;
pub use self::trie::Trie;
mod trienode;
//use self::trienode::TrieNode as TrieNode;
mod keychecker;
pub use self::keychecker::CaseInsensitiveKeyChecker;
pub use self::keychecker::CaseSensitiveKeyChecker;
pub use self::keychecker::KeyChecker;
/// [`LevTrie`] over `char` keys compared with [`CaseSensitiveKeyChecker`].
pub type CaseSensitiveLevTrie = LevTrie<char, CaseSensitiveKeyChecker>;
/// [`LevTrie`] over `char` keys compared with [`CaseInsensitiveKeyChecker`].
pub type CaseInSensitiveLevTrie = LevTrie<char, CaseInsensitiveKeyChecker>;
mod search_result;
pub use self::search_result::SearchResult;
#[cfg(test)]
mod tests;
-31
View File
@@ -1,31 +0,0 @@
use std::cmp::Ordering;
/// A fuzzy-search hit: the id of the matched word's node and its distance
/// from the searched word.
///
/// Results are ordered by `distance` first and by `word` second.
#[derive(Clone)]
pub struct SearchResult {
    pub word: usize,
    pub distance: usize,
}

impl PartialEq for SearchResult {
    fn eq(&self, other: &Self) -> bool {
        (self.distance, self.word) == (other.distance, other.word)
    }
}

impl Eq for SearchResult {}

impl PartialOrd for SearchResult {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}

impl Ord for SearchResult {
    fn cmp(&self, other: &Self) -> Ordering {
        self.distance
            .cmp(&other.distance)
            .then_with(|| self.word.cmp(&other.word))
    }
}
-138
View File
@@ -1,138 +0,0 @@
use super::{
CaseSensitiveLevTrie, DamerauLevenshteinDistanceCalculator, KeyChecker, LevTrie,
LevenshteinDistanceCalculator, SearchResult,
};
use std::collections::BTreeMap;
use std::fmt::Display;
use std::io::Write;
struct ExpectedResults {
data: Vec<(usize, usize)>,
}
impl ExpectedResults {
fn new(id_map: &BTreeMap<String, usize>, results: &[(String, usize)]) -> ExpectedResults {
let data = results
.iter()
.map(|(key, distance)| {
(
*id_map
.get(key)
.ok_or_else(|| format!("Id not found for key '{key}'"))
.unwrap(),
*distance,
)
})
.collect::<Vec<(usize, usize)>>();
ExpectedResults { data }
}
fn check(&self, search_results: &[SearchResult]) {
for i in 0..self.data.len() {
let SearchResult { word, distance } = search_results[i];
let data = self.data[i];
if data != (word, distance) {
panic!("({}, {}) <> ({}, {})", data.0, data.1, word, distance);
}
}
}
}
/// Prints one line per search result with its distance, the matched word
/// (its keys joined with `key_separator`) and its node id.
///
/// Returns an error only if writing into the in-memory buffer fails.
fn print_search_results<T: Display + Copy, C: KeyChecker<T>>(
    trie: &LevTrie<T, C>,
    search_results: &[SearchResult],
    key_separator: &str,
) -> Result<(), std::io::Error> {
    for result in search_results {
        let mut word = Vec::<u8>::new();
        for (i, fragment) in trie.lineal_descendant(result.word).enumerate() {
            // `write_all` / `write!` write the whole input, unlike `write`,
            // which may stop early and whose byte count was being discarded.
            if i > 0 {
                word.write_all(key_separator.as_bytes())?;
            }
            write!(word, "{}", fragment)?;
        }
        println!(
            "distance: {}, wordkey: {}, id: {}",
            result.distance,
            String::from_utf8(word).unwrap(),
            result.word
        );
    }
    Ok(())
}
// Words inserted into the trie by the tests below, in this order.
const WORDLIST: [&str; 16] = [
"skyscraper",
"camel",
"coal",
"caos",
"copper",
"hello",
"Bugis",
"Kembangan",
"Singapore",
"Fullerton",
"Lavender",
"aircraft",
"boat",
"ship",
"cargo",
"tanker",
];
/// Searches "coat" with the Damerau-Levenshtein calculator and checks the
/// six closest words and their distances.
#[test]
fn test_damerau_levenshtein_strings() {
    let mut trie: CaseSensitiveLevTrie = LevTrie::new();
    let id_map: BTreeMap<String, usize> = WORDLIST
        .iter()
        .map(|word| (String::from(*word), trie.add(word.chars()).1))
        .collect();

    let needle: Vec<char> = "coat".chars().collect();
    let results = trie.fuzzy_search::<DamerauLevenshteinDistanceCalculator>(&needle, 6);
    print_search_results(&trie, &results, "").unwrap();

    let expected = [
        ("coal", 1),
        ("boat", 1),
        ("caos", 2),
        ("camel", 4),
        ("copper", 4),
        ("ship", 4),
    ]
    .map(|(word, distance)| (String::from(word), distance));
    ExpectedResults::new(&id_map, &expected).check(&results);
}
/// Searches "coat" with the plain Levenshtein calculator and checks the six
/// closest words and their distances.
#[test]
fn test_levenshtein_strings() {
    let mut trie: CaseSensitiveLevTrie = LevTrie::new();
    let id_map: BTreeMap<String, usize> = WORDLIST
        .iter()
        .map(|word| (String::from(*word), trie.add(word.chars()).1))
        .collect();

    let needle: Vec<char> = "coat".chars().collect();
    let results = trie.fuzzy_search::<LevenshteinDistanceCalculator>(&needle, 6);
    print_search_results(&trie, &results, "").unwrap();

    let expected = [
        ("coal", 1),
        ("boat", 1),
        ("caos", 3),
        ("camel", 4),
        ("copper", 4),
        ("ship", 4),
    ]
    .map(|(word, distance)| (String::from(word), distance));
    ExpectedResults::new(&id_map, &expected).check(&results);
}
-225
View File
@@ -1,225 +0,0 @@
use super::keychecker::KeyChecker;
use super::trienode::TrieKey;
use super::trienode::TrieNode;
use std::collections::BTreeSet;
use std::iter::Iterator;
use std::marker::PhantomData;
/// Decision returned by the pre-visit callback of `Trie::walk`.
pub enum VisitOutcome {
/// Visit the children of the current node.
Continue,
/// Do not visit the children of the current node; go on with its siblings.
Skip,
/// Stop the whole walk immediately.
EarlyExit,
}
/// Trie whose nodes live in a single `Vec` and refer to each other by index.
///
/// `KEYCHECKER` decides when two keys are considered equal.
pub struct Trie<KEY, KEYCHECKER>
where
KEY: TrieKey,
KEYCHECKER: KeyChecker<KEY>,
{
// Node storage; `Default` places the key-less root at index 0.
pub(crate) nodes: Vec<TrieNode<KEY>>,
// Ids of the key-less nodes that `add` appends to mark the end of a word.
tails: BTreeSet<usize>,
// Ties the trie to its key checker type without storing a value of it.
checker: PhantomData<KEYCHECKER>,
}
impl<KEY, KEYCHECKER> Default for Trie<KEY, KEYCHECKER>
where
    KEY: TrieKey,
    KEYCHECKER: KeyChecker<KEY>,
{
    /// Builds a trie that holds only the key-less root node (index 0) and no
    /// words.
    fn default() -> Self {
        let root = TrieNode::new0(None);
        Self {
            nodes: vec![root],
            tails: BTreeSet::new(),
            checker: PhantomData,
        }
    }
}
impl<KEY, KEYCHECKER> Trie<KEY, KEYCHECKER>
where
    KEY: TrieKey,
    KEYCHECKER: KeyChecker<KEY>,
{
    /// Creates a trie and adds every word of `wordlist` to it, in order.
    pub fn trie_from_words<T, U>(wordlist: U) -> Trie<KEY, KEYCHECKER>
    where
        T: IntoIterator<Item = KEY>,
        U: IntoIterator<Item = T>,
    {
        let mut result = Trie::default();
        for word in wordlist {
            result.add(word);
        }
        result
    }

    /// Mutable access to the node stored at `index`; panics if out of range.
    pub(crate) fn get_node_mut(&mut self, index: usize) -> &mut TrieNode<KEY> {
        &mut self.nodes[index]
    }

    /// Shared access to the node stored at `index`; panics if out of range.
    pub(crate) fn get_node(&self, index: usize) -> &TrieNode<KEY> {
        &self.nodes[index]
    }

    /// Number of nodes in the trie, root and tail nodes included.
    pub(crate) fn nodes(&self) -> usize {
        self.nodes.len()
    }

    /// Appends a new node holding `key` as the last child of `parent` and
    /// returns its index.
    ///
    /// `prev`, when given, must be the current last child of `parent`; when
    /// it is `None` the last child is looked up by following the sibling
    /// links.
    fn add_node(&mut self, key: Option<KEY>, parent: usize, prev: Option<usize>) -> usize {
        let new_index = self.nodes();
        let last_sibling = prev.or_else(|| {
            let mut cursor = self.get_node(parent).child?;
            while let Some(next) = self.get_node(cursor).next {
                cursor = next;
            }
            Some(cursor)
        });
        match last_sibling {
            Some(sibling) => self.get_node_mut(sibling).next = Some(new_index),
            None => self.get_node_mut(parent).child = Some(new_index),
        }
        let mut node = TrieNode::new0(key);
        node.parent = Some(parent);
        node.prev = last_sibling;
        self.nodes.push(node);
        new_index
    }

    /// Returns the key-less child of `parent` that marks the end of a word,
    /// if there is one.
    fn find_tail(&self, parent: usize) -> Option<usize> {
        let mut cursor = self.get_node(parent).child;
        while let Some(index) = cursor {
            let node = self.get_node(index);
            if node.key.is_none() {
                return Some(index);
            }
            cursor = node.next;
        }
        None
    }

    /// Adds the word spelled by `path` to the trie.
    ///
    /// Returns `(true, tail)` when the word was not stored yet and
    /// `(false, tail)` when it already was; `tail` is the id of the key-less
    /// node that terminates the word. A word that is a prefix of an already
    /// stored word is stored as well, by attaching a tail to its last node.
    /// An empty `path` stores nothing and yields `(false, 0)`.
    pub fn add<T>(&mut self, path: T) -> (bool, usize)
    where
        T: IntoIterator<Item = KEY>,
    {
        let mut created = false;
        let mut consumed_any = false;
        let mut parent = 0;
        for key in path {
            consumed_any = true;
            // Look for a child of `parent` with a matching key, remembering
            // the last sibling so a new node can be linked after it.
            let mut matched = None;
            let mut last_sibling = None;
            let mut cursor = self.get_node(parent).child;
            while let Some(index) = cursor {
                let node = self.get_node(index);
                if KEYCHECKER::check(node.key, Some(key)) {
                    matched = Some(index);
                    break;
                }
                last_sibling = Some(index);
                cursor = node.next;
            }
            parent = match matched {
                Some(index) => index,
                None => {
                    created = true;
                    self.add_node(Some(key), parent, last_sibling)
                }
            };
        }
        if !consumed_any {
            return (false, parent);
        }
        if !created {
            // Every key was already present: the word is new only if its
            // last node has no tail yet.
            if let Some(tail) = self.find_tail(parent) {
                return (false, tail);
            }
        }
        let tail = self.add_node(None, parent, None);
        self.tails.insert(tail);
        // Count the new word on every node of its path, root included.
        let mut cursor = Some(tail);
        while let Some(index) = cursor {
            let node = self.get_node_mut(index);
            node.ref_count += 1;
            cursor = node.parent;
        }
        (true, tail)
    }

    /// Looks up the exact word spelled by `path` and returns the id of its
    /// tail node, or `None` when the word is not stored.
    pub fn search(&mut self, path: Vec<KEY>) -> Option<usize> {
        let mut found: Option<usize> = None;
        let visit_pre = |stack: &Vec<usize>| -> VisitOutcome {
            if stack.len() == 1 {
                // Root node: nothing to compare yet.
                return VisitOutcome::Continue;
            }
            let current = *stack.last().expect("walk never passes an empty stack");
            let depth = stack.len() - 2;
            let node = self.get_node(current);
            match path.get(depth) {
                Some(&expected) => {
                    if KEYCHECKER::check(node.key, Some(expected)) {
                        VisitOutcome::Continue
                    } else {
                        VisitOutcome::Skip
                    }
                }
                // All keys matched: the word is stored only if one of the
                // children at this depth is a tail, so keep scanning the
                // siblings instead of giving up on the first keyed child.
                None => {
                    if node.key.is_none() {
                        found = Some(current);
                        VisitOutcome::EarlyExit
                    } else {
                        VisitOutcome::Skip
                    }
                }
            }
        };
        self.walk(visit_pre, |_: &Vec<usize>| {});
        found
    }

    /// Iterates over the keys on the path from the root down to node `start`,
    /// in root-to-`start` order; nodes without a key are left out.
    pub fn lineal_descendant(&self, start: usize) -> impl Iterator<Item = &KEY> {
        let mut keyed: Vec<usize> = Vec::new();
        let mut cursor = Some(start);
        while let Some(index) = cursor {
            let node = self.get_node(index);
            if node.key.is_some() {
                keyed.push(index);
            }
            cursor = node.parent;
        }
        keyed
            .into_iter()
            .rev()
            .map(move |index| self.get_node(index).key.as_ref().unwrap())
    }

    /// Depth-first traversal starting at the root.
    ///
    /// `visit_pre` is called when a node is entered and receives the ids on
    /// the path from the root to that node; for every node except the root
    /// its result decides whether the children are visited. `visit_post` is
    /// called with the same path once a node and its subtree are done.
    pub(crate) fn walk<CB1, CB2>(&self, mut visit_pre: CB1, mut visit_post: CB2)
    where
        CB1: FnMut(&Vec<usize>) -> VisitOutcome,
        CB2: FnMut(&Vec<usize>),
    {
        // Each frame pairs a node with the next of its children to visit.
        let mut pending: Vec<(usize, Option<usize>)> = vec![(0, self.get_node(0).child)];
        let mut path: Vec<usize> = vec![0];
        // The outcome for the root is ignored: its children are always visited.
        visit_pre(&path);
        while let Some(frame) = pending.last_mut() {
            match frame.1 {
                Some(child_id) => {
                    let child = self.get_node(child_id);
                    frame.1 = child.next;
                    path.push(child_id);
                    match visit_pre(&path) {
                        VisitOutcome::Continue => pending.push((child_id, child.child)),
                        VisitOutcome::Skip => pending.push((child_id, None)),
                        VisitOutcome::EarlyExit => return,
                    }
                }
                None => {
                    visit_post(&path);
                    pending.pop();
                    path.pop();
                }
            }
        }
    }

    /// Ids of all tail nodes, one per stored word.
    pub fn tails(&self) -> &BTreeSet<usize> {
        &self.tails
    }
}
-41
View File
@@ -1,41 +0,0 @@
// Bound required of every trie key type: `Copy`, `Display` and `Sized`.
// NOTE(review): presumably `trait_group!` also implements `TrieKey` for every
// type meeting these bounds - confirm against the `trait-group` crate docs.
trait_group! {
pub trait TrieKey : std::marker::Copy + std::fmt::Display + Sized
}
/// A trie node; links to other nodes are indices into the trie's node list.
pub struct TrieNode<KEY>
where
KEY: TrieKey,
{
/// Key held by the node; `None` for the root and for end-of-word nodes.
pub key: Option<KEY>,
/// Previous sibling, if any.
pub prev: Option<usize>,
/// Next sibling, if any.
pub next: Option<usize>,
/// First child, if any.
pub child: Option<usize>,
/// Parent node; `None` for a node that has not been attached to one.
pub parent: Option<usize>,
// Incremented by `Trie::add` on every node along the path of a newly added word.
pub(crate) ref_count: usize,
}
impl<KEY> TrieNode<KEY>
where
KEY: TrieKey,
{
fn new(
key: Option<KEY>,
prev: Option<usize>,
next: Option<usize>,
parent: Option<usize>,
child: Option<usize>,
) -> TrieNode<KEY> {
TrieNode {
key,
prev,
next,
child,
parent,
ref_count: 0,
}
}
pub(crate) fn new0(key: Option<KEY>) -> TrieNode<KEY> {
TrieNode::new(key, None, None, None, None)
}
}