Added longest common subsequence (LCS) and Levenshtein with no substitution
All checks were successful
CI / build (push) Successful in 9s

This commit is contained in:
2025-07-31 14:35:30 +08:00
parent 1ac5c8fcdf
commit d14c907bf8
10 changed files with 467090 additions and 40 deletions

View File

@@ -1,4 +1,7 @@
[registry]
default = "gitea"
[registries.gitea]
global-credential-providers = ["cargo:token"]
index = "sparse+https://gitea.woggioni.net/api/packages/woggioni/cargo/"

263
Cargo.lock generated
View File

@@ -2,55 +2,215 @@
# It is not intended for manual editing.
version = 4
[[package]]
name = "anstream"
version = "0.6.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"is_terminal_polyfill",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd"
[[package]]
name = "anstyle-parse"
version = "0.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9"
dependencies = [
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882"
dependencies = [
"anstyle",
"once_cell_polyfill",
"windows-sys",
]
[[package]]
name = "autocfg"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
[[package]]
name = "clap"
version = "4.5.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed87a9d530bb41a67537289bafcac159cb3ee28460e0a4571123d2a778a6a882"
dependencies = [
"clap_builder",
"clap_derive",
]
[[package]]
name = "clap_builder"
version = "4.5.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64f4f3f3c77c94aff3c7e9aac9a2ca1974a5adf392a8bb751e827d6d127ab966"
dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
]
[[package]]
name = "clap_derive"
version = "4.5.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491"
dependencies = [
"heck 0.5.0",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "clap_lex"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"
[[package]]
name = "colorchoice"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
[[package]]
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "is_terminal_polyfill"
version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "levtree"
version = "0.1.1"
version = "0.1.2"
dependencies = [
"clap",
"rmath",
"sealed",
"trait-group",
]
[[package]]
name = "proc-macro2"
version = "1.0.70"
name = "num-traits"
version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b"
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
dependencies = [
"autocfg",
]
[[package]]
name = "once_cell_polyfill"
version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
[[package]]
name = "opimps"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "857dabe64a7afe2e51ac9962dc3c008e74ae050dd47e21a7e7b1fc69a67a0229"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "proc-macro2"
version = "1.0.95"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.33"
version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rmath"
version = "0.1.0"
source = "sparse+https://gitea.woggioni.net/api/packages/woggioni/cargo/"
checksum = "73da6144552f77474e00a800955098b34da9bd10fde4c1570290978c2c48da27"
dependencies = [
"num-traits",
"opimps",
"sealed",
"trait-group",
]
[[package]]
name = "sealed"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4a8caec23b7800fb97971a1c6ae365b6239aaeddfb934d6265f8505e795699d"
dependencies = [
"heck",
"heck 0.4.1",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "syn"
version = "2.0.39"
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "syn"
version = "2.0.104"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40"
dependencies = [
"proc-macro2",
"quote",
@@ -65,6 +225,85 @@ checksum = "e1b362975c6f0f21a41fbb9ca91fe5dcb7e01e12331360374347476b45f5cb9c"
[[package]]
name = "unicode-ident"
version = "1.0.12"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
[[package]]
name = "utf8parse"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "windows-sys"
version = "0.59.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"

View File

@@ -1,14 +1,18 @@
[package]
name = "levtree"
version = "0.1.1"
version = "0.1.2"
authors = ["Walter Oggioni <oggioni.walter@gmail.com>"]
license = "MIT"
edition = "2024"
[dependencies]
trait-group = "0.1.0"
trait-group = "0.1"
sealed = "0.5"
[dev-dependencies]
clap = { version = "4.5", features = ["derive"] }
rmath = { version = "0.1", registry = "gitea" }
[lib]
name = "levtree"
crate-type = ["lib"]
@@ -16,6 +20,13 @@ bench = false
[[example]]
name = "levtree_benchmark"
name = "benchmark"
path = "examples/benchmark.rs"
[[example]]
name = "searcher"
path = "examples/searcher.rs"
[[example]]
name = "lcs"
path = "examples/lcs.rs"

View File

@@ -7,16 +7,6 @@ use levtree::LevTrie;
use std::io::BufRead;
use std::io::BufReader;
trait IntoCharSlice {
fn into_char_slice(&self) -> Vec<char>;
}
impl IntoCharSlice for str {
fn into_char_slice(&self) -> Vec<char> {
self.chars().into_iter().collect::<Vec<_>>()
}
}
fn main() {
let bytes = include_bytes!("cracklib-small");
let reader = BufReader::new(&bytes[..]);
@@ -28,26 +18,29 @@ fn main() {
trie.add(word.chars());
});
let keys = [
let keys: Vec<Vec<char>> = [
"camel",
"coriolis",
"mattel",
"cruzer",
"cpoper",
"roublesoot",
];
]
.into_iter()
.map(|it| it.chars().collect())
.collect();
for _ in 0..50 {
for key in keys {
let word = &key.into_char_slice()[..];
for key in &keys {
let word = &key;
trie.fuzzy_search::<DamerauLevenshteinDistanceCalculator>(word, 6);
}
}
for key in keys {
let word = &key.into_char_slice()[..];
let word = &key;
let results = trie.fuzzy_search::<DamerauLevenshteinDistanceCalculator>(word, 6);
println!("needle: {}", key);
println!("needle: {}", key.iter().collect::<String>());
for result in results {
let word: String = trie.lineal_descendant(result.word).into_iter().collect();
println!("distance: {}, wordkey: {}", result.distance, word);

69
examples/lcs.rs Normal file
View File

@@ -0,0 +1,69 @@
use clap::Parser;
use rmath::HMatrix;
// Command-line arguments of the `lcs` example: two positional words.
// NOTE: plain `//` comments are used on purpose; `///` doc comments would be
// picked up by clap's derive and change the generated `--help` output.
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
struct CliArgs {
// First word; `main` splits it into chars and passes it as `s1`.
word1: String,
// Second word; `main` splits it into chars and passes it as `s2`.
word2: String,
}
/// Returns the length of the longest common subsequence of `s1` and `s2`.
///
/// Cell `(i, j)` of the work matrix holds the LCS length of `s1[..=i]` and
/// `s2[..=j]`. The matrix has no sentinel row/column, so the borders are
/// handled explicitly inside the loop.
///
/// Returns `0` when either input is empty.
fn lcs_len(s1: &[char], s2: &[char]) -> usize {
    let m = s1.len();
    let n = s2.len();
    // Nothing can be shared with an empty sequence. Returning here also keeps
    // the final `(m - 1, n - 1)` lookup from underflowing.
    if m == 0 || n == 0 {
        return 0;
    }
    let mut workspace = HMatrix::<usize>::new(m, n, |(_, _)| 0);
    for i in 0..m {
        for j in 0..n {
            workspace[(i, j)] = if s1[i] == s2[j] {
                // A match extends the best result of both shorter prefixes;
                // on the first row/column there is no shorter prefix yet.
                if i == 0 || j == 0 {
                    1
                } else {
                    workspace[(i - 1, j - 1)] + 1
                }
            } else {
                // A mismatch inherits the better of the two neighbours; cells
                // outside the matrix count as 0.
                let left = if j > 0 { workspace[(i, j - 1)] } else { 0 };
                let up = if i > 0 { workspace[(i - 1, j)] } else { 0 };
                usize::max(left, up)
            };
        }
    }
    workspace[(m - 1, n - 1)]
}
/// Returns `s1.len()` minus the length of the longest common subsequence of
/// `s1` and `s2`.
///
/// The work matrix mirrors an LCS-length table: every cell starts from
/// `s1.len()` and drops by one for each matched character, so `0` means that
/// all of `s1` appears, in order, inside `s2`.
///
/// When either input is empty nothing can match and `s1.len()` is returned.
fn lcs_distance(s1: &[char], s2: &[char]) -> usize {
    let m = s1.len();
    let n = s2.len();
    let max_distance = m;
    // No character can match an empty sequence. Returning here also keeps the
    // final `(m - 1, n - 1)` lookup from underflowing.
    if m == 0 || n == 0 {
        return max_distance;
    }
    let mut workspace = HMatrix::<usize>::new(m, n, |(_, _)| 0);
    for i in 0..m {
        for j in 0..n {
            workspace[(i, j)] = if s1[i] == s2[j] {
                // A match improves on the diagonal neighbour by one; on the
                // first row/column it is the first match overall.
                if i == 0 || j == 0 {
                    max_distance - 1
                } else {
                    workspace[(i - 1, j - 1)] - 1
                }
            } else {
                // A mismatch inherits the better (smaller) neighbour; cells
                // outside the matrix count as "nothing matched yet".
                let left = if j > 0 {
                    workspace[(i, j - 1)]
                } else {
                    max_distance
                };
                let up = if i > 0 {
                    workspace[(i - 1, j)]
                } else {
                    max_distance
                };
                usize::min(left, up)
            };
        }
    }
    workspace[(m - 1, n - 1)]
}
fn main() {
let args = CliArgs::parse();
let s1 = args.word1.chars().collect::<Vec<char>>();
let s2 = args.word2.chars().collect::<Vec<char>>();
println!("{}", lcs_distance(&s1, &s2));
}

96
examples/searcher.rs Normal file
View File

@@ -0,0 +1,96 @@
extern crate levtree;
use levtree::{CaseInsensitiveKeyChecker, CaseSensitiveKeyChecker};
use levtree::{
DamerauLevenshteinDistanceCalculator, DistanceCalculator, KeyChecker, LcsDistanceCalculator,
LevTrie, LevenshteinDistanceCalculator, LevenshteinNoSubDistanceCalculator,
};
use std::io::BufRead;
use std::io::BufReader;
use clap::{Parser, ValueEnum};
// Distance algorithm selectable through the `--distance` option of `CliArgs`.
// `main` maps every variant to the calculator type of the same name.
// NOTE: plain `//` comments are used on purpose; `///` doc comments on the
// variants would be picked up by clap's `ValueEnum` derive as help text.
#[derive(Debug, Clone, ValueEnum)]
enum Algorithm {
// Dispatches to `LcsDistanceCalculator`.
Lcs,
// Dispatches to `LevenshteinDistanceCalculator`.
Levenshtein,
// Dispatches to `LevenshteinNoSubDistanceCalculator`.
LevenshteinNoSub,
// Dispatches to `DamerauLevenshteinDistanceCalculator`.
DamerauLevenshtein,
}
// The `///` comments below are consumed by clap's derive: the one on the
// struct becomes the `about` text, the ones on the fields become the help text
// of the corresponding option. The previous `about` text described an
// unrelated tool, and two options used plain `//` comments, which left them
// without any help text.
/// Fuzzy-search a word in the bundled dictionary
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
struct CliArgs {
    /// Word to look up
    #[arg(short, long)]
    wordkey: String,

    /// Number of results returned
    #[arg(short, long, default_value_t = 10)]
    result_size: usize,

    /// Specify distance algorithm
    #[arg(short, long, required = false, default_value = "damerau-levenshtein")]
    distance: Algorithm,

    /// Toggle case sensitivity
    #[arg(short, long, required = false, default_value_t = false)]
    case_sensitive: bool,
}
/// Loads the embedded `words.txt` into a trie, runs a fuzzy search for
/// `args.wordkey` and prints every result with its distance.
///
/// `KC` is the key checker used by the trie and `DC` the distance calculator
/// used by the search. `args.result_size` is forwarded to `fuzzy_search` as
/// its second argument.
///
/// Panics if a line of the embedded word list cannot be read as a `String`.
fn run<KC, DC>(args: &CliArgs)
where
    KC: KeyChecker<char>,
    DC: DistanceCalculator<char, KC>,
{
    let mut trie = LevTrie::<char, KC>::new();

    // The word list is compiled into the binary, one entry per line.
    let dictionary: &[u8] = include_bytes!("words.txt");
    for line in BufReader::new(dictionary).lines() {
        let entry: String = line.unwrap();
        trie.add(entry.chars());
    }

    let needle: Vec<char> = args.wordkey.chars().collect();
    let results = trie.fuzzy_search::<DC>(&needle, args.result_size);
    for hit in results {
        // Rebuild the matched word from the node the result points at.
        let matched: String = trie.lineal_descendant(hit.word).into_iter().collect();
        println!("distance: {}, wordkey: {}", hit.distance, matched);
    }
}
/// Entry point: parses the command line and dispatches to the `run`
/// instantiation that matches the requested case sensitivity and algorithm.
fn main() {
    let args = CliArgs::parse();
    // `distance` is matched by reference so that `args` can still be borrowed
    // as a whole by `run`.
    match (args.case_sensitive, &args.distance) {
        (true, Algorithm::Lcs) => {
            run::<CaseSensitiveKeyChecker, LcsDistanceCalculator>(&args)
        }
        (true, Algorithm::Levenshtein) => {
            run::<CaseSensitiveKeyChecker, LevenshteinDistanceCalculator>(&args)
        }
        (true, Algorithm::LevenshteinNoSub) => {
            run::<CaseSensitiveKeyChecker, LevenshteinNoSubDistanceCalculator>(&args)
        }
        (true, Algorithm::DamerauLevenshtein) => {
            run::<CaseSensitiveKeyChecker, DamerauLevenshteinDistanceCalculator>(&args)
        }
        (false, Algorithm::Lcs) => {
            run::<CaseInsensitiveKeyChecker, LcsDistanceCalculator>(&args)
        }
        (false, Algorithm::Levenshtein) => {
            run::<CaseInsensitiveKeyChecker, LevenshteinDistanceCalculator>(&args)
        }
        (false, Algorithm::LevenshteinNoSub) => {
            run::<CaseInsensitiveKeyChecker, LevenshteinNoSubDistanceCalculator>(&args)
        }
        (false, Algorithm::DamerauLevenshtein) => {
            run::<CaseInsensitiveKeyChecker, DamerauLevenshteinDistanceCalculator>(&args)
        }
    }
}

466550
examples/words.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -53,20 +53,20 @@ where
DC: DistanceCalculator<KEY, KEYCHECKER>,
{
let word_len = word.len();
let workspace: &mut Vec<Vec<usize>> = &mut (0..self.nodes()).map(|_| Vec::new()).collect();
let mut result_heap = BinaryHeap::<SearchResult>::with_capacity(max_result + 1);
let required_size = word_len + 1;
let workspace: &mut Vec<Vec<usize>> = &mut vec![Vec::new(); self.nodes()];
let mut result_heap = BinaryHeap::<SearchResult>::with_capacity(max_result + 1);
let visit_pre = |stack: &Vec<usize>| -> VisitOutcome {
let stack_size = stack.len();
let current_node_id = *stack.last().unwrap();
let payload = &mut workspace[current_node_id];
payload.resize(required_size, usize::default());
if stack_size == 1 {
for (i, item) in payload.iter_mut().enumerate().take(required_size) {
for (i, item) in payload.iter_mut().enumerate() {
*item = i;
}
} else {
for (i, item) in payload.iter_mut().enumerate().take(required_size) {
for (i, item) in payload.iter_mut().enumerate() {
*item = if i == 0 { stack_size - 1 } else { 0 }
}
}
@@ -195,3 +195,92 @@ where
}
}
}
/// Distance calculator in which a mismatching key costs 1 when coming from the
/// left or from the parent row and 2 when coming from the diagonal, so a
/// substitution is never cheaper than the two single-step moves combined.
pub struct LevenshteinNoSubDistanceCalculator {}

#[sealed]
impl<KEY, KEYCHECKER> DistanceCalculator<KEY, KEYCHECKER> for LevenshteinNoSubDistanceCalculator
where
    KEY: TrieKey,
    KEYCHECKER: KeyChecker<KEY>,
{
    /// Fills columns `1..=wordkey.len()` of the workspace row belonging to the
    /// node on top of `stack`, reading the row of the node right below it.
    ///
    /// Returns `VisitOutcome::Skip` when `worst_case` is set and is not greater
    /// than the smallest value of the current row, `VisitOutcome::Continue`
    /// otherwise.
    fn compute(
        workspace: &mut Vec<Vec<usize>>,
        nodes: &[LevTrieNode<KEY>],
        stack: &[usize],
        wordkey: &[KEY],
        worst_case: Option<usize>,
    ) -> VisitOutcome {
        let depth = stack.len();
        for col in 1..=wordkey.len() {
            // Looked up inside the loop so that an empty `wordkey` never
            // touches `stack`.
            let current = stack[depth - 1];
            let keys_match = KEYCHECKER::check(Some(wordkey[col - 1]), nodes[current].key);
            let parent = stack[depth - 2];
            let cost = if keys_match {
                // Matching keys carry the diagonal value over unchanged.
                workspace[parent][col - 1]
            } else {
                let from_left = workspace[current][col - 1] + 1;
                let from_diagonal = workspace[parent][col - 1] + 2;
                let from_parent = workspace[parent][col] + 1;
                from_left.min(from_diagonal).min(from_parent)
            };
            workspace[current][col] = cost;
        }

        match worst_case {
            Some(limit) if limit <= *workspace[stack[depth - 1]].iter().min().unwrap() => {
                VisitOutcome::Skip
            }
            _ => VisitOutcome::Continue,
        }
    }
}
/// Distance calculator whose cells start from `wordkey.len()` and drop by one
/// for every matching key; a mismatch takes the smaller of the value above and
/// the value to the left.
pub struct LcsDistanceCalculator {}

#[sealed]
impl<KEY, KEYCHECKER> DistanceCalculator<KEY, KEYCHECKER> for LcsDistanceCalculator
where
    KEY: TrieKey,
    KEYCHECKER: KeyChecker<KEY>,
{
    /// Fills columns `1..=wordkey.len()` of the workspace row belonging to the
    /// node on top of `stack`. The worst-case hint is ignored.
    ///
    /// Returns `VisitOutcome::Skip` once the last column of the current row is
    /// `0`, `VisitOutcome::Continue` otherwise.
    fn compute(
        workspace: &mut Vec<Vec<usize>>,
        nodes: &[LevTrieNode<KEY>],
        stack: &[usize],
        wordkey: &[KEY],
        _: Option<usize>,
    ) -> VisitOutcome {
        let depth = stack.len();
        let word_len = wordkey.len();
        // With a stack of two entries (`first_level`) or in column 1 there is
        // no previous row/column to read from, so `word_len` stands in for the
        // missing neighbour.
        let first_level = depth == 2;
        for col in 1..=word_len {
            let current = stack[depth - 1];
            let cell = if KEYCHECKER::check(Some(wordkey[col - 1]), nodes[current].key) {
                if first_level || col == 1 {
                    word_len - 1
                } else {
                    workspace[stack[depth - 2]][col - 1] - 1
                }
            } else {
                let up = if first_level {
                    word_len
                } else {
                    workspace[stack[depth - 2]][col]
                };
                let left = if col == 1 {
                    word_len
                } else {
                    workspace[current][col - 1]
                };
                up.min(left)
            };
            workspace[current][col] = cell;
        }

        if workspace[stack[depth - 1]][word_len] == 0 {
            VisitOutcome::Skip
        } else {
            VisitOutcome::Continue
        }
    }
}

View File

@@ -4,8 +4,10 @@ mod levtrie;
// pub use self::levtrie::LevTrieNode as LevTrieNode;
pub use self::levtrie::DamerauLevenshteinDistanceCalculator;
pub use self::levtrie::DistanceCalculator;
pub use self::levtrie::LcsDistanceCalculator;
pub use self::levtrie::LevTrie;
pub use self::levtrie::LevenshteinDistanceCalculator;
pub use self::levtrie::LevenshteinNoSubDistanceCalculator;
mod trie;
pub use self::trie::Trie;

View File

@@ -24,8 +24,7 @@ where
next: Option<usize>,
parent: Option<usize>,
child: Option<usize>,
) -> TrieNode<KEY>
{
) -> TrieNode<KEY> {
TrieNode {
key,
prev,
@@ -36,8 +35,7 @@ where
}
}
pub (crate) fn new0(key: Option<KEY>) -> TrieNode<KEY>
{
pub(crate) fn new0(key: Option<KEY>) -> TrieNode<KEY> {
TrieNode::new(key, None, None, None, None)
}
}