added longest common subsequence and levenshtein with no substitution
All checks were successful
CI / build (push) Successful in 9s
All checks were successful
CI / build (push) Successful in 9s
This commit is contained in:
@@ -1,4 +1,7 @@
|
|||||||
[registry]
|
[registry]
|
||||||
default = "gitea"
|
default = "gitea"
|
||||||
|
|
||||||
|
[registries.gitea]
|
||||||
|
global-credential-providers = ["cargo:token"]
|
||||||
|
index = "sparse+https://gitea.woggioni.net/api/packages/woggioni/cargo/"
|
||||||
|
|
||||||
|
263
Cargo.lock
generated
263
Cargo.lock
generated
@@ -2,55 +2,215 @@
|
|||||||
# It is not intended for manual editing.
|
# It is not intended for manual editing.
|
||||||
version = 4
|
version = 4
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstream"
|
||||||
|
version = "0.6.19"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933"
|
||||||
|
dependencies = [
|
||||||
|
"anstyle",
|
||||||
|
"anstyle-parse",
|
||||||
|
"anstyle-query",
|
||||||
|
"anstyle-wincon",
|
||||||
|
"colorchoice",
|
||||||
|
"is_terminal_polyfill",
|
||||||
|
"utf8parse",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle"
|
||||||
|
version = "1.0.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle-parse"
|
||||||
|
version = "0.2.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
|
||||||
|
dependencies = [
|
||||||
|
"utf8parse",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle-query"
|
||||||
|
version = "1.1.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9"
|
||||||
|
dependencies = [
|
||||||
|
"windows-sys",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle-wincon"
|
||||||
|
version = "3.0.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882"
|
||||||
|
dependencies = [
|
||||||
|
"anstyle",
|
||||||
|
"once_cell_polyfill",
|
||||||
|
"windows-sys",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "autocfg"
|
||||||
|
version = "1.5.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap"
|
||||||
|
version = "4.5.42"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ed87a9d530bb41a67537289bafcac159cb3ee28460e0a4571123d2a778a6a882"
|
||||||
|
dependencies = [
|
||||||
|
"clap_builder",
|
||||||
|
"clap_derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap_builder"
|
||||||
|
version = "4.5.42"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "64f4f3f3c77c94aff3c7e9aac9a2ca1974a5adf392a8bb751e827d6d127ab966"
|
||||||
|
dependencies = [
|
||||||
|
"anstream",
|
||||||
|
"anstyle",
|
||||||
|
"clap_lex",
|
||||||
|
"strsim",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap_derive"
|
||||||
|
version = "4.5.41"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491"
|
||||||
|
dependencies = [
|
||||||
|
"heck 0.5.0",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap_lex"
|
||||||
|
version = "0.7.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "colorchoice"
|
||||||
|
version = "1.0.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "heck"
|
name = "heck"
|
||||||
version = "0.4.1"
|
version = "0.4.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
|
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "heck"
|
||||||
|
version = "0.5.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "is_terminal_polyfill"
|
||||||
|
version = "1.70.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "levtree"
|
name = "levtree"
|
||||||
version = "0.1.1"
|
version = "0.1.2"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"clap",
|
||||||
|
"rmath",
|
||||||
"sealed",
|
"sealed",
|
||||||
"trait-group",
|
"trait-group",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "proc-macro2"
|
name = "num-traits"
|
||||||
version = "1.0.70"
|
version = "0.2.19"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b"
|
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
|
||||||
|
dependencies = [
|
||||||
|
"autocfg",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "once_cell_polyfill"
|
||||||
|
version = "1.70.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "opimps"
|
||||||
|
version = "0.2.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "857dabe64a7afe2e51ac9962dc3c008e74ae050dd47e21a7e7b1fc69a67a0229"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "proc-macro2"
|
||||||
|
version = "1.0.95"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"unicode-ident",
|
"unicode-ident",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "quote"
|
name = "quote"
|
||||||
version = "1.0.33"
|
version = "1.0.40"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
|
checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rmath"
|
||||||
|
version = "0.1.0"
|
||||||
|
source = "sparse+https://gitea.woggioni.net/api/packages/woggioni/cargo/"
|
||||||
|
checksum = "73da6144552f77474e00a800955098b34da9bd10fde4c1570290978c2c48da27"
|
||||||
|
dependencies = [
|
||||||
|
"num-traits",
|
||||||
|
"opimps",
|
||||||
|
"sealed",
|
||||||
|
"trait-group",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sealed"
|
name = "sealed"
|
||||||
version = "0.5.0"
|
version = "0.5.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f4a8caec23b7800fb97971a1c6ae365b6239aaeddfb934d6265f8505e795699d"
|
checksum = "f4a8caec23b7800fb97971a1c6ae365b6239aaeddfb934d6265f8505e795699d"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"heck",
|
"heck 0.4.1",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn",
|
"syn",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "syn"
|
name = "strsim"
|
||||||
version = "2.0.39"
|
version = "0.11.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a"
|
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "syn"
|
||||||
|
version = "2.0.104"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
@@ -65,6 +225,85 @@ checksum = "e1b362975c6f0f21a41fbb9ca91fe5dcb7e01e12331360374347476b45f5cb9c"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicode-ident"
|
name = "unicode-ident"
|
||||||
version = "1.0.12"
|
version = "1.0.18"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "utf8parse"
|
||||||
|
version = "0.2.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-sys"
|
||||||
|
version = "0.59.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
|
||||||
|
dependencies = [
|
||||||
|
"windows-targets",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-targets"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
|
||||||
|
dependencies = [
|
||||||
|
"windows_aarch64_gnullvm",
|
||||||
|
"windows_aarch64_msvc",
|
||||||
|
"windows_i686_gnu",
|
||||||
|
"windows_i686_gnullvm",
|
||||||
|
"windows_i686_msvc",
|
||||||
|
"windows_x86_64_gnu",
|
||||||
|
"windows_x86_64_gnullvm",
|
||||||
|
"windows_x86_64_msvc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_aarch64_gnullvm"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_aarch64_msvc"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_gnu"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_gnullvm"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_msvc"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_gnu"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_gnullvm"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_msvc"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
||||||
|
17
Cargo.toml
17
Cargo.toml
@@ -1,14 +1,18 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "levtree"
|
name = "levtree"
|
||||||
version = "0.1.1"
|
version = "0.1.2"
|
||||||
authors = ["Walter Oggioni <oggioni.walter@gmail.com>"]
|
authors = ["Walter Oggioni <oggioni.walter@gmail.com>"]
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
edition = "2024"
|
edition = "2024"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
trait-group = "0.1.0"
|
trait-group = "0.1"
|
||||||
sealed = "0.5"
|
sealed = "0.5"
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
clap = { version = "4.5", features = ["derive"] }
|
||||||
|
rmath = { version = "0.1", registry = "gitea" }
|
||||||
|
|
||||||
[lib]
|
[lib]
|
||||||
name = "levtree"
|
name = "levtree"
|
||||||
crate-type = ["lib"]
|
crate-type = ["lib"]
|
||||||
@@ -16,6 +20,13 @@ bench = false
|
|||||||
|
|
||||||
|
|
||||||
[[example]]
|
[[example]]
|
||||||
name = "levtree_benchmark"
|
name = "benchmark"
|
||||||
path = "examples/benchmark.rs"
|
path = "examples/benchmark.rs"
|
||||||
|
|
||||||
|
[[example]]
|
||||||
|
name = "searcher"
|
||||||
|
path = "examples/searcher.rs"
|
||||||
|
|
||||||
|
[[example]]
|
||||||
|
name = "lcs"
|
||||||
|
path = "examples/lcs.rs"
|
||||||
|
@@ -7,16 +7,6 @@ use levtree::LevTrie;
|
|||||||
use std::io::BufRead;
|
use std::io::BufRead;
|
||||||
use std::io::BufReader;
|
use std::io::BufReader;
|
||||||
|
|
||||||
trait IntoCharSlice {
|
|
||||||
fn into_char_slice(&self) -> Vec<char>;
|
|
||||||
}
|
|
||||||
|
|
||||||
impl IntoCharSlice for str {
|
|
||||||
fn into_char_slice(&self) -> Vec<char> {
|
|
||||||
self.chars().into_iter().collect::<Vec<_>>()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let bytes = include_bytes!("cracklib-small");
|
let bytes = include_bytes!("cracklib-small");
|
||||||
let reader = BufReader::new(&bytes[..]);
|
let reader = BufReader::new(&bytes[..]);
|
||||||
@@ -28,26 +18,29 @@ fn main() {
|
|||||||
trie.add(word.chars());
|
trie.add(word.chars());
|
||||||
});
|
});
|
||||||
|
|
||||||
let keys = [
|
let keys: Vec<Vec<char>> = [
|
||||||
"camel",
|
"camel",
|
||||||
"coriolis",
|
"coriolis",
|
||||||
"mattel",
|
"mattel",
|
||||||
"cruzer",
|
"cruzer",
|
||||||
"cpoper",
|
"cpoper",
|
||||||
"roublesoot",
|
"roublesoot",
|
||||||
];
|
]
|
||||||
|
.into_iter()
|
||||||
|
.map(|it| it.chars().collect())
|
||||||
|
.collect();
|
||||||
|
|
||||||
for _ in 0..50 {
|
for _ in 0..50 {
|
||||||
for key in keys {
|
for key in &keys {
|
||||||
let word = &key.into_char_slice()[..];
|
let word = &key;
|
||||||
trie.fuzzy_search::<DamerauLevenshteinDistanceCalculator>(word, 6);
|
trie.fuzzy_search::<DamerauLevenshteinDistanceCalculator>(word, 6);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for key in keys {
|
for key in keys {
|
||||||
let word = &key.into_char_slice()[..];
|
let word = &key;
|
||||||
let results = trie.fuzzy_search::<DamerauLevenshteinDistanceCalculator>(word, 6);
|
let results = trie.fuzzy_search::<DamerauLevenshteinDistanceCalculator>(word, 6);
|
||||||
println!("needle: {}", key);
|
println!("needle: {}", key.iter().collect::<String>());
|
||||||
for result in results {
|
for result in results {
|
||||||
let word: String = trie.lineal_descendant(result.word).into_iter().collect();
|
let word: String = trie.lineal_descendant(result.word).into_iter().collect();
|
||||||
println!("distance: {}, wordkey: {}", result.distance, word);
|
println!("distance: {}, wordkey: {}", result.distance, word);
|
||||||
|
69
examples/lcs.rs
Normal file
69
examples/lcs.rs
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
use clap::Parser;
|
||||||
|
use rmath::HMatrix;
|
||||||
|
|
||||||
|
#[derive(Parser, Debug)]
|
||||||
|
#[command(version, about, long_about = None)]
|
||||||
|
struct CliArgs {
|
||||||
|
word1: String,
|
||||||
|
word2: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn lcs_len(s1: &[char], s2: &[char]) -> usize {
|
||||||
|
let m = s1.len();
|
||||||
|
let n = s2.len();
|
||||||
|
let mut workspace = HMatrix::<usize>::new(m, n, |(_, _)| 0);
|
||||||
|
for i in 0..m {
|
||||||
|
for j in 0..n {
|
||||||
|
if s1[i] == s2[j] {
|
||||||
|
workspace[(i, j)] = if i == 0 || j == 0 {
|
||||||
|
1
|
||||||
|
} else {
|
||||||
|
workspace[(i - 1, j - 1)] + 1
|
||||||
|
};
|
||||||
|
} else {
|
||||||
|
let left = if j > 0 { workspace[(i, j - 1)] } else { 0 };
|
||||||
|
let up = if i > 0 { workspace[(i - 1, j)] } else { 0 };
|
||||||
|
workspace[(i, j)] = usize::max(left, up)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
workspace[(m - 1, n - 1)]
|
||||||
|
}
|
||||||
|
|
||||||
|
fn lcs_distance(s1: &[char], s2: &[char]) -> usize {
|
||||||
|
let m = s1.len();
|
||||||
|
let n = s2.len();
|
||||||
|
let max_distance = m;
|
||||||
|
let mut workspace = HMatrix::<usize>::new(m, n, |(_, _)| 0);
|
||||||
|
for i in 0..m {
|
||||||
|
for j in 0..n {
|
||||||
|
if s1[i] == s2[j] {
|
||||||
|
workspace[(i, j)] = if i == 0 || j == 0 {
|
||||||
|
max_distance - 1
|
||||||
|
} else {
|
||||||
|
workspace[(i - 1, j - 1)] - 1
|
||||||
|
};
|
||||||
|
} else {
|
||||||
|
let left = if j > 0 {
|
||||||
|
workspace[(i, j - 1)]
|
||||||
|
} else {
|
||||||
|
max_distance
|
||||||
|
};
|
||||||
|
let up = if i > 0 {
|
||||||
|
workspace[(i - 1, j)]
|
||||||
|
} else {
|
||||||
|
max_distance
|
||||||
|
};
|
||||||
|
workspace[(i, j)] = usize::min(left, up)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
workspace[(m - 1, n - 1)]
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let args = CliArgs::parse();
|
||||||
|
let s1 = args.word1.chars().collect::<Vec<char>>();
|
||||||
|
let s2 = args.word2.chars().collect::<Vec<char>>();
|
||||||
|
println!("{}", lcs_distance(&s1, &s2));
|
||||||
|
}
|
96
examples/searcher.rs
Normal file
96
examples/searcher.rs
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
extern crate levtree;
|
||||||
|
|
||||||
|
use levtree::{CaseInsensitiveKeyChecker, CaseSensitiveKeyChecker};
|
||||||
|
use levtree::{
|
||||||
|
DamerauLevenshteinDistanceCalculator, DistanceCalculator, KeyChecker, LcsDistanceCalculator,
|
||||||
|
LevTrie, LevenshteinDistanceCalculator, LevenshteinNoSubDistanceCalculator,
|
||||||
|
};
|
||||||
|
|
||||||
|
use std::io::BufRead;
|
||||||
|
use std::io::BufReader;
|
||||||
|
|
||||||
|
use clap::{Parser, ValueEnum};
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, ValueEnum)]
|
||||||
|
enum Algorithm {
|
||||||
|
Lcs,
|
||||||
|
Levenshtein,
|
||||||
|
LevenshteinNoSub,
|
||||||
|
DamerauLevenshtein,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Encrypt/decrypt files using catenaccio cipher
|
||||||
|
#[derive(Parser, Debug)]
|
||||||
|
#[command(version, about, long_about = None)]
|
||||||
|
struct CliArgs {
|
||||||
|
/// Word to look up
|
||||||
|
#[arg(short, long)]
|
||||||
|
wordkey: String,
|
||||||
|
|
||||||
|
/// Number of results returned
|
||||||
|
#[arg(short, long, default_value_t = 10)]
|
||||||
|
result_size: usize,
|
||||||
|
|
||||||
|
//Specify distance algorithm
|
||||||
|
#[arg(short, long, required = false, default_value = "damerau-levenshtein")]
|
||||||
|
distance: Algorithm,
|
||||||
|
|
||||||
|
//Toggle case sensitivity
|
||||||
|
#[arg(short, long, required = false, default_value_t = false)]
|
||||||
|
case_sensitive: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CliArgs {}
|
||||||
|
|
||||||
|
fn run<KC: KeyChecker<char>, DC: DistanceCalculator<char, KC>>(args: &CliArgs) {
|
||||||
|
let mut trie = LevTrie::<char, KC>::new();
|
||||||
|
let bytes = include_bytes!("words.txt");
|
||||||
|
let reader = BufReader::new(&bytes[..]);
|
||||||
|
reader
|
||||||
|
.lines()
|
||||||
|
.map(|line| line.unwrap())
|
||||||
|
.for_each(|word: String| {
|
||||||
|
trie.add(word.chars());
|
||||||
|
});
|
||||||
|
let word = args.wordkey.chars().collect::<Vec<char>>();
|
||||||
|
let results = trie.fuzzy_search::<DC>(&word, args.result_size);
|
||||||
|
for result in results {
|
||||||
|
let word: String = trie.lineal_descendant(result.word).into_iter().collect();
|
||||||
|
println!("distance: {}, wordkey: {}", result.distance, word);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let args = CliArgs::parse();
|
||||||
|
if args.case_sensitive {
|
||||||
|
match args.distance {
|
||||||
|
Algorithm::Lcs => {
|
||||||
|
run::<CaseSensitiveKeyChecker, LcsDistanceCalculator>(&args);
|
||||||
|
}
|
||||||
|
Algorithm::Levenshtein => {
|
||||||
|
run::<CaseSensitiveKeyChecker, LevenshteinDistanceCalculator>(&args);
|
||||||
|
}
|
||||||
|
Algorithm::LevenshteinNoSub => {
|
||||||
|
run::<CaseSensitiveKeyChecker, LevenshteinNoSubDistanceCalculator>(&args);
|
||||||
|
}
|
||||||
|
Algorithm::DamerauLevenshtein => {
|
||||||
|
run::<CaseSensitiveKeyChecker, DamerauLevenshteinDistanceCalculator>(&args);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
match args.distance {
|
||||||
|
Algorithm::Lcs => {
|
||||||
|
run::<CaseInsensitiveKeyChecker, LcsDistanceCalculator>(&args);
|
||||||
|
}
|
||||||
|
Algorithm::Levenshtein => {
|
||||||
|
run::<CaseInsensitiveKeyChecker, LevenshteinDistanceCalculator>(&args);
|
||||||
|
}
|
||||||
|
Algorithm::LevenshteinNoSub => {
|
||||||
|
run::<CaseInsensitiveKeyChecker, LevenshteinNoSubDistanceCalculator>(&args);
|
||||||
|
}
|
||||||
|
Algorithm::DamerauLevenshtein => {
|
||||||
|
run::<CaseInsensitiveKeyChecker, DamerauLevenshteinDistanceCalculator>(&args);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
466550
examples/words.txt
Normal file
466550
examples/words.txt
Normal file
File diff suppressed because it is too large
Load Diff
@@ -53,20 +53,20 @@ where
|
|||||||
DC: DistanceCalculator<KEY, KEYCHECKER>,
|
DC: DistanceCalculator<KEY, KEYCHECKER>,
|
||||||
{
|
{
|
||||||
let word_len = word.len();
|
let word_len = word.len();
|
||||||
let workspace: &mut Vec<Vec<usize>> = &mut (0..self.nodes()).map(|_| Vec::new()).collect();
|
|
||||||
let mut result_heap = BinaryHeap::<SearchResult>::with_capacity(max_result + 1);
|
|
||||||
let required_size = word_len + 1;
|
let required_size = word_len + 1;
|
||||||
|
let workspace: &mut Vec<Vec<usize>> = &mut vec![Vec::new(); self.nodes()];
|
||||||
|
let mut result_heap = BinaryHeap::<SearchResult>::with_capacity(max_result + 1);
|
||||||
let visit_pre = |stack: &Vec<usize>| -> VisitOutcome {
|
let visit_pre = |stack: &Vec<usize>| -> VisitOutcome {
|
||||||
let stack_size = stack.len();
|
let stack_size = stack.len();
|
||||||
let current_node_id = *stack.last().unwrap();
|
let current_node_id = *stack.last().unwrap();
|
||||||
let payload = &mut workspace[current_node_id];
|
let payload = &mut workspace[current_node_id];
|
||||||
payload.resize(required_size, usize::default());
|
payload.resize(required_size, usize::default());
|
||||||
if stack_size == 1 {
|
if stack_size == 1 {
|
||||||
for (i, item) in payload.iter_mut().enumerate().take(required_size) {
|
for (i, item) in payload.iter_mut().enumerate() {
|
||||||
*item = i;
|
*item = i;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (i, item) in payload.iter_mut().enumerate().take(required_size) {
|
for (i, item) in payload.iter_mut().enumerate() {
|
||||||
*item = if i == 0 { stack_size - 1 } else { 0 }
|
*item = if i == 0 { stack_size - 1 } else { 0 }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -195,3 +195,92 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub struct LevenshteinNoSubDistanceCalculator {}
|
||||||
|
|
||||||
|
#[sealed]
|
||||||
|
impl<KEY, KEYCHECKER> DistanceCalculator<KEY, KEYCHECKER> for LevenshteinNoSubDistanceCalculator
|
||||||
|
where
|
||||||
|
KEY: TrieKey,
|
||||||
|
KEYCHECKER: KeyChecker<KEY>,
|
||||||
|
{
|
||||||
|
fn compute(
|
||||||
|
workspace: &mut Vec<Vec<usize>>,
|
||||||
|
nodes: &[LevTrieNode<KEY>],
|
||||||
|
stack: &[usize],
|
||||||
|
wordkey: &[KEY],
|
||||||
|
worst_case: Option<usize>,
|
||||||
|
) -> VisitOutcome {
|
||||||
|
let sz = stack.len();
|
||||||
|
let key_size = wordkey.len();
|
||||||
|
for i in 1..=key_size {
|
||||||
|
if KEYCHECKER::check(Some(wordkey[i - 1]), nodes[stack[sz - 1]].key) {
|
||||||
|
workspace[stack[sz - 1]][i] = workspace[stack[sz - 2]][i - 1];
|
||||||
|
} else {
|
||||||
|
workspace[stack[sz - 1]][i] = std::cmp::min(
|
||||||
|
std::cmp::min(
|
||||||
|
workspace[stack[sz - 1]][i - 1] + 1,
|
||||||
|
workspace[stack[sz - 2]][i - 1] + 2,
|
||||||
|
),
|
||||||
|
workspace[stack[sz - 2]][i] + 1,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let condition = worst_case
|
||||||
|
.map(|wv| wv <= *workspace[stack[sz - 1]][..].iter().min().unwrap())
|
||||||
|
.unwrap_or(false);
|
||||||
|
if condition {
|
||||||
|
VisitOutcome::Skip
|
||||||
|
} else {
|
||||||
|
VisitOutcome::Continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct LcsDistanceCalculator {}
|
||||||
|
|
||||||
|
#[sealed]
|
||||||
|
impl<KEY, KEYCHECKER> DistanceCalculator<KEY, KEYCHECKER> for LcsDistanceCalculator
|
||||||
|
where
|
||||||
|
KEY: TrieKey,
|
||||||
|
KEYCHECKER: KeyChecker<KEY>,
|
||||||
|
{
|
||||||
|
fn compute(
|
||||||
|
workspace: &mut Vec<Vec<usize>>,
|
||||||
|
nodes: &[LevTrieNode<KEY>],
|
||||||
|
stack: &[usize],
|
||||||
|
wordkey: &[KEY],
|
||||||
|
_: Option<usize>,
|
||||||
|
) -> VisitOutcome {
|
||||||
|
let max_distance = wordkey.len();
|
||||||
|
let sz = stack.len();
|
||||||
|
let key_size = wordkey.len();
|
||||||
|
for i in 1..=key_size {
|
||||||
|
if KEYCHECKER::check(Some(wordkey[i - 1]), nodes[stack[sz - 1]].key) {
|
||||||
|
workspace[stack[sz - 1]][i] = if sz == 2 || i == 1 {
|
||||||
|
max_distance - 1
|
||||||
|
} else {
|
||||||
|
workspace[stack[sz - 2]][i - 1] - 1
|
||||||
|
};
|
||||||
|
} else {
|
||||||
|
let up = if sz == 2 {
|
||||||
|
max_distance
|
||||||
|
} else {
|
||||||
|
workspace[stack[sz - 2]][i]
|
||||||
|
};
|
||||||
|
let left = if i == 1 {
|
||||||
|
max_distance
|
||||||
|
} else {
|
||||||
|
workspace[stack[sz - 1]][i - 1]
|
||||||
|
};
|
||||||
|
workspace[stack[sz - 1]][i] = std::cmp::min(up, left);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let condition = workspace[stack[sz - 1]][key_size] == 0;
|
||||||
|
if condition {
|
||||||
|
VisitOutcome::Skip
|
||||||
|
} else {
|
||||||
|
VisitOutcome::Continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@@ -4,8 +4,10 @@ mod levtrie;
|
|||||||
// pub use self::levtrie::LevTrieNode as LevTrieNode;
|
// pub use self::levtrie::LevTrieNode as LevTrieNode;
|
||||||
pub use self::levtrie::DamerauLevenshteinDistanceCalculator;
|
pub use self::levtrie::DamerauLevenshteinDistanceCalculator;
|
||||||
pub use self::levtrie::DistanceCalculator;
|
pub use self::levtrie::DistanceCalculator;
|
||||||
|
pub use self::levtrie::LcsDistanceCalculator;
|
||||||
pub use self::levtrie::LevTrie;
|
pub use self::levtrie::LevTrie;
|
||||||
pub use self::levtrie::LevenshteinDistanceCalculator;
|
pub use self::levtrie::LevenshteinDistanceCalculator;
|
||||||
|
pub use self::levtrie::LevenshteinNoSubDistanceCalculator;
|
||||||
|
|
||||||
mod trie;
|
mod trie;
|
||||||
pub use self::trie::Trie;
|
pub use self::trie::Trie;
|
||||||
|
@@ -24,8 +24,7 @@ where
|
|||||||
next: Option<usize>,
|
next: Option<usize>,
|
||||||
parent: Option<usize>,
|
parent: Option<usize>,
|
||||||
child: Option<usize>,
|
child: Option<usize>,
|
||||||
) -> TrieNode<KEY>
|
) -> TrieNode<KEY> {
|
||||||
{
|
|
||||||
TrieNode {
|
TrieNode {
|
||||||
key,
|
key,
|
||||||
prev,
|
prev,
|
||||||
@@ -36,8 +35,7 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub (crate) fn new0(key: Option<KEY>) -> TrieNode<KEY>
|
pub(crate) fn new0(key: Option<KEY>) -> TrieNode<KEY> {
|
||||||
{
|
|
||||||
TrieNode::new(key, None, None, None, None)
|
TrieNode::new(key, None, None, None, None)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user