added longest common subsequence (LCS) and Levenshtein with no substitution
All checks were successful
CI / build (push) Successful in 9s
All checks were successful
CI / build (push) Successful in 9s
This commit is contained in:
@@ -1,4 +1,7 @@
|
||||
[registry]
|
||||
default = "gitea"
|
||||
|
||||
[registries.gitea]
|
||||
global-credential-providers = ["cargo:token"]
|
||||
index = "sparse+https://gitea.woggioni.net/api/packages/woggioni/cargo/"
|
||||
|
||||
|
263
Cargo.lock
generated
263
Cargo.lock
generated
@@ -2,55 +2,215 @@
|
||||
# It is not intended for manual editing.
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "anstream"
|
||||
version = "0.6.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"anstyle-parse",
|
||||
"anstyle-query",
|
||||
"anstyle-wincon",
|
||||
"colorchoice",
|
||||
"is_terminal_polyfill",
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle"
|
||||
version = "1.0.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd"
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-parse"
|
||||
version = "0.2.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
|
||||
dependencies = [
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-query"
|
||||
version = "1.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9"
|
||||
dependencies = [
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-wincon"
|
||||
version = "3.0.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"once_cell_polyfill",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "4.5.42"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ed87a9d530bb41a67537289bafcac159cb3ee28460e0a4571123d2a778a6a882"
|
||||
dependencies = [
|
||||
"clap_builder",
|
||||
"clap_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_builder"
|
||||
version = "4.5.42"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "64f4f3f3c77c94aff3c7e9aac9a2ca1974a5adf392a8bb751e827d6d127ab966"
|
||||
dependencies = [
|
||||
"anstream",
|
||||
"anstyle",
|
||||
"clap_lex",
|
||||
"strsim",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_derive"
|
||||
version = "4.5.41"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491"
|
||||
dependencies = [
|
||||
"heck 0.5.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_lex"
|
||||
version = "0.7.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"
|
||||
|
||||
[[package]]
|
||||
name = "colorchoice"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||
|
||||
[[package]]
|
||||
name = "is_terminal_polyfill"
|
||||
version = "1.70.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
|
||||
|
||||
[[package]]
|
||||
name = "levtree"
|
||||
version = "0.1.1"
|
||||
version = "0.1.2"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"rmath",
|
||||
"sealed",
|
||||
"trait-group",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.70"
|
||||
name = "num-traits"
|
||||
version = "0.2.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b"
|
||||
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell_polyfill"
|
||||
version = "1.70.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
|
||||
|
||||
[[package]]
|
||||
name = "opimps"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "857dabe64a7afe2e51ac9962dc3c008e74ae050dd47e21a7e7b1fc69a67a0229"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.95"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.33"
|
||||
version = "1.0.40"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
|
||||
checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rmath"
|
||||
version = "0.1.0"
|
||||
source = "sparse+https://gitea.woggioni.net/api/packages/woggioni/cargo/"
|
||||
checksum = "73da6144552f77474e00a800955098b34da9bd10fde4c1570290978c2c48da27"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
"opimps",
|
||||
"sealed",
|
||||
"trait-group",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sealed"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f4a8caec23b7800fb97971a1c6ae365b6239aaeddfb934d6265f8505e795699d"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"heck 0.4.1",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.39"
|
||||
name = "strsim"
|
||||
version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a"
|
||||
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.104"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -65,6 +225,85 @@ checksum = "e1b362975c6f0f21a41fbb9ca91fe5dcb7e01e12331360374347476b45f5cb9c"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.12"
|
||||
version = "1.0.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
||||
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
|
||||
|
||||
[[package]]
|
||||
name = "utf8parse"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.59.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
|
||||
dependencies = [
|
||||
"windows-targets",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
|
||||
dependencies = [
|
||||
"windows_aarch64_gnullvm",
|
||||
"windows_aarch64_msvc",
|
||||
"windows_i686_gnu",
|
||||
"windows_i686_gnullvm",
|
||||
"windows_i686_msvc",
|
||||
"windows_x86_64_gnu",
|
||||
"windows_x86_64_gnullvm",
|
||||
"windows_x86_64_msvc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_gnullvm"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnullvm"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnullvm"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.52.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
||||
|
17
Cargo.toml
17
Cargo.toml
@@ -1,14 +1,18 @@
|
||||
[package]
|
||||
name = "levtree"
|
||||
version = "0.1.1"
|
||||
version = "0.1.2"
|
||||
authors = ["Walter Oggioni <oggioni.walter@gmail.com>"]
|
||||
license = "MIT"
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
trait-group = "0.1.0"
|
||||
trait-group = "0.1"
|
||||
sealed = "0.5"
|
||||
|
||||
[dev-dependencies]
|
||||
clap = { version = "4.5", features = ["derive"] }
|
||||
rmath = { version = "0.1", registry = "gitea" }
|
||||
|
||||
[lib]
|
||||
name = "levtree"
|
||||
crate-type = ["lib"]
|
||||
@@ -16,6 +20,13 @@ bench = false
|
||||
|
||||
|
||||
[[example]]
|
||||
name = "levtree_benchmark"
|
||||
name = "benchmark"
|
||||
path = "examples/benchmark.rs"
|
||||
|
||||
[[example]]
|
||||
name = "searcher"
|
||||
path = "examples/searcher.rs"
|
||||
|
||||
[[example]]
|
||||
name = "lcs"
|
||||
path = "examples/lcs.rs"
|
||||
|
@@ -7,16 +7,6 @@ use levtree::LevTrie;
|
||||
use std::io::BufRead;
|
||||
use std::io::BufReader;
|
||||
|
||||
trait IntoCharSlice {
|
||||
fn into_char_slice(&self) -> Vec<char>;
|
||||
}
|
||||
|
||||
impl IntoCharSlice for str {
|
||||
fn into_char_slice(&self) -> Vec<char> {
|
||||
self.chars().into_iter().collect::<Vec<_>>()
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let bytes = include_bytes!("cracklib-small");
|
||||
let reader = BufReader::new(&bytes[..]);
|
||||
@@ -28,26 +18,29 @@ fn main() {
|
||||
trie.add(word.chars());
|
||||
});
|
||||
|
||||
let keys = [
|
||||
let keys: Vec<Vec<char>> = [
|
||||
"camel",
|
||||
"coriolis",
|
||||
"mattel",
|
||||
"cruzer",
|
||||
"cpoper",
|
||||
"roublesoot",
|
||||
];
|
||||
]
|
||||
.into_iter()
|
||||
.map(|it| it.chars().collect())
|
||||
.collect();
|
||||
|
||||
for _ in 0..50 {
|
||||
for key in keys {
|
||||
let word = &key.into_char_slice()[..];
|
||||
for key in &keys {
|
||||
let word = &key;
|
||||
trie.fuzzy_search::<DamerauLevenshteinDistanceCalculator>(word, 6);
|
||||
}
|
||||
}
|
||||
|
||||
for key in keys {
|
||||
let word = &key.into_char_slice()[..];
|
||||
let word = &key;
|
||||
let results = trie.fuzzy_search::<DamerauLevenshteinDistanceCalculator>(word, 6);
|
||||
println!("needle: {}", key);
|
||||
println!("needle: {}", key.iter().collect::<String>());
|
||||
for result in results {
|
||||
let word: String = trie.lineal_descendant(result.word).into_iter().collect();
|
||||
println!("distance: {}, wordkey: {}", result.distance, word);
|
||||
|
69
examples/lcs.rs
Normal file
69
examples/lcs.rs
Normal file
@@ -0,0 +1,69 @@
|
||||
use clap::Parser;
|
||||
use rmath::HMatrix;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(version, about, long_about = None)]
|
||||
struct CliArgs {
|
||||
word1: String,
|
||||
word2: String,
|
||||
}
|
||||
|
||||
fn lcs_len(s1: &[char], s2: &[char]) -> usize {
|
||||
let m = s1.len();
|
||||
let n = s2.len();
|
||||
let mut workspace = HMatrix::<usize>::new(m, n, |(_, _)| 0);
|
||||
for i in 0..m {
|
||||
for j in 0..n {
|
||||
if s1[i] == s2[j] {
|
||||
workspace[(i, j)] = if i == 0 || j == 0 {
|
||||
1
|
||||
} else {
|
||||
workspace[(i - 1, j - 1)] + 1
|
||||
};
|
||||
} else {
|
||||
let left = if j > 0 { workspace[(i, j - 1)] } else { 0 };
|
||||
let up = if i > 0 { workspace[(i - 1, j)] } else { 0 };
|
||||
workspace[(i, j)] = usize::max(left, up)
|
||||
}
|
||||
}
|
||||
}
|
||||
workspace[(m - 1, n - 1)]
|
||||
}
|
||||
|
||||
fn lcs_distance(s1: &[char], s2: &[char]) -> usize {
|
||||
let m = s1.len();
|
||||
let n = s2.len();
|
||||
let max_distance = m;
|
||||
let mut workspace = HMatrix::<usize>::new(m, n, |(_, _)| 0);
|
||||
for i in 0..m {
|
||||
for j in 0..n {
|
||||
if s1[i] == s2[j] {
|
||||
workspace[(i, j)] = if i == 0 || j == 0 {
|
||||
max_distance - 1
|
||||
} else {
|
||||
workspace[(i - 1, j - 1)] - 1
|
||||
};
|
||||
} else {
|
||||
let left = if j > 0 {
|
||||
workspace[(i, j - 1)]
|
||||
} else {
|
||||
max_distance
|
||||
};
|
||||
let up = if i > 0 {
|
||||
workspace[(i - 1, j)]
|
||||
} else {
|
||||
max_distance
|
||||
};
|
||||
workspace[(i, j)] = usize::min(left, up)
|
||||
}
|
||||
}
|
||||
}
|
||||
workspace[(m - 1, n - 1)]
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let args = CliArgs::parse();
|
||||
let s1 = args.word1.chars().collect::<Vec<char>>();
|
||||
let s2 = args.word2.chars().collect::<Vec<char>>();
|
||||
println!("{}", lcs_distance(&s1, &s2));
|
||||
}
|
96
examples/searcher.rs
Normal file
96
examples/searcher.rs
Normal file
@@ -0,0 +1,96 @@
|
||||
extern crate levtree;
|
||||
|
||||
use levtree::{CaseInsensitiveKeyChecker, CaseSensitiveKeyChecker};
|
||||
use levtree::{
|
||||
DamerauLevenshteinDistanceCalculator, DistanceCalculator, KeyChecker, LcsDistanceCalculator,
|
||||
LevTrie, LevenshteinDistanceCalculator, LevenshteinNoSubDistanceCalculator,
|
||||
};
|
||||
|
||||
use std::io::BufRead;
|
||||
use std::io::BufReader;
|
||||
|
||||
use clap::{Parser, ValueEnum};
|
||||
|
||||
#[derive(Debug, Clone, ValueEnum)]
|
||||
enum Algorithm {
|
||||
Lcs,
|
||||
Levenshtein,
|
||||
LevenshteinNoSub,
|
||||
DamerauLevenshtein,
|
||||
}
|
||||
|
||||
/// Encrypt/decrypt files using catenaccio cipher
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(version, about, long_about = None)]
|
||||
struct CliArgs {
|
||||
/// Word to look up
|
||||
#[arg(short, long)]
|
||||
wordkey: String,
|
||||
|
||||
/// Number of results returned
|
||||
#[arg(short, long, default_value_t = 10)]
|
||||
result_size: usize,
|
||||
|
||||
//Specify distance algorithm
|
||||
#[arg(short, long, required = false, default_value = "damerau-levenshtein")]
|
||||
distance: Algorithm,
|
||||
|
||||
//Toggle case sensitivity
|
||||
#[arg(short, long, required = false, default_value_t = false)]
|
||||
case_sensitive: bool,
|
||||
}
|
||||
|
||||
impl CliArgs {}
|
||||
|
||||
fn run<KC: KeyChecker<char>, DC: DistanceCalculator<char, KC>>(args: &CliArgs) {
|
||||
let mut trie = LevTrie::<char, KC>::new();
|
||||
let bytes = include_bytes!("words.txt");
|
||||
let reader = BufReader::new(&bytes[..]);
|
||||
reader
|
||||
.lines()
|
||||
.map(|line| line.unwrap())
|
||||
.for_each(|word: String| {
|
||||
trie.add(word.chars());
|
||||
});
|
||||
let word = args.wordkey.chars().collect::<Vec<char>>();
|
||||
let results = trie.fuzzy_search::<DC>(&word, args.result_size);
|
||||
for result in results {
|
||||
let word: String = trie.lineal_descendant(result.word).into_iter().collect();
|
||||
println!("distance: {}, wordkey: {}", result.distance, word);
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let args = CliArgs::parse();
|
||||
if args.case_sensitive {
|
||||
match args.distance {
|
||||
Algorithm::Lcs => {
|
||||
run::<CaseSensitiveKeyChecker, LcsDistanceCalculator>(&args);
|
||||
}
|
||||
Algorithm::Levenshtein => {
|
||||
run::<CaseSensitiveKeyChecker, LevenshteinDistanceCalculator>(&args);
|
||||
}
|
||||
Algorithm::LevenshteinNoSub => {
|
||||
run::<CaseSensitiveKeyChecker, LevenshteinNoSubDistanceCalculator>(&args);
|
||||
}
|
||||
Algorithm::DamerauLevenshtein => {
|
||||
run::<CaseSensitiveKeyChecker, DamerauLevenshteinDistanceCalculator>(&args);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
match args.distance {
|
||||
Algorithm::Lcs => {
|
||||
run::<CaseInsensitiveKeyChecker, LcsDistanceCalculator>(&args);
|
||||
}
|
||||
Algorithm::Levenshtein => {
|
||||
run::<CaseInsensitiveKeyChecker, LevenshteinDistanceCalculator>(&args);
|
||||
}
|
||||
Algorithm::LevenshteinNoSub => {
|
||||
run::<CaseInsensitiveKeyChecker, LevenshteinNoSubDistanceCalculator>(&args);
|
||||
}
|
||||
Algorithm::DamerauLevenshtein => {
|
||||
run::<CaseInsensitiveKeyChecker, DamerauLevenshteinDistanceCalculator>(&args);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
466550
examples/words.txt
Normal file
466550
examples/words.txt
Normal file
File diff suppressed because it is too large
Load Diff
@@ -53,20 +53,20 @@ where
|
||||
DC: DistanceCalculator<KEY, KEYCHECKER>,
|
||||
{
|
||||
let word_len = word.len();
|
||||
let workspace: &mut Vec<Vec<usize>> = &mut (0..self.nodes()).map(|_| Vec::new()).collect();
|
||||
let mut result_heap = BinaryHeap::<SearchResult>::with_capacity(max_result + 1);
|
||||
let required_size = word_len + 1;
|
||||
let workspace: &mut Vec<Vec<usize>> = &mut vec![Vec::new(); self.nodes()];
|
||||
let mut result_heap = BinaryHeap::<SearchResult>::with_capacity(max_result + 1);
|
||||
let visit_pre = |stack: &Vec<usize>| -> VisitOutcome {
|
||||
let stack_size = stack.len();
|
||||
let current_node_id = *stack.last().unwrap();
|
||||
let payload = &mut workspace[current_node_id];
|
||||
payload.resize(required_size, usize::default());
|
||||
if stack_size == 1 {
|
||||
for (i, item) in payload.iter_mut().enumerate().take(required_size) {
|
||||
for (i, item) in payload.iter_mut().enumerate() {
|
||||
*item = i;
|
||||
}
|
||||
} else {
|
||||
for (i, item) in payload.iter_mut().enumerate().take(required_size) {
|
||||
for (i, item) in payload.iter_mut().enumerate() {
|
||||
*item = if i == 0 { stack_size - 1 } else { 0 }
|
||||
}
|
||||
}
|
||||
@@ -195,3 +195,92 @@ where
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct LevenshteinNoSubDistanceCalculator {}
|
||||
|
||||
#[sealed]
|
||||
impl<KEY, KEYCHECKER> DistanceCalculator<KEY, KEYCHECKER> for LevenshteinNoSubDistanceCalculator
|
||||
where
|
||||
KEY: TrieKey,
|
||||
KEYCHECKER: KeyChecker<KEY>,
|
||||
{
|
||||
fn compute(
|
||||
workspace: &mut Vec<Vec<usize>>,
|
||||
nodes: &[LevTrieNode<KEY>],
|
||||
stack: &[usize],
|
||||
wordkey: &[KEY],
|
||||
worst_case: Option<usize>,
|
||||
) -> VisitOutcome {
|
||||
let sz = stack.len();
|
||||
let key_size = wordkey.len();
|
||||
for i in 1..=key_size {
|
||||
if KEYCHECKER::check(Some(wordkey[i - 1]), nodes[stack[sz - 1]].key) {
|
||||
workspace[stack[sz - 1]][i] = workspace[stack[sz - 2]][i - 1];
|
||||
} else {
|
||||
workspace[stack[sz - 1]][i] = std::cmp::min(
|
||||
std::cmp::min(
|
||||
workspace[stack[sz - 1]][i - 1] + 1,
|
||||
workspace[stack[sz - 2]][i - 1] + 2,
|
||||
),
|
||||
workspace[stack[sz - 2]][i] + 1,
|
||||
);
|
||||
}
|
||||
}
|
||||
let condition = worst_case
|
||||
.map(|wv| wv <= *workspace[stack[sz - 1]][..].iter().min().unwrap())
|
||||
.unwrap_or(false);
|
||||
if condition {
|
||||
VisitOutcome::Skip
|
||||
} else {
|
||||
VisitOutcome::Continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct LcsDistanceCalculator {}
|
||||
|
||||
#[sealed]
|
||||
impl<KEY, KEYCHECKER> DistanceCalculator<KEY, KEYCHECKER> for LcsDistanceCalculator
|
||||
where
|
||||
KEY: TrieKey,
|
||||
KEYCHECKER: KeyChecker<KEY>,
|
||||
{
|
||||
fn compute(
|
||||
workspace: &mut Vec<Vec<usize>>,
|
||||
nodes: &[LevTrieNode<KEY>],
|
||||
stack: &[usize],
|
||||
wordkey: &[KEY],
|
||||
_: Option<usize>,
|
||||
) -> VisitOutcome {
|
||||
let max_distance = wordkey.len();
|
||||
let sz = stack.len();
|
||||
let key_size = wordkey.len();
|
||||
for i in 1..=key_size {
|
||||
if KEYCHECKER::check(Some(wordkey[i - 1]), nodes[stack[sz - 1]].key) {
|
||||
workspace[stack[sz - 1]][i] = if sz == 2 || i == 1 {
|
||||
max_distance - 1
|
||||
} else {
|
||||
workspace[stack[sz - 2]][i - 1] - 1
|
||||
};
|
||||
} else {
|
||||
let up = if sz == 2 {
|
||||
max_distance
|
||||
} else {
|
||||
workspace[stack[sz - 2]][i]
|
||||
};
|
||||
let left = if i == 1 {
|
||||
max_distance
|
||||
} else {
|
||||
workspace[stack[sz - 1]][i - 1]
|
||||
};
|
||||
workspace[stack[sz - 1]][i] = std::cmp::min(up, left);
|
||||
}
|
||||
}
|
||||
let condition = workspace[stack[sz - 1]][key_size] == 0;
|
||||
if condition {
|
||||
VisitOutcome::Skip
|
||||
} else {
|
||||
VisitOutcome::Continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -4,8 +4,10 @@ mod levtrie;
|
||||
// pub use self::levtrie::LevTrieNode as LevTrieNode;
|
||||
pub use self::levtrie::DamerauLevenshteinDistanceCalculator;
|
||||
pub use self::levtrie::DistanceCalculator;
|
||||
pub use self::levtrie::LcsDistanceCalculator;
|
||||
pub use self::levtrie::LevTrie;
|
||||
pub use self::levtrie::LevenshteinDistanceCalculator;
|
||||
pub use self::levtrie::LevenshteinNoSubDistanceCalculator;
|
||||
|
||||
mod trie;
|
||||
pub use self::trie::Trie;
|
||||
|
@@ -24,8 +24,7 @@ where
|
||||
next: Option<usize>,
|
||||
parent: Option<usize>,
|
||||
child: Option<usize>,
|
||||
) -> TrieNode<KEY>
|
||||
{
|
||||
) -> TrieNode<KEY> {
|
||||
TrieNode {
|
||||
key,
|
||||
prev,
|
||||
@@ -36,8 +35,7 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
pub (crate) fn new0(key: Option<KEY>) -> TrieNode<KEY>
|
||||
{
|
||||
pub(crate) fn new0(key: Option<KEY>) -> TrieNode<KEY> {
|
||||
TrieNode::new(key, None, None, None, None)
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user