Added longest common subsequence (LCS) and Levenshtein-without-substitution distances
All checks were successful
CI / build (push) Successful in 9s

This commit is contained in:
2025-07-31 14:35:30 +08:00
parent 1ac5c8fcdf
commit d14c907bf8
10 changed files with 467090 additions and 40 deletions

View File

@@ -7,16 +7,6 @@ use levtree::LevTrie;
use std::io::BufRead;
use std::io::BufReader;
/// Helper for turning string data into an owned sequence of `char`s.
///
/// Note: despite the `into_` prefix the method borrows `self`; the name is
/// kept as-is for the sake of existing callers.
trait IntoCharSlice {
    /// Returns the Unicode scalar values of `self`, in order, as a `Vec<char>`.
    fn into_char_slice(&self) -> Vec<char>;
}

impl IntoCharSlice for str {
    fn into_char_slice(&self) -> Vec<char> {
        // `chars()` already yields an iterator, so it can be collected directly.
        self.chars().collect()
    }
}
fn main() {
let bytes = include_bytes!("cracklib-small");
let reader = BufReader::new(&bytes[..]);
@@ -28,26 +18,29 @@ fn main() {
trie.add(word.chars());
});
let keys = [
let keys: Vec<Vec<char>> = [
"camel",
"coriolis",
"mattel",
"cruzer",
"cpoper",
"roublesoot",
];
]
.into_iter()
.map(|it| it.chars().collect())
.collect();
for _ in 0..50 {
for key in keys {
let word = &key.into_char_slice()[..];
for key in &keys {
let word = &key;
trie.fuzzy_search::<DamerauLevenshteinDistanceCalculator>(word, 6);
}
}
for key in keys {
let word = &key.into_char_slice()[..];
let word = &key;
let results = trie.fuzzy_search::<DamerauLevenshteinDistanceCalculator>(word, 6);
println!("needle: {}", key);
println!("needle: {}", key.iter().collect::<String>());
for result in results {
let word: String = trie.lineal_descendant(result.word).into_iter().collect();
println!("distance: {}, wordkey: {}", result.distance, word);

69
examples/lcs.rs Normal file
View File

@@ -0,0 +1,69 @@
use clap::Parser;
use rmath::HMatrix;
// Command-line arguments of the LCS example, parsed through clap's
// `Parser` derive. `main` converts both words to `Vec<char>` and passes them
// to `lcs_distance`.
// (Plain `//` comments are used on purpose so the generated help text is
// left untouched.)
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
struct CliArgs {
// First word; used as `s1` in `lcs_distance`.
word1: String,
// Second word; used as `s2` in `lcs_distance`.
word2: String,
}
/// Returns the length of the longest common subsequence of `s1` and `s2`.
///
/// Implements the classic dynamic-programming recurrence while keeping only
/// two rows of the table. Each row carries one extra leading cell that stands
/// for the empty prefix of `s2`, which removes the need for boundary checks
/// and makes empty input well defined: if either slice is empty the result
/// is `0` (the previous implementation panicked on `m - 1` / `n - 1`).
fn lcs_len(s1: &[char], s2: &[char]) -> usize {
    let n = s2.len();
    // `prev[j]` is the LCS length of the part of `s1` processed so far and the
    // first `j` characters of `s2`; `curr` is the row currently being filled.
    let mut prev = vec![0usize; n + 1];
    let mut curr = vec![0usize; n + 1];
    for &c1 in s1 {
        for (j, &c2) in s2.iter().enumerate() {
            curr[j + 1] = if c1 == c2 {
                // Matching characters extend the diagonal neighbour by one.
                prev[j] + 1
            } else {
                // Otherwise carry over the best of "left" and "up".
                usize::max(curr[j], prev[j + 1])
            };
        }
        std::mem::swap(&mut prev, &mut curr);
    }
    // After the final swap `prev` holds the last completed row.
    prev[n]
}
/// Returns `s1.len()` minus the length of the longest common subsequence of
/// `s1` and `s2`, i.e. the number of characters of `s1` that are not part of
/// that subsequence.
///
/// The measure is not symmetric: swapping the arguments changes the result
/// whenever the two slices differ in length.
///
/// The table counts down from `s1.len()` instead of up from zero and only two
/// rows are kept. Each row has one extra leading cell for the empty prefix of
/// `s2`, so empty input is handled without special cases: an empty `s1` gives
/// `0` and an empty `s2` gives `s1.len()` (the previous implementation
/// panicked on `m - 1` / `n - 1`).
fn lcs_distance(s1: &[char], s2: &[char]) -> usize {
    let max_distance = s1.len();
    let n = s2.len();
    // `prev[j]` is `max_distance` minus the LCS length of the part of `s1`
    // processed so far and the first `j` characters of `s2`.
    let mut prev = vec![max_distance; n + 1];
    let mut curr = vec![max_distance; n + 1];
    for &c1 in s1 {
        for (j, &c2) in s2.iter().enumerate() {
            curr[j + 1] = if c1 == c2 {
                // Cannot underflow: before the current row at most
                // `s1.len() - 1` characters of `s1` have been matched, so
                // `prev[j]` is at least 1.
                prev[j] - 1
            } else {
                usize::min(curr[j], prev[j + 1])
            };
        }
        std::mem::swap(&mut prev, &mut curr);
    }
    // After the final swap `prev` holds the last completed row.
    prev[n]
}
fn main() {
let args = CliArgs::parse();
let s1 = args.word1.chars().collect::<Vec<char>>();
let s2 = args.word2.chars().collect::<Vec<char>>();
println!("{}", lcs_distance(&s1, &s2));
}

96
examples/searcher.rs Normal file
View File

@@ -0,0 +1,96 @@
extern crate levtree;
use levtree::{CaseInsensitiveKeyChecker, CaseSensitiveKeyChecker};
use levtree::{
DamerauLevenshteinDistanceCalculator, DistanceCalculator, KeyChecker, LcsDistanceCalculator,
LevTrie, LevenshteinDistanceCalculator, LevenshteinNoSubDistanceCalculator,
};
use std::io::BufRead;
use std::io::BufReader;
use clap::{Parser, ValueEnum};
// Distance metric selectable through the `distance` command-line argument.
// `main` maps every variant to the `levtree` calculator type named below.
// (Plain `//` comments are used on purpose so the help text generated by
// the `ValueEnum` derive is left untouched.)
#[derive(Debug, Clone, ValueEnum)]
enum Algorithm {
// Dispatches to `LcsDistanceCalculator`.
Lcs,
// Dispatches to `LevenshteinDistanceCalculator`.
Levenshtein,
// Dispatches to `LevenshteinNoSubDistanceCalculator`.
LevenshteinNoSub,
// Dispatches to `DamerauLevenshteinDistanceCalculator`; this is the default
// value declared on `CliArgs::distance` ("damerau-levenshtein").
DamerauLevenshtein,
}
/// Fuzzy-search the embedded word list for entries close to a given word
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
struct CliArgs {
    /// Word to look up
    #[arg(short, long)]
    wordkey: String,

    /// Number of results returned
    #[arg(short, long, default_value_t = 10)]
    result_size: usize,

    /// Specify distance algorithm
    #[arg(short, long, required = false, default_value = "damerau-levenshtein")]
    distance: Algorithm,

    /// Toggle case sensitivity
    #[arg(short, long, required = false, default_value_t = false)]
    case_sensitive: bool,
}
/// Builds a trie from the word list embedded at compile time (`words.txt`),
/// runs a fuzzy search for `args.wordkey` with the distance calculator `DC`
/// and prints one line per result.
///
/// `KC` is the key checker the trie is instantiated with. `args.result_size`
/// is forwarded as the second argument of `fuzzy_search`.
///
/// Panics if a line of the embedded word list cannot be read.
fn run<KC: KeyChecker<char>, DC: DistanceCalculator<char, KC>>(args: &CliArgs) {
    let mut trie = LevTrie::<char, KC>::new();

    // Load the dictionary, one word per line.
    let dictionary = include_bytes!("words.txt");
    for line in BufReader::new(&dictionary[..]).lines() {
        let entry: String = line.unwrap();
        trie.add(entry.chars());
    }

    let needle: Vec<char> = args.wordkey.chars().collect();
    let hits = trie.fuzzy_search::<DC>(&needle, args.result_size);
    for hit in hits {
        // Rebuild the stored word from the node the search returned.
        let matched: String = trie.lineal_descendant(hit.word).into_iter().collect();
        println!("distance: {}, wordkey: {}", hit.distance, matched);
    }
}
/// Entry point: parses the command line and calls `run` with the key checker
/// and distance calculator selected by `--case-sensitive` and `--distance`.
fn main() {
    let args = CliArgs::parse();
    // `distance` is matched by reference so that `args` stays fully usable
    // when it is handed to `run`.
    match (args.case_sensitive, &args.distance) {
        (true, Algorithm::Lcs) => {
            run::<CaseSensitiveKeyChecker, LcsDistanceCalculator>(&args)
        }
        (true, Algorithm::Levenshtein) => {
            run::<CaseSensitiveKeyChecker, LevenshteinDistanceCalculator>(&args)
        }
        (true, Algorithm::LevenshteinNoSub) => {
            run::<CaseSensitiveKeyChecker, LevenshteinNoSubDistanceCalculator>(&args)
        }
        (true, Algorithm::DamerauLevenshtein) => {
            run::<CaseSensitiveKeyChecker, DamerauLevenshteinDistanceCalculator>(&args)
        }
        (false, Algorithm::Lcs) => {
            run::<CaseInsensitiveKeyChecker, LcsDistanceCalculator>(&args)
        }
        (false, Algorithm::Levenshtein) => {
            run::<CaseInsensitiveKeyChecker, LevenshteinDistanceCalculator>(&args)
        }
        (false, Algorithm::LevenshteinNoSub) => {
            run::<CaseInsensitiveKeyChecker, LevenshteinNoSubDistanceCalculator>(&args)
        }
        (false, Algorithm::DamerauLevenshtein) => {
            run::<CaseInsensitiveKeyChecker, DamerauLevenshteinDistanceCalculator>(&args)
        }
    }
}

466550
examples/words.txt Normal file

File diff suppressed because it is too large Load Diff