Compare commits
1 Commits
master
..
a8a5ad9491
| Author | SHA1 | Date | |
|---|---|---|---|
| a8a5ad9491 |
@@ -1,7 +0,0 @@
[registry]
default = "gitea"

[registries.gitea]
global-credential-providers = ["cargo:token"]
index = "sparse+https://gitea.woggioni.net/api/packages/woggioni/cargo/"

@@ -1,18 +0,0 @@
name: CI
on:
  push:
    branches: [ master ]
jobs:
  build:
    runs-on: woryzen
    steps:
      - name: Checkout sources
        uses: actions/checkout@v4
      - name: Run unit tests
        run: |
          cargo test
      - name: Publish artifacts
        env:
          CARGO_REGISTRIES_GITEA_TOKEN: Bearer ${{ secrets.PUBLISHER_TOKEN }}
        run: |
          cargo publish --registry=gitea
@@ -1,2 +0,0 @@
/target
**/*.rs.bk
Generated
-309
@@ -1,309 +0,0 @@
|
|||||||
# This file is automatically @generated by Cargo.
|
|
||||||
# It is not intended for manual editing.
|
|
||||||
version = 4
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "anstream"
|
|
||||||
version = "0.6.19"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933"
|
|
||||||
dependencies = [
|
|
||||||
"anstyle",
|
|
||||||
"anstyle-parse",
|
|
||||||
"anstyle-query",
|
|
||||||
"anstyle-wincon",
|
|
||||||
"colorchoice",
|
|
||||||
"is_terminal_polyfill",
|
|
||||||
"utf8parse",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "anstyle"
|
|
||||||
version = "1.0.11"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "anstyle-parse"
|
|
||||||
version = "0.2.7"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
|
|
||||||
dependencies = [
|
|
||||||
"utf8parse",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "anstyle-query"
|
|
||||||
version = "1.1.3"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9"
|
|
||||||
dependencies = [
|
|
||||||
"windows-sys",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "anstyle-wincon"
|
|
||||||
version = "3.0.9"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882"
|
|
||||||
dependencies = [
|
|
||||||
"anstyle",
|
|
||||||
"once_cell_polyfill",
|
|
||||||
"windows-sys",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "autocfg"
|
|
||||||
version = "1.5.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "clap"
|
|
||||||
version = "4.5.42"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "ed87a9d530bb41a67537289bafcac159cb3ee28460e0a4571123d2a778a6a882"
|
|
||||||
dependencies = [
|
|
||||||
"clap_builder",
|
|
||||||
"clap_derive",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "clap_builder"
|
|
||||||
version = "4.5.42"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "64f4f3f3c77c94aff3c7e9aac9a2ca1974a5adf392a8bb751e827d6d127ab966"
|
|
||||||
dependencies = [
|
|
||||||
"anstream",
|
|
||||||
"anstyle",
|
|
||||||
"clap_lex",
|
|
||||||
"strsim",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "clap_derive"
|
|
||||||
version = "4.5.41"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491"
|
|
||||||
dependencies = [
|
|
||||||
"heck 0.5.0",
|
|
||||||
"proc-macro2",
|
|
||||||
"quote",
|
|
||||||
"syn",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "clap_lex"
|
|
||||||
version = "0.7.5"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "colorchoice"
|
|
||||||
version = "1.0.4"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "heck"
|
|
||||||
version = "0.4.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "heck"
|
|
||||||
version = "0.5.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "is_terminal_polyfill"
|
|
||||||
version = "1.70.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "levtree"
|
|
||||||
version = "0.1.2"
|
|
||||||
dependencies = [
|
|
||||||
"clap",
|
|
||||||
"rmath",
|
|
||||||
"sealed",
|
|
||||||
"trait-group",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "num-traits"
|
|
||||||
version = "0.2.19"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
|
|
||||||
dependencies = [
|
|
||||||
"autocfg",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "once_cell_polyfill"
|
|
||||||
version = "1.70.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "opimps"
|
|
||||||
version = "0.2.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "857dabe64a7afe2e51ac9962dc3c008e74ae050dd47e21a7e7b1fc69a67a0229"
|
|
||||||
dependencies = [
|
|
||||||
"proc-macro2",
|
|
||||||
"quote",
|
|
||||||
"syn",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "proc-macro2"
|
|
||||||
version = "1.0.95"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778"
|
|
||||||
dependencies = [
|
|
||||||
"unicode-ident",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "quote"
|
|
||||||
version = "1.0.40"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
|
|
||||||
dependencies = [
|
|
||||||
"proc-macro2",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "rmath"
|
|
||||||
version = "0.1.0"
|
|
||||||
source = "sparse+https://gitea.woggioni.net/api/packages/woggioni/cargo/"
|
|
||||||
checksum = "73da6144552f77474e00a800955098b34da9bd10fde4c1570290978c2c48da27"
|
|
||||||
dependencies = [
|
|
||||||
"num-traits",
|
|
||||||
"opimps",
|
|
||||||
"sealed",
|
|
||||||
"trait-group",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "sealed"
|
|
||||||
version = "0.5.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "f4a8caec23b7800fb97971a1c6ae365b6239aaeddfb934d6265f8505e795699d"
|
|
||||||
dependencies = [
|
|
||||||
"heck 0.4.1",
|
|
||||||
"proc-macro2",
|
|
||||||
"quote",
|
|
||||||
"syn",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "strsim"
|
|
||||||
version = "0.11.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "syn"
|
|
||||||
version = "2.0.104"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40"
|
|
||||||
dependencies = [
|
|
||||||
"proc-macro2",
|
|
||||||
"quote",
|
|
||||||
"unicode-ident",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "trait-group"
|
|
||||||
version = "0.1.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "e1b362975c6f0f21a41fbb9ca91fe5dcb7e01e12331360374347476b45f5cb9c"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "unicode-ident"
|
|
||||||
version = "1.0.18"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "utf8parse"
|
|
||||||
version = "0.2.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "windows-sys"
|
|
||||||
version = "0.59.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
|
|
||||||
dependencies = [
|
|
||||||
"windows-targets",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "windows-targets"
|
|
||||||
version = "0.52.6"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
|
|
||||||
dependencies = [
|
|
||||||
"windows_aarch64_gnullvm",
|
|
||||||
"windows_aarch64_msvc",
|
|
||||||
"windows_i686_gnu",
|
|
||||||
"windows_i686_gnullvm",
|
|
||||||
"windows_i686_msvc",
|
|
||||||
"windows_x86_64_gnu",
|
|
||||||
"windows_x86_64_gnullvm",
|
|
||||||
"windows_x86_64_msvc",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "windows_aarch64_gnullvm"
|
|
||||||
version = "0.52.6"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "windows_aarch64_msvc"
|
|
||||||
version = "0.52.6"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "windows_i686_gnu"
|
|
||||||
version = "0.52.6"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "windows_i686_gnullvm"
|
|
||||||
version = "0.52.6"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "windows_i686_msvc"
|
|
||||||
version = "0.52.6"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "windows_x86_64_gnu"
|
|
||||||
version = "0.52.6"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "windows_x86_64_gnullvm"
|
|
||||||
version = "0.52.6"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "windows_x86_64_msvc"
|
|
||||||
version = "0.52.6"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
|
||||||
-32
@@ -1,32 +0,0 @@
[package]
name = "levtree"
version = "0.1.2"
authors = ["Walter Oggioni <oggioni.walter@gmail.com>"]
license = "MIT"
edition = "2024"

[dependencies]
trait-group = "0.1"
sealed = "0.5"

[dev-dependencies]
clap = { version = "4.5", features = ["derive"] }
rmath = { version = "0.1", registry = "gitea" }

[lib]
name = "levtree"
crate-type = ["lib"]
bench = false


[[example]]
name = "benchmark"
path = "examples/benchmark.rs"

[[example]]
name = "searcher"
path = "examples/searcher.rs"

[[example]]
name = "lcs"
path = "examples/lcs.rs"
@@ -1,50 +0,0 @@
|
|||||||
extern crate levtree;
|
|
||||||
|
|
||||||
use levtree::CaseSensitiveLevTrie;
|
|
||||||
use levtree::DamerauLevenshteinDistanceCalculator;
|
|
||||||
use levtree::LevTrie;
|
|
||||||
|
|
||||||
use std::io::BufRead;
|
|
||||||
use std::io::BufReader;
|
|
||||||
|
|
||||||
fn main() {
|
|
||||||
let bytes = include_bytes!("cracklib-small");
|
|
||||||
let reader = BufReader::new(&bytes[..]);
|
|
||||||
let mut trie: CaseSensitiveLevTrie = LevTrie::new();
|
|
||||||
reader
|
|
||||||
.lines()
|
|
||||||
.map(|line| line.unwrap())
|
|
||||||
.for_each(|word: String| {
|
|
||||||
trie.add(word.chars());
|
|
||||||
});
|
|
||||||
|
|
||||||
let keys: Vec<Vec<char>> = [
|
|
||||||
"camel",
|
|
||||||
"coriolis",
|
|
||||||
"mattel",
|
|
||||||
"cruzer",
|
|
||||||
"cpoper",
|
|
||||||
"roublesoot",
|
|
||||||
]
|
|
||||||
.into_iter()
|
|
||||||
.map(|it| it.chars().collect())
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
for _ in 0..50 {
|
|
||||||
for key in &keys {
|
|
||||||
let word = &key;
|
|
||||||
trie.fuzzy_search::<DamerauLevenshteinDistanceCalculator>(word, 6);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for key in keys {
|
|
||||||
let word = &key;
|
|
||||||
let results = trie.fuzzy_search::<DamerauLevenshteinDistanceCalculator>(word, 6);
|
|
||||||
println!("needle: {}", key.iter().collect::<String>());
|
|
||||||
for result in results {
|
|
||||||
let word: String = trie.lineal_descendant(result.word).into_iter().collect();
|
|
||||||
println!("distance: {}, wordkey: {}", result.distance, word);
|
|
||||||
}
|
|
||||||
println!("")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
-54763
File diff suppressed because it is too large
Load Diff
@@ -1,69 +0,0 @@
|
|||||||
use clap::Parser;
|
|
||||||
use rmath::HMatrix;
|
|
||||||
|
|
||||||
#[derive(Parser, Debug)]
|
|
||||||
#[command(version, about, long_about = None)]
|
|
||||||
struct CliArgs {
|
|
||||||
word1: String,
|
|
||||||
word2: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
fn lcs_len(s1: &[char], s2: &[char]) -> usize {
|
|
||||||
let m = s1.len();
|
|
||||||
let n = s2.len();
|
|
||||||
let mut workspace = HMatrix::<usize>::new(m, n, |(_, _)| 0);
|
|
||||||
for i in 0..m {
|
|
||||||
for j in 0..n {
|
|
||||||
if s1[i] == s2[j] {
|
|
||||||
workspace[(i, j)] = if i == 0 || j == 0 {
|
|
||||||
1
|
|
||||||
} else {
|
|
||||||
workspace[(i - 1, j - 1)] + 1
|
|
||||||
};
|
|
||||||
} else {
|
|
||||||
let left = if j > 0 { workspace[(i, j - 1)] } else { 0 };
|
|
||||||
let up = if i > 0 { workspace[(i - 1, j)] } else { 0 };
|
|
||||||
workspace[(i, j)] = usize::max(left, up)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
workspace[(m - 1, n - 1)]
|
|
||||||
}
|
|
||||||
|
|
||||||
fn lcs_distance(s1: &[char], s2: &[char]) -> usize {
|
|
||||||
let m = s1.len();
|
|
||||||
let n = s2.len();
|
|
||||||
let max_distance = m;
|
|
||||||
let mut workspace = HMatrix::<usize>::new(m, n, |(_, _)| 0);
|
|
||||||
for i in 0..m {
|
|
||||||
for j in 0..n {
|
|
||||||
if s1[i] == s2[j] {
|
|
||||||
workspace[(i, j)] = if i == 0 || j == 0 {
|
|
||||||
max_distance - 1
|
|
||||||
} else {
|
|
||||||
workspace[(i - 1, j - 1)] - 1
|
|
||||||
};
|
|
||||||
} else {
|
|
||||||
let left = if j > 0 {
|
|
||||||
workspace[(i, j - 1)]
|
|
||||||
} else {
|
|
||||||
max_distance
|
|
||||||
};
|
|
||||||
let up = if i > 0 {
|
|
||||||
workspace[(i - 1, j)]
|
|
||||||
} else {
|
|
||||||
max_distance
|
|
||||||
};
|
|
||||||
workspace[(i, j)] = usize::min(left, up)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
workspace[(m - 1, n - 1)]
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parses the two words from the command line and prints their LCS distance.
fn main() {
    let args = CliArgs::parse();
    let s1: Vec<char> = args.word1.chars().collect();
    let s2: Vec<char> = args.word2.chars().collect();
    println!("{}", lcs_distance(&s1, &s2));
}
|
|
||||||
@@ -1,96 +0,0 @@
|
|||||||
extern crate levtree;
|
|
||||||
|
|
||||||
use levtree::{CaseInsensitiveKeyChecker, CaseSensitiveKeyChecker};
|
|
||||||
use levtree::{
|
|
||||||
DamerauLevenshteinDistanceCalculator, DistanceCalculator, KeyChecker, LcsDistanceCalculator,
|
|
||||||
LevTrie, LevenshteinDistanceCalculator, LevenshteinNoSubDistanceCalculator,
|
|
||||||
};
|
|
||||||
|
|
||||||
use std::io::BufRead;
|
|
||||||
use std::io::BufReader;
|
|
||||||
|
|
||||||
use clap::{Parser, ValueEnum};
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, ValueEnum)]
|
|
||||||
enum Algorithm {
|
|
||||||
Lcs,
|
|
||||||
Levenshtein,
|
|
||||||
LevenshteinNoSub,
|
|
||||||
DamerauLevenshtein,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Encrypt/decrypt files using catenaccio cipher
|
|
||||||
#[derive(Parser, Debug)]
|
|
||||||
#[command(version, about, long_about = None)]
|
|
||||||
struct CliArgs {
|
|
||||||
/// Word to look up
|
|
||||||
#[arg(short, long)]
|
|
||||||
wordkey: String,
|
|
||||||
|
|
||||||
/// Number of results returned
|
|
||||||
#[arg(short, long, default_value_t = 10)]
|
|
||||||
result_size: usize,
|
|
||||||
|
|
||||||
//Specify distance algorithm
|
|
||||||
#[arg(short, long, required = false, default_value = "damerau-levenshtein")]
|
|
||||||
distance: Algorithm,
|
|
||||||
|
|
||||||
//Toggle case sensitivity
|
|
||||||
#[arg(short, long, required = false, default_value_t = false)]
|
|
||||||
case_sensitive: bool,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl CliArgs {}
|
|
||||||
|
|
||||||
fn run<KC: KeyChecker<char>, DC: DistanceCalculator<char, KC>>(args: &CliArgs) {
|
|
||||||
let mut trie = LevTrie::<char, KC>::new();
|
|
||||||
let bytes = include_bytes!("words.txt");
|
|
||||||
let reader = BufReader::new(&bytes[..]);
|
|
||||||
reader
|
|
||||||
.lines()
|
|
||||||
.map(|line| line.unwrap())
|
|
||||||
.for_each(|word: String| {
|
|
||||||
trie.add(word.chars());
|
|
||||||
});
|
|
||||||
let word = args.wordkey.chars().collect::<Vec<char>>();
|
|
||||||
let results = trie.fuzzy_search::<DC>(&word, args.result_size);
|
|
||||||
for result in results {
|
|
||||||
let word: String = trie.lineal_descendant(result.word).into_iter().collect();
|
|
||||||
println!("distance: {}, wordkey: {}", result.distance, word);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn main() {
|
|
||||||
let args = CliArgs::parse();
|
|
||||||
if args.case_sensitive {
|
|
||||||
match args.distance {
|
|
||||||
Algorithm::Lcs => {
|
|
||||||
run::<CaseSensitiveKeyChecker, LcsDistanceCalculator>(&args);
|
|
||||||
}
|
|
||||||
Algorithm::Levenshtein => {
|
|
||||||
run::<CaseSensitiveKeyChecker, LevenshteinDistanceCalculator>(&args);
|
|
||||||
}
|
|
||||||
Algorithm::LevenshteinNoSub => {
|
|
||||||
run::<CaseSensitiveKeyChecker, LevenshteinNoSubDistanceCalculator>(&args);
|
|
||||||
}
|
|
||||||
Algorithm::DamerauLevenshtein => {
|
|
||||||
run::<CaseSensitiveKeyChecker, DamerauLevenshteinDistanceCalculator>(&args);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
match args.distance {
|
|
||||||
Algorithm::Lcs => {
|
|
||||||
run::<CaseInsensitiveKeyChecker, LcsDistanceCalculator>(&args);
|
|
||||||
}
|
|
||||||
Algorithm::Levenshtein => {
|
|
||||||
run::<CaseInsensitiveKeyChecker, LevenshteinDistanceCalculator>(&args);
|
|
||||||
}
|
|
||||||
Algorithm::LevenshteinNoSub => {
|
|
||||||
run::<CaseInsensitiveKeyChecker, LevenshteinNoSubDistanceCalculator>(&args);
|
|
||||||
}
|
|
||||||
Algorithm::DamerauLevenshtein => {
|
|
||||||
run::<CaseInsensitiveKeyChecker, DamerauLevenshteinDistanceCalculator>(&args);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
-466550
File diff suppressed because it is too large
Load Diff
@@ -1,26 +0,0 @@
|
|||||||
use super::trienode::TrieKey;
|
|
||||||
|
|
||||||
pub trait KeyChecker<KEY>
|
|
||||||
where
|
|
||||||
KEY: TrieKey,
|
|
||||||
{
|
|
||||||
fn check(k1: Option<KEY>, k2: Option<KEY>) -> bool;
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct CaseInsensitiveKeyChecker {}
|
|
||||||
|
|
||||||
impl KeyChecker<char> for CaseInsensitiveKeyChecker {
|
|
||||||
fn check(k1: Option<char>, k2: Option<char>) -> bool {
|
|
||||||
k1.zip(k2)
|
|
||||||
.map(|(v1, v2)| v1.to_lowercase().next() == v2.to_lowercase().next())
|
|
||||||
.unwrap_or_else(|| k1 == k2)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct CaseSensitiveKeyChecker {}
|
|
||||||
|
|
||||||
impl KeyChecker<char> for CaseSensitiveKeyChecker {
|
|
||||||
fn check(k1: Option<char>, k2: Option<char>) -> bool {
|
|
||||||
k1 == k2
|
|
||||||
}
|
|
||||||
}
|
|
||||||
-286
@@ -1,286 +0,0 @@
|
|||||||
extern crate sealed;
|
|
||||||
use self::sealed::sealed;
|
|
||||||
use std::collections::BinaryHeap;
|
|
||||||
|
|
||||||
use super::keychecker::KeyChecker;
|
|
||||||
use super::search_result::SearchResult;
|
|
||||||
use super::trie::Trie;
|
|
||||||
use super::trie::VisitOutcome;
|
|
||||||
use super::trienode::TrieKey;
|
|
||||||
use super::trienode::TrieNode;
|
|
||||||
|
|
||||||
pub type LevTrie<KEY, KEYCHECKER> = Trie<KEY, KEYCHECKER>;
|
|
||||||
pub type LevTrieNode<KEY> = TrieNode<KEY>;
|
|
||||||
|
|
||||||
#[sealed]
|
|
||||||
pub trait DistanceCalculator<KEY, KEYCHECKER>
|
|
||||||
where
|
|
||||||
KEY: TrieKey,
|
|
||||||
KEYCHECKER: KeyChecker<KEY>,
|
|
||||||
{
|
|
||||||
fn compute(
|
|
||||||
workspace: &mut Vec<Vec<usize>>,
|
|
||||||
nodes: &[LevTrieNode<KEY>],
|
|
||||||
stack: &[usize],
|
|
||||||
wordkey: &[KEY],
|
|
||||||
worst_case: Option<usize>,
|
|
||||||
) -> VisitOutcome;
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<KEY, KEYCHECKER> LevTrie<KEY, KEYCHECKER>
|
|
||||||
where
|
|
||||||
KEY: TrieKey,
|
|
||||||
KEYCHECKER: KeyChecker<KEY>,
|
|
||||||
{
|
|
||||||
pub fn new() -> LevTrie<KEY, KEYCHECKER> {
|
|
||||||
Trie::default()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn from_words<T, U>(wordlist: U) -> LevTrie<KEY, KEYCHECKER>
|
|
||||||
where
|
|
||||||
T: IntoIterator<Item = KEY>,
|
|
||||||
U: IntoIterator<Item = T>,
|
|
||||||
{
|
|
||||||
let mut result = LevTrie::new();
|
|
||||||
for word in wordlist {
|
|
||||||
result.add(word);
|
|
||||||
}
|
|
||||||
result
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn fuzzy_search<DC>(&mut self, word: &[KEY], max_result: usize) -> Vec<SearchResult>
|
|
||||||
where
|
|
||||||
DC: DistanceCalculator<KEY, KEYCHECKER>,
|
|
||||||
{
|
|
||||||
let word_len = word.len();
|
|
||||||
let required_size = word_len + 1;
|
|
||||||
let workspace: &mut Vec<Vec<usize>> = &mut vec![Vec::new(); self.nodes()];
|
|
||||||
let mut result_heap = BinaryHeap::<SearchResult>::with_capacity(max_result + 1);
|
|
||||||
let visit_pre = |stack: &Vec<usize>| -> VisitOutcome {
|
|
||||||
let stack_size = stack.len();
|
|
||||||
let current_node_id = *stack.last().unwrap();
|
|
||||||
let payload = &mut workspace[current_node_id];
|
|
||||||
payload.resize(required_size, usize::default());
|
|
||||||
if stack_size == 1 {
|
|
||||||
for (i, item) in payload.iter_mut().enumerate() {
|
|
||||||
*item = i;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (i, item) in payload.iter_mut().enumerate() {
|
|
||||||
*item = if i == 0 { stack_size - 1 } else { 0 }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if stack_size > 1 {
|
|
||||||
let current_node = &mut self.get_node(current_node_id);
|
|
||||||
if current_node.key.is_none() {
|
|
||||||
let distance = workspace[stack[stack_size - 2]][word_len];
|
|
||||||
let search_result = SearchResult {
|
|
||||||
distance,
|
|
||||||
word: current_node_id,
|
|
||||||
};
|
|
||||||
result_heap.push(search_result);
|
|
||||||
if result_heap.len() > max_result {
|
|
||||||
result_heap.pop();
|
|
||||||
}
|
|
||||||
VisitOutcome::Skip
|
|
||||||
} else {
|
|
||||||
let worst_case = result_heap
|
|
||||||
.peek()
|
|
||||||
.filter(|_| result_heap.len() == max_result)
|
|
||||||
.map(|it| it.distance);
|
|
||||||
DC::compute(workspace, &self.nodes, stack, word, worst_case)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
VisitOutcome::Continue
|
|
||||||
}
|
|
||||||
};
|
|
||||||
let visit_post = |_: &Vec<usize>| {};
|
|
||||||
self.walk(visit_pre, visit_post);
|
|
||||||
|
|
||||||
result_heap.into_sorted_vec()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct LevenshteinDistanceCalculator {}
|
|
||||||
|
|
||||||
#[sealed]
|
|
||||||
impl<KEY, KEYCHECKER> DistanceCalculator<KEY, KEYCHECKER> for LevenshteinDistanceCalculator
|
|
||||||
where
|
|
||||||
KEY: TrieKey,
|
|
||||||
KEYCHECKER: KeyChecker<KEY>,
|
|
||||||
{
|
|
||||||
fn compute(
|
|
||||||
workspace: &mut Vec<Vec<usize>>,
|
|
||||||
nodes: &[LevTrieNode<KEY>],
|
|
||||||
stack: &[usize],
|
|
||||||
wordkey: &[KEY],
|
|
||||||
worst_case: Option<usize>,
|
|
||||||
) -> VisitOutcome {
|
|
||||||
let sz = stack.len();
|
|
||||||
let key_size = wordkey.len();
|
|
||||||
for i in 1..=key_size {
|
|
||||||
if KEYCHECKER::check(Some(wordkey[i - 1]), nodes[stack[sz - 1]].key) {
|
|
||||||
workspace[stack[sz - 1]][i] = workspace[stack[sz - 2]][i - 1];
|
|
||||||
} else {
|
|
||||||
workspace[stack[sz - 1]][i] = std::cmp::min(
|
|
||||||
std::cmp::min(
|
|
||||||
workspace[stack[sz - 1]][i - 1],
|
|
||||||
workspace[stack[sz - 2]][i - 1],
|
|
||||||
),
|
|
||||||
workspace[stack[sz - 2]][i],
|
|
||||||
) + 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let condition = worst_case
|
|
||||||
.map(|wv| wv <= *workspace[stack[sz - 1]][..].iter().min().unwrap())
|
|
||||||
.unwrap_or(false);
|
|
||||||
if condition {
|
|
||||||
VisitOutcome::Skip
|
|
||||||
} else {
|
|
||||||
VisitOutcome::Continue
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct DamerauLevenshteinDistanceCalculator {}
|
|
||||||
|
|
||||||
#[sealed]
|
|
||||||
impl<KEY, KEYCHECKER> DistanceCalculator<KEY, KEYCHECKER> for DamerauLevenshteinDistanceCalculator
|
|
||||||
where
|
|
||||||
KEY: TrieKey,
|
|
||||||
KEYCHECKER: KeyChecker<KEY>,
|
|
||||||
{
|
|
||||||
fn compute(
|
|
||||||
workspace: &mut Vec<Vec<usize>>,
|
|
||||||
nodes: &[LevTrieNode<KEY>],
|
|
||||||
stack: &[usize],
|
|
||||||
wordkey: &[KEY],
|
|
||||||
worst_case: Option<usize>,
|
|
||||||
) -> VisitOutcome {
|
|
||||||
let sz = stack.len();
|
|
||||||
let key_size = wordkey.len();
|
|
||||||
for i in 1..=key_size {
|
|
||||||
if KEYCHECKER::check(
|
|
||||||
Some(wordkey[i - 1]),
|
|
||||||
stack.last().and_then(|it| nodes[*it].key),
|
|
||||||
) {
|
|
||||||
workspace[stack[sz - 1]][i] = workspace[stack[sz - 2]][i - 1];
|
|
||||||
} else {
|
|
||||||
workspace[stack[sz - 1]][i] = std::cmp::min(
|
|
||||||
std::cmp::min(
|
|
||||||
workspace[stack[sz - 1]][i - 1],
|
|
||||||
workspace[stack[sz - 2]][i - 1],
|
|
||||||
),
|
|
||||||
workspace[stack[sz - 2]][i],
|
|
||||||
) + 1;
|
|
||||||
}
|
|
||||||
if sz > 2
|
|
||||||
&& i > 1
|
|
||||||
&& KEYCHECKER::check(Some(wordkey[i - 2]), nodes[stack[sz - 1]].key)
|
|
||||||
&& KEYCHECKER::check(Some(wordkey[i - 1]), nodes[stack[sz - 2]].key)
|
|
||||||
{
|
|
||||||
workspace[stack[sz - 1]][i] = std::cmp::min(
|
|
||||||
workspace[stack[sz - 1]][i],
|
|
||||||
workspace[stack[sz - 3]][i - 2] + 1,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let condition = worst_case
|
|
||||||
.map(|wv| wv <= *workspace[stack[sz - 2]][..].iter().min().unwrap())
|
|
||||||
.unwrap_or(false);
|
|
||||||
if condition {
|
|
||||||
VisitOutcome::Skip
|
|
||||||
} else {
|
|
||||||
VisitOutcome::Continue
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct LevenshteinNoSubDistanceCalculator {}
|
|
||||||
|
|
||||||
#[sealed]
|
|
||||||
impl<KEY, KEYCHECKER> DistanceCalculator<KEY, KEYCHECKER> for LevenshteinNoSubDistanceCalculator
|
|
||||||
where
|
|
||||||
KEY: TrieKey,
|
|
||||||
KEYCHECKER: KeyChecker<KEY>,
|
|
||||||
{
|
|
||||||
fn compute(
|
|
||||||
workspace: &mut Vec<Vec<usize>>,
|
|
||||||
nodes: &[LevTrieNode<KEY>],
|
|
||||||
stack: &[usize],
|
|
||||||
wordkey: &[KEY],
|
|
||||||
worst_case: Option<usize>,
|
|
||||||
) -> VisitOutcome {
|
|
||||||
let sz = stack.len();
|
|
||||||
let key_size = wordkey.len();
|
|
||||||
for i in 1..=key_size {
|
|
||||||
if KEYCHECKER::check(Some(wordkey[i - 1]), nodes[stack[sz - 1]].key) {
|
|
||||||
workspace[stack[sz - 1]][i] = workspace[stack[sz - 2]][i - 1];
|
|
||||||
} else {
|
|
||||||
workspace[stack[sz - 1]][i] = std::cmp::min(
|
|
||||||
std::cmp::min(
|
|
||||||
workspace[stack[sz - 1]][i - 1] + 1,
|
|
||||||
workspace[stack[sz - 2]][i - 1] + 2,
|
|
||||||
),
|
|
||||||
workspace[stack[sz - 2]][i] + 1,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let condition = worst_case
|
|
||||||
.map(|wv| wv <= *workspace[stack[sz - 1]][..].iter().min().unwrap())
|
|
||||||
.unwrap_or(false);
|
|
||||||
if condition {
|
|
||||||
VisitOutcome::Skip
|
|
||||||
} else {
|
|
||||||
VisitOutcome::Continue
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct LcsDistanceCalculator {}
|
|
||||||
|
|
||||||
#[sealed]
|
|
||||||
impl<KEY, KEYCHECKER> DistanceCalculator<KEY, KEYCHECKER> for LcsDistanceCalculator
|
|
||||||
where
|
|
||||||
KEY: TrieKey,
|
|
||||||
KEYCHECKER: KeyChecker<KEY>,
|
|
||||||
{
|
|
||||||
fn compute(
|
|
||||||
workspace: &mut Vec<Vec<usize>>,
|
|
||||||
nodes: &[LevTrieNode<KEY>],
|
|
||||||
stack: &[usize],
|
|
||||||
wordkey: &[KEY],
|
|
||||||
_: Option<usize>,
|
|
||||||
) -> VisitOutcome {
|
|
||||||
let max_distance = wordkey.len();
|
|
||||||
let sz = stack.len();
|
|
||||||
let key_size = wordkey.len();
|
|
||||||
for i in 1..=key_size {
|
|
||||||
if KEYCHECKER::check(Some(wordkey[i - 1]), nodes[stack[sz - 1]].key) {
|
|
||||||
workspace[stack[sz - 1]][i] = if sz == 2 || i == 1 {
|
|
||||||
max_distance - 1
|
|
||||||
} else {
|
|
||||||
workspace[stack[sz - 2]][i - 1] - 1
|
|
||||||
};
|
|
||||||
} else {
|
|
||||||
let up = if sz == 2 {
|
|
||||||
max_distance
|
|
||||||
} else {
|
|
||||||
workspace[stack[sz - 2]][i]
|
|
||||||
};
|
|
||||||
let left = if i == 1 {
|
|
||||||
max_distance
|
|
||||||
} else {
|
|
||||||
workspace[stack[sz - 1]][i - 1]
|
|
||||||
};
|
|
||||||
workspace[stack[sz - 1]][i] = std::cmp::min(up, left);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let condition = workspace[stack[sz - 1]][key_size] == 0;
|
|
||||||
if condition {
|
|
||||||
VisitOutcome::Skip
|
|
||||||
} else {
|
|
||||||
VisitOutcome::Continue
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
-30
@@ -1,30 +0,0 @@
|
|||||||
#[macro_use]
|
|
||||||
extern crate trait_group;
|
|
||||||
mod levtrie;
|
|
||||||
// pub use self::levtrie::LevTrieNode as LevTrieNode;
|
|
||||||
pub use self::levtrie::DamerauLevenshteinDistanceCalculator;
|
|
||||||
pub use self::levtrie::DistanceCalculator;
|
|
||||||
pub use self::levtrie::LcsDistanceCalculator;
|
|
||||||
pub use self::levtrie::LevTrie;
|
|
||||||
pub use self::levtrie::LevenshteinDistanceCalculator;
|
|
||||||
pub use self::levtrie::LevenshteinNoSubDistanceCalculator;
|
|
||||||
|
|
||||||
mod trie;
|
|
||||||
pub use self::trie::Trie;
|
|
||||||
|
|
||||||
mod trienode;
|
|
||||||
//use self::trienode::TrieNode as TrieNode;
|
|
||||||
|
|
||||||
mod keychecker;
|
|
||||||
pub use self::keychecker::CaseInsensitiveKeyChecker;
|
|
||||||
pub use self::keychecker::CaseSensitiveKeyChecker;
|
|
||||||
pub use self::keychecker::KeyChecker;
|
|
||||||
|
|
||||||
/// [`LevTrie`] over `char` keys whose keys are compared with [`CaseSensitiveKeyChecker`].
pub type CaseSensitiveLevTrie = LevTrie<char, CaseSensitiveKeyChecker>;
|
|
||||||
/// [`LevTrie`] over `char` keys whose keys are compared with [`CaseInsensitiveKeyChecker`].
pub type CaseInSensitiveLevTrie = LevTrie<char, CaseInsensitiveKeyChecker>;
|
|
||||||
|
|
||||||
mod search_result;
|
|
||||||
pub use self::search_result::SearchResult;
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests;
|
|
||||||
@@ -1,31 +0,0 @@
|
|||||||
use std::cmp::Ordering;
|
|
||||||
|
|
||||||
/// A single hit produced by a search.
///
/// Results are ordered by ascending `distance`; ties are broken by ascending
/// `word`, so sorting a list of results puts the closest matches first.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct SearchResult {
    /// Identifier of the matched word (the crate's tests compare it with the
    /// id returned when the word was added to the trie).
    pub word: usize,
    /// Distance between the searched key and the matched word.
    pub distance: usize,
}

impl PartialOrd for SearchResult {
    /// Delegates to [`Ord::cmp`] so both orderings always agree.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for SearchResult {
    /// Compares by `distance` first and by `word` only when distances are equal.
    fn cmp(&self, other: &Self) -> Ordering {
        self.distance
            .cmp(&other.distance)
            .then_with(|| self.word.cmp(&other.word))
    }
}
|
|
||||||
-138
@@ -1,138 +0,0 @@
|
|||||||
use super::{
|
|
||||||
CaseSensitiveLevTrie, DamerauLevenshteinDistanceCalculator, KeyChecker, LevTrie,
|
|
||||||
LevenshteinDistanceCalculator, SearchResult,
|
|
||||||
};
|
|
||||||
use std::collections::BTreeMap;
|
|
||||||
use std::fmt::Display;
|
|
||||||
use std::io::Write;
|
|
||||||
|
|
||||||
struct ExpectedResults {
|
|
||||||
data: Vec<(usize, usize)>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ExpectedResults {
|
|
||||||
fn new(id_map: &BTreeMap<String, usize>, results: &[(String, usize)]) -> ExpectedResults {
|
|
||||||
let data = results
|
|
||||||
.iter()
|
|
||||||
.map(|(key, distance)| {
|
|
||||||
(
|
|
||||||
*id_map
|
|
||||||
.get(key)
|
|
||||||
.ok_or_else(|| format!("Id not found for key '{key}'"))
|
|
||||||
.unwrap(),
|
|
||||||
*distance,
|
|
||||||
)
|
|
||||||
})
|
|
||||||
.collect::<Vec<(usize, usize)>>();
|
|
||||||
ExpectedResults { data }
|
|
||||||
}
|
|
||||||
|
|
||||||
fn check(&self, search_results: &[SearchResult]) {
|
|
||||||
for i in 0..self.data.len() {
|
|
||||||
let SearchResult { word, distance } = search_results[i];
|
|
||||||
let data = self.data[i];
|
|
||||||
if data != (word, distance) {
|
|
||||||
panic!("({}, {}) <> ({}, {})", data.0, data.1, word, distance);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn print_search_results<T: Display + Copy, C: KeyChecker<T>>(
|
|
||||||
trie: &LevTrie<T, C>,
|
|
||||||
search_results: &[SearchResult],
|
|
||||||
key_separator: &str,
|
|
||||||
) -> Result<(), std::io::Error> {
|
|
||||||
for result in search_results {
|
|
||||||
let mut word = Vec::<u8>::new();
|
|
||||||
for (i, fragment) in trie.lineal_descendant(result.word).enumerate() {
|
|
||||||
if i > 0 {
|
|
||||||
word.write(format!("{}{}", key_separator, fragment).as_bytes())?;
|
|
||||||
} else {
|
|
||||||
word.write(format!("{}", fragment).as_bytes())?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
println!(
|
|
||||||
"distance: {}, wordkey: {}, id: {}",
|
|
||||||
result.distance,
|
|
||||||
String::from_utf8(word).unwrap(),
|
|
||||||
result.word
|
|
||||||
);
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Words inserted into the trie by every test in this module.
const WORDLIST: [&str; 16] = [
    "skyscraper", "camel", "coal", "caos",
    "copper", "hello", "Bugis", "Kembangan",
    "Singapore", "Fullerton", "Lavender", "aircraft",
    "boat", "ship", "cargo", "tanker",
];
|
|
||||||
|
|
||||||
/// Searches "coat" with the Damerau-Levenshtein calculator and checks the
/// six returned words and their distances.
#[test]
fn test_damerau_levenshtein_strings() {
    let mut trie: CaseSensitiveLevTrie = LevTrie::new();
    // Remember the id assigned to every inserted word.
    let id_map: BTreeMap<String, usize> = WORDLIST
        .iter()
        .map(|word| (String::from(*word), trie.add(word.chars()).1))
        .collect();

    let query: Vec<char> = "coat".chars().collect();
    let results = trie.fuzzy_search::<DamerauLevenshteinDistanceCalculator>(&query, 6);

    print_search_results(&trie, &results, "").unwrap();

    let expected: Vec<(String, usize)> = [
        ("coal", 1),
        ("boat", 1),
        ("caos", 2),
        ("camel", 4),
        ("copper", 4),
        ("ship", 4),
    ]
    .iter()
    .map(|&(word, distance)| (String::from(word), distance))
    .collect();
    ExpectedResults::new(&id_map, &expected).check(&results);
}
|
|
||||||
|
|
||||||
/// Searches "coat" with the plain Levenshtein calculator and checks the six
/// returned words and their distances.
#[test]
fn test_levenshtein_strings() {
    let mut trie: CaseSensitiveLevTrie = LevTrie::new();
    // Remember the id assigned to every inserted word.
    let id_map: BTreeMap<String, usize> = WORDLIST
        .iter()
        .map(|word| (String::from(*word), trie.add(word.chars()).1))
        .collect();

    let query: Vec<char> = "coat".chars().collect();
    let results = trie.fuzzy_search::<LevenshteinDistanceCalculator>(&query, 6);

    print_search_results(&trie, &results, "").unwrap();

    let expected: Vec<(String, usize)> = [
        ("coal", 1),
        ("boat", 1),
        ("caos", 3),
        ("camel", 4),
        ("copper", 4),
        ("ship", 4),
    ]
    .iter()
    .map(|&(word, distance)| (String::from(word), distance))
    .collect();
    ExpectedResults::new(&id_map, &expected).check(&results);
}
|
|
||||||
-225
@@ -1,225 +0,0 @@
|
|||||||
use super::keychecker::KeyChecker;
|
|
||||||
use super::trienode::TrieKey;
|
|
||||||
use super::trienode::TrieNode;
|
|
||||||
use std::collections::BTreeSet;
|
|
||||||
use std::iter::Iterator;
|
|
||||||
use std::marker::PhantomData;
|
|
||||||
|
|
||||||
/// Decision returned by the pre-order callback of `Trie::walk`.
pub enum VisitOutcome {
    /// Descend into the children of the node just visited.
    Continue,
    /// Do not descend into this node's children; move on to its next sibling.
    Skip,
    /// Abort the whole traversal immediately.
    EarlyExit,
}
|
|
||||||
|
|
||||||
pub struct Trie<KEY, KEYCHECKER>
|
|
||||||
where
|
|
||||||
KEY: TrieKey,
|
|
||||||
KEYCHECKER: KeyChecker<KEY>,
|
|
||||||
{
|
|
||||||
pub(crate) nodes: Vec<TrieNode<KEY>>,
|
|
||||||
tails: BTreeSet<usize>,
|
|
||||||
checker: PhantomData<KEYCHECKER>,
|
|
||||||
}
|
|
||||||
impl<KEY, KEYCHECKER> Default for Trie<KEY, KEYCHECKER>
|
|
||||||
where
|
|
||||||
KEY: TrieKey,
|
|
||||||
KEYCHECKER: KeyChecker<KEY>,
|
|
||||||
{
|
|
||||||
fn default() -> Self {
|
|
||||||
Trie {
|
|
||||||
nodes: vec![TrieNode::new0(None)],
|
|
||||||
tails: BTreeSet::new(),
|
|
||||||
checker: PhantomData,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
impl<KEY, KEYCHECKER> Trie<KEY, KEYCHECKER>
|
|
||||||
where
|
|
||||||
KEY: TrieKey,
|
|
||||||
KEYCHECKER: KeyChecker<KEY>,
|
|
||||||
{
|
|
||||||
pub fn trie_from_words<T, U>(wordlist: U) -> Trie<KEY, KEYCHECKER>
|
|
||||||
where
|
|
||||||
T: IntoIterator<Item = KEY>,
|
|
||||||
U: IntoIterator<Item = T>,
|
|
||||||
{
|
|
||||||
let mut result = Trie::default();
|
|
||||||
for word in wordlist {
|
|
||||||
result.add(word);
|
|
||||||
}
|
|
||||||
result
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn get_node_mut(&mut self, index: usize) -> &mut TrieNode<KEY> {
|
|
||||||
&mut self.nodes[index]
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn get_node(&self, index: usize) -> &TrieNode<KEY> {
|
|
||||||
&self.nodes[index]
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn nodes(&self) -> usize {
|
|
||||||
self.nodes.len()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn add_node(&mut self, key: Option<KEY>, parent: usize, prev: Option<usize>) -> usize {
|
|
||||||
let mut result = TrieNode::new0(key);
|
|
||||||
let result_index = self.nodes();
|
|
||||||
result.parent = Some(parent);
|
|
||||||
match prev {
|
|
||||||
Some(prev_node) => {
|
|
||||||
self.get_node_mut(prev_node).next = Some(result_index);
|
|
||||||
result.prev = prev;
|
|
||||||
}
|
|
||||||
None => {
|
|
||||||
let parent_node = self.get_node_mut(parent);
|
|
||||||
match parent_node.child {
|
|
||||||
None => {
|
|
||||||
parent_node.child = Some(result_index);
|
|
||||||
}
|
|
||||||
Some(parent_child) => {
|
|
||||||
let mut node = parent_child;
|
|
||||||
loop {
|
|
||||||
let next = self.get_node(node).next;
|
|
||||||
match next {
|
|
||||||
Some(next_node) => {
|
|
||||||
node = next_node;
|
|
||||||
}
|
|
||||||
None => {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
self.get_node_mut(node).next = Some(result_index);
|
|
||||||
result.prev = Some(node)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
self.nodes.push(result);
|
|
||||||
result_index
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn add<T>(&mut self, path: T) -> (bool, usize)
|
|
||||||
where
|
|
||||||
T: IntoIterator<Item = KEY>,
|
|
||||||
{
|
|
||||||
let mut result = false;
|
|
||||||
let mut pnode = 0;
|
|
||||||
'wordLoop: for key in path {
|
|
||||||
let mut cnode = self.get_node(pnode).child;
|
|
||||||
while let Some(cnode_index) = cnode {
|
|
||||||
let cnode_node = self.get_node(cnode_index);
|
|
||||||
if KEYCHECKER::check(cnode_node.key, Some(key)) {
|
|
||||||
pnode = cnode_index;
|
|
||||||
continue 'wordLoop;
|
|
||||||
} else if self.get_node(cnode_index).next.is_none() {
|
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
cnode = self.get_node(cnode_index).next;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pnode = self.add_node(Some(key), pnode, cnode);
|
|
||||||
result = true;
|
|
||||||
}
|
|
||||||
if result {
|
|
||||||
let tail = self.add_node(None, pnode, None);
|
|
||||||
self.tails.insert(tail);
|
|
||||||
let mut node = Some(tail);
|
|
||||||
while let Some(n) = node {
|
|
||||||
let current_node = self.get_node_mut(n);
|
|
||||||
current_node.ref_count += 1;
|
|
||||||
node = current_node.parent;
|
|
||||||
}
|
|
||||||
(true, tail)
|
|
||||||
} else {
|
|
||||||
(false, pnode)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn search(&mut self, path: Vec<KEY>) -> Option<usize> {
|
|
||||||
let mut result: Option<usize> = None;
|
|
||||||
let visit_pre = |stack: &Vec<usize>| -> VisitOutcome {
|
|
||||||
if stack.len() == 1 {
|
|
||||||
VisitOutcome::Continue
|
|
||||||
} else {
|
|
||||||
let last = *stack.last().expect("");
|
|
||||||
let index = stack.len() - 2;
|
|
||||||
let node = self.get_node(last);
|
|
||||||
if index < path.len() {
|
|
||||||
if KEYCHECKER::check(node.key, Some(path[index])) {
|
|
||||||
VisitOutcome::Continue
|
|
||||||
} else {
|
|
||||||
VisitOutcome::Skip
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if node.key.is_none() {
|
|
||||||
result = Some(last);
|
|
||||||
}
|
|
||||||
VisitOutcome::EarlyExit
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
let visit_post = |_: &Vec<usize>| {};
|
|
||||||
self.walk(visit_pre, visit_post);
|
|
||||||
result
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn lineal_descendant(&self, start: usize) -> impl Iterator<Item = &KEY> {
|
|
||||||
let mut nodes: Vec<usize> = vec![];
|
|
||||||
let mut node_option = Some(start);
|
|
||||||
while let Some(node) = node_option {
|
|
||||||
let key = &self.get_node(node).key;
|
|
||||||
if key.is_some() {
|
|
||||||
nodes.push(node);
|
|
||||||
}
|
|
||||||
node_option = self.get_node(node).parent;
|
|
||||||
}
|
|
||||||
nodes
|
|
||||||
.into_iter()
|
|
||||||
.rev()
|
|
||||||
.map(|node_index| self.get_node(node_index).key.as_ref().unwrap())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn walk<CB1, CB2>(&self, mut visit_pre: CB1, mut visit_post: CB2)
|
|
||||||
where
|
|
||||||
CB1: FnMut(&Vec<usize>) -> VisitOutcome,
|
|
||||||
CB2: FnMut(&Vec<usize>),
|
|
||||||
{
|
|
||||||
let mut stack: Vec<(usize, Option<usize>)> = vec![];
|
|
||||||
let mut public_stack: Vec<usize> = vec![];
|
|
||||||
let root_node = self.get_node(0);
|
|
||||||
stack.push((0, root_node.child));
|
|
||||||
public_stack.push(0);
|
|
||||||
visit_pre(&public_stack);
|
|
||||||
while !stack.is_empty() {
|
|
||||||
let last = &mut stack.last_mut().unwrap();
|
|
||||||
match last.1 {
|
|
||||||
Some(child_node_id) => {
|
|
||||||
let child_node = self.get_node(child_node_id);
|
|
||||||
last.1 = child_node.next;
|
|
||||||
public_stack.push(child_node_id);
|
|
||||||
let visit_pre_outcome = visit_pre(&public_stack);
|
|
||||||
match visit_pre_outcome {
|
|
||||||
VisitOutcome::Continue => {
|
|
||||||
stack.push((child_node_id, child_node.child));
|
|
||||||
}
|
|
||||||
VisitOutcome::Skip => {
|
|
||||||
stack.push((child_node_id, None));
|
|
||||||
}
|
|
||||||
VisitOutcome::EarlyExit => return,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
None => {
|
|
||||||
visit_post(&public_stack);
|
|
||||||
stack.pop();
|
|
||||||
public_stack.pop();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn tails(&self) -> &BTreeSet<usize> {
|
|
||||||
&self.tails
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,41 +0,0 @@
|
|||||||
// `TrieKey` bundles the bounds every trie key type must satisfy:
// `Copy`, `Display` and `Sized`. It is declared through the `trait_group!`
// macro (presumably so that any type meeting the bounds implements it
// automatically — confirm against the `trait_group` crate).
trait_group! {
    pub trait TrieKey : std::marker::Copy + std::fmt::Display + Sized
}
|
|
||||||
|
|
||||||
pub struct TrieNode<KEY>
|
|
||||||
where
|
|
||||||
KEY: TrieKey,
|
|
||||||
{
|
|
||||||
pub key: Option<KEY>,
|
|
||||||
pub prev: Option<usize>,
|
|
||||||
pub next: Option<usize>,
|
|
||||||
pub child: Option<usize>,
|
|
||||||
pub parent: Option<usize>,
|
|
||||||
pub(crate) ref_count: usize,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<KEY> TrieNode<KEY>
|
|
||||||
where
|
|
||||||
KEY: TrieKey,
|
|
||||||
{
|
|
||||||
fn new(
|
|
||||||
key: Option<KEY>,
|
|
||||||
prev: Option<usize>,
|
|
||||||
next: Option<usize>,
|
|
||||||
parent: Option<usize>,
|
|
||||||
child: Option<usize>,
|
|
||||||
) -> TrieNode<KEY> {
|
|
||||||
TrieNode {
|
|
||||||
key,
|
|
||||||
prev,
|
|
||||||
next,
|
|
||||||
child,
|
|
||||||
parent,
|
|
||||||
ref_count: 0,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn new0(key: Option<KEY>) -> TrieNode<KEY> {
|
|
||||||
TrieNode::new(key, None, None, None, None)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Reference in New Issue
Block a user