Compare commits

...

10 Commits

22 changed files with 324 additions and 110 deletions

4
Jenkinsfile vendored
View File

@@ -7,8 +7,8 @@ pipeline {
stage("Build") {
steps {
sh "./gradlew clean build"
junit testResults: "build/test-results/test/*.xml"
archiveArtifacts artifacts: 'build/libs/*.jar,benchmark/build/libs/*.jar',
junit testResults: "build/test-results/*Test/*.xml"
archiveArtifacts artifacts: 'build/libs/*.jar,build/libs/*.klib,benchmark/build/distributions/*.jar',
allowEmptyArchive: true,
fingerprint: true,
onlyIfSuccessful: true

61
README.md Normal file
View File

@@ -0,0 +1,61 @@
# Kotlin multiplatform library for Levenshtein distance
This library is used to find the closest matches of a word in a predefined set of words, according to
[Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) or [Damerau-Levenshtein distance](https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance).
### Build
The library uses Gradle as the build system, so the only required dependency is a valid JDK 17 implementation.
Then it can be built using
```bash
./gradlew build
```
### Usage
The library is available from the "https://woggioni.net/mvn" Maven repository and can be consumed by another Gradle
project simply by adding
```Groovy
repositories {
maven {
url = "https://woggioni.net/mvn"
}
}
dependencies {
implementation group: 'net.woggioni', name: 'klevtree', version: '2023.03'
}
```
As a Kotlin multiplatform library, it currently supports the jvm, js and linuxX64 targets.
### Example code
```kotlin
val words = listOf(
"tired",
"authorise",
"exercise",
"bloody",
"ritual",
"trail",
"resort",
"landowner",
"navy",
"captivate",
"captivity",
"north")
val tree = LevTrie().apply {
algorithm = LevTrie.Algorithm.DAMERAU_LEVENSHTEIN
caseSensitive = false
words.forEach(this::add)
}
val result = tree.fuzzySearch("fired", 1)
result.forEach {
println("Word: ${it.first}, distance: ${it.second}")
}
// Word: tired, distance: 1
```

View File

@@ -1,13 +1,27 @@
plugins {
alias catalog.plugins.kotlin.jvm
alias catalog.plugins.envelope
alias catalog.plugins.kotlin.multiplatform
alias catalog.plugins.envelope apply false
}
import net.woggioni.gradle.envelope.EnvelopeJarTask
kotlin {
jvm()
}
dependencies {
implementation catalog.jwo
implementation(rootProject)
jvmMainImplementation catalog.jwo
jvmMainImplementation(rootProject)
}
envelopeJar {
Provider<EnvelopeJarTask> envelopeJarTaskProvider = project.tasks.register("envelopeJar", EnvelopeJarTask.class) {
group = BasePlugin.BUILD_GROUP
description = "Package the application in a single executable jar file"
includeLibraries(project.configurations.named("jvmRuntimeClasspath"))
includeLibraries(project.getTasks().named("jvmJar", Jar.class))
mainClass = "net.woggioni.klevtree.benchmark.BenchmarkKt"
}
project.tasks.named(BasePlugin.ASSEMBLE_TASK_NAME, DefaultTask.class) {
dependsOn(envelopeJarTaskProvider)
}

View File

@@ -6,20 +6,18 @@ import java.io.InputStreamReader
import net.woggioni.jwo.Chronometer
fun main() {
val reader = BufferedReader(
val tree = LevTrie().apply {
caseSensitive = false
algorithm = LevTrie.Algorithm.DAMERAU_LEVENSHTEIN
}
BufferedReader(
InputStreamReader(Chronometer::class.java.getResourceAsStream("/cracklib-small"))
)
val tree = LevTrie()
tree.caseSensitive = false
try {
).use { reader ->
for(line in reader.lines()) {
tree.add(line.asIterable())
}
} finally {
reader.close()
}
tree.algorithm = LevTrie.Algorithm.DAMERAU_LEVENSHTEIN
tree.caseSensitive = false
val chr = Chronometer()
val keys = arrayOf("camel", "coriolis", "mattel", "cruzer", "cpoper", "roublesoot")

View File

@@ -1,9 +1,11 @@
plugins {
id 'maven-publish'
alias catalog.plugins.kotlin.jvm
alias catalog.plugins.multi.release.jar
alias catalog.plugins.kotlin.multiplatform
}
import org.jetbrains.kotlin.gradle.dsl.JvmTarget
import org.jetbrains.kotlin.gradle.dsl.KotlinJvmCompilerOptions
import org.apache.tools.ant.taskdefs.condition.Os
allprojects {
group = "net.woggioni"
@@ -17,34 +19,57 @@ allprojects {
}
}
ext {
setProperty('jpms.module.name', 'net.woggioni.klevtree')
}
kotlin {
sourceSets {
commonTest {
dependencies {
implementation group: 'org.jetbrains.kotlin',
name: 'kotlin-test',
version: catalog.versions.kotlin.get()
}
}
}
jvm {
jvmToolchain(17)
withJava()
compilations.main {
Action<KotlinJvmCompilerOptions> action = { KotlinJvmCompilerOptions kjco ->
kjco.javaParameters = true
kjco.jvmTarget = JvmTarget.JVM_1_8
kjco.moduleName = "net.woggioni.klevtree"
}
compilerOptions.configure(action)
}
}
dependencies {
implementation catalog.jwo
js(IR) {
nodejs()
}
testImplementation catalog.junit.jupiter.api
testImplementation catalog.junit.jupiter.params
testRuntimeOnly catalog.junit.jupiter.engine
testRuntimeOnly catalog.log4j.slf4j.impl
}
compileJava {
options.release = 8
options.compilerArgs << '-parameters'
}
compileKotlin {
kotlinOptions.with {
jvmTarget = '1.8'
if(Os.isFamily(Os.FAMILY_UNIX)) {
if(Os.isArch("amd64")) {
linuxX64()
} else if(Os.isArch("aarch64")) {
linuxArm64()
}
} else if(Os.isFamily(Os.FAMILY_MAC)) {
if(Os.isArch("amd64")) {
macosX64()
} else if(Os.isArch("aarch64")) {
macosArm64()
}
} else if(Os.isFamily(Os.FAMILY_WINDOWS)) {
mingwX64()
}
}
test {
useJUnitPlatform()
compileJava {
options.release = 11
String patchString = 'net.woggioni.klevtree=' + kotlin.targets.jvm.compilations.main.output.classesDirs.asPath
options.compilerArgs << '--patch-module' << patchString
}
publishing {
repositories {
maven {
@@ -53,7 +78,7 @@ publishing {
}
publications {
maven(MavenPublication) {
from(components["java"])
from(components.kotlin)
}
}
}

View File

@@ -1,6 +1,7 @@
kotlin.code.style=official
kotlin.jvm.target.validation.mode=ignore
woggioniMavenRepositoryUrl=https://woggioni.net/mvn/
klevtree.version = 1.0-SNAPSHOT
lys.version = 0.2-SNAPSHOT
klevtree.version = 2024.02.12
lys.version = 2024.02.09

Binary file not shown.

View File

@@ -1,5 +1,7 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.0-bin.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-8.6-bin.zip
networkTimeout=10000
validateDistributionUrl=true
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists

35
gradlew vendored
View File

@@ -55,7 +55,7 @@
# Darwin, MinGW, and NonStop.
#
# (3) This script is generated from the Groovy template
# https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
# within the Gradle project.
#
# You can find Gradle at https://github.com/gradle/gradle/.
@@ -80,13 +80,11 @@ do
esac
done
APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit
APP_NAME="Gradle"
# This is normally unused
# shellcheck disable=SC2034
APP_BASE_NAME=${0##*/}
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit
# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD=maximum
@@ -133,22 +131,29 @@ location of your Java installation."
fi
else
JAVACMD=java
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
if ! command -v java >/dev/null 2>&1
then
die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
fi
# Increase the maximum file descriptors if we can.
if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
case $MAX_FD in #(
max*)
# In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
# shellcheck disable=SC2039,SC3045
MAX_FD=$( ulimit -H -n ) ||
warn "Could not query maximum file descriptor limit"
esac
case $MAX_FD in #(
'' | soft) :;; #(
*)
# In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
# shellcheck disable=SC2039,SC3045
ulimit -n "$MAX_FD" ||
warn "Could not set maximum file descriptor limit to $MAX_FD"
esac
@@ -193,11 +198,15 @@ if "$cygwin" || "$msys" ; then
done
fi
# Collect all arguments for the java command;
# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
# shell script including quotes and variable substitutions, so put them in
# double quotes to make sure that they get re-expanded; and
# * put everything else in single quotes, so that it's not re-expanded.
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
# Collect all arguments for the java command:
# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments,
# and any embedded shellness will be escaped.
# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be
# treated as '${Hostname}' itself on the command line.
set -- \
"-Dorg.gradle.appname=$APP_BASE_NAME" \

1
gradlew.bat vendored
View File

@@ -26,6 +26,7 @@ if "%OS%"=="Windows_NT" setlocal
set DIRNAME=%~dp0
if "%DIRNAME%"=="" set DIRNAME=.
@rem This is normally unused
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

View File

@@ -1,12 +1,14 @@
package net.woggioni.klevtree
import net.woggioni.jwo.TreeNodeVisitor
import net.woggioni.klevtree.tree.TreeNodeVisitor
import kotlin.math.min
sealed class DistanceCalculator {
abstract fun compute(keyChecker : Trie.Keychecker<Char>,
stack: List<TreeNodeVisitor.StackContext<LevNode, Unit>>,
wordkey: String,
worstCase : Int) : TreeNodeVisitor.VisitOutcome
stack: List<TreeNodeVisitor.StackContext<LevNode, Unit>>,
wordkey: String,
worstCase : Int) : TreeNodeVisitor.VisitOutcome
object LevenshteinDistanceCalculator : DistanceCalculator() {
override fun compute(keyChecker : Trie.Keychecker<Char>,
@@ -21,7 +23,7 @@ sealed class DistanceCalculator {
if(keyChecker.check(wordkey[i - 1], currentStackElement.node.key)) {
currentRow[i] = previousRow[i - 1]
} else {
currentRow[i] = Math.min(Math.min(currentRow[i - 1], previousRow[i -1]), previousRow[i]) + 1
currentRow[i] = min(min(currentRow[i - 1], previousRow[i -1]), previousRow[i]) + 1
}
}
return if(worstCase >= 0 && worstCase <= currentRow.minOrNull()!!) {
@@ -45,13 +47,13 @@ sealed class DistanceCalculator {
if (keyChecker.check(wordkey[i - 1], cse.node.key)) {
crow[i] = prow[i - 1]
} else {
crow[i] = Math.min(Math.min(crow[i - 1], prow[i - 1]), prow[i]) + 1
crow[i] = min(min(crow[i - 1], prow[i - 1]), prow[i]) + 1
}
if (stack.size > 2 && i > 1 && keyChecker.check(wordkey[i - 2], cse.node.key)
&& keyChecker.check(wordkey[i - 1], pse.node.key)) {
val ppse = stack[stack.size - 3]
val pprow: IntArray = ppse.node.payload!!
crow[i] = Math.min(crow[i], pprow[i - 2] + 1)
crow[i] = min(crow[i], pprow[i - 2] + 1)
}
}
return if(worstCase >= 0 && worstCase <= prow.minOrNull()!!) {

View File

@@ -1,9 +1,9 @@
package net.woggioni.klevtree
import net.woggioni.jwo.TreeNodeVisitor
import net.woggioni.jwo.TreeWalker
import net.woggioni.klevtree.node.CharNode
import net.woggioni.klevtree.node.TrieNode
import net.woggioni.klevtree.tree.TreeNodeVisitor
import net.woggioni.klevtree.tree.TreeWalker
internal typealias LevNode = TrieNode<Char, IntArray>
@@ -41,7 +41,8 @@ class LevTrie : CharTrie<IntArray>() {
}
fun fuzzySearch(word : String, maxResult: Int) : List<Pair<String, Int>> {
val result = sortedSetOf<Pair<String, Int>>(compareBy({ it.second }, { it.first }))
val comparator : Comparator<Pair<String, Int>> = compareBy({ it.second }, { it.first })
val result = mutableListOf<Pair<String, Int>>()
val requiredSize = word.length + 1
val visitor = object: TreeNodeVisitor<LevNode, Unit> {
override fun visitPre(stack: List<TreeNodeVisitor.StackContext<LevNode, Unit>>): TreeNodeVisitor.VisitOutcome {
@@ -64,6 +65,7 @@ class LevTrie : CharTrie<IntArray>() {
val candidate = sb.toString()
val distance = stack[stack.size - 2].node.payload!![word.length]
result.add(candidate to distance)
result.sortWith(comparator)
if(result.size > maxResult) {
result.remove(result.last())
}

View File

@@ -1,8 +1,8 @@
package net.woggioni.klevtree
import net.woggioni.jwo.TreeNodeVisitor
import net.woggioni.jwo.TreeWalker
import net.woggioni.klevtree.node.TrieNode
import net.woggioni.klevtree.tree.TreeNodeVisitor
import net.woggioni.klevtree.tree.TreeWalker
abstract class Trie<T : TrieNode<KEY, PAYLOAD>, KEY, PAYLOAD> {

View File

@@ -1,6 +1,6 @@
package net.woggioni.klevtree.node
import net.woggioni.jwo.TreeNodeVisitor
import net.woggioni.klevtree.tree.TreeNodeVisitor
open class TrieNode<T, PAYLOAD>(val key : T?) : TreeNodeVisitor.TreeNode<TrieNode<T, PAYLOAD>> {

View File

@@ -0,0 +1,64 @@
package net.woggioni.klevtree.tree
/**
 * Callback contract used by [TreeWalker] while it traverses a tree.
 *
 * [TreeWalker.walk] invokes [visitPre] when it first reaches a node and [visitPost] once it has
 * finished with that node. Both callbacks receive a list of [StackContext] instances describing
 * the path walked so far: every node is preceded in the list by its ancestors, and the last
 * element is the node currently being visited.
 *
 * Each [StackContext] exposes a mutable [StackContext.context] slot. A value stored there from
 * [visitPre] can be read back from [visitPost] for the same node, which supports algorithms that
 * need both pre-order and post-order logic.
 *
 * @param NODE the concrete node type of the tree being walked
 * @param T the type of the user object stored in [StackContext.context]
 */
interface TreeNodeVisitor<NODE : TreeNodeVisitor.TreeNode<NODE>, T> {

    /**
     * Minimal view of a tree node: it only has to enumerate its children.
     *
     * @param NODE the concrete node type
     */
    interface TreeNode<NODE : TreeNode<NODE>> {
        /**
         * @return an iterator over the children of this node; the return type allows `null`
         */
        fun children(): Iterator<NODE>?
    }

    /**
     * One level of the traversal stack as seen by the visitor: the node at that level plus a
     * slot for a user-defined object.
     *
     * @param NODE the node type held at this stack level
     * @param T the type of the user object stored in [context]
     */
    interface StackContext<NODE : TreeNode<*>?, T> {

        /** User object attached to this stack level; readable and writable by the visitor. */
        var context: T?

        /** The tree node this stack level refers to. */
        val node: NODE
    }

    /** Value returned by [visitPre] to steer the traversal. */
    enum class VisitOutcome {
        CONTINUE,
        SKIP,
        EARLY_EXIT
    }

    /**
     * Called when a node is reached, following a
     * [depth-first pre-order algorithm](https://en.wikipedia.org/wiki/Tree_traversal#Pre-order_(NLR)).
     *
     * The default implementation always answers [VisitOutcome.CONTINUE].
     *
     * @param stack the [StackContext] instances for the full path from the root to the current
     * node; the current node is the last element
     * @return the [VisitOutcome] telling the walker how to proceed from the current node
     */
    fun visitPre(stack: List<StackContext<NODE, T>>): VisitOutcome = VisitOutcome.CONTINUE

    /**
     * Called when the walker is done with a node, following a
     * [depth-first post-order algorithm](https://en.wikipedia.org/wiki/Tree_traversal#Post-order_(LRN)).
     *
     * The default implementation does nothing.
     *
     * @param stack the [StackContext] instances for the full path from the root to the current
     * node; the current node is the last element
     */
    fun visitPost(stack: List<StackContext<NODE, T>>) {
        // Intentionally empty: post-order processing is opt-in.
    }
}

View File

@@ -0,0 +1,48 @@
package net.woggioni.klevtree.tree
/**
 * Drives a [TreeNodeVisitor] over a tree, depth-first, using an explicit stack instead of
 * recursion.
 *
 * @param NODE the concrete node type of the tree
 * @param T the type of the per-level user object exposed through [TreeNodeVisitor.StackContext]
 * @property visitor the callbacks invoked for every node encountered
 */
class TreeWalker<NODE : TreeNodeVisitor.TreeNode<NODE>, T>(
    private val visitor: TreeNodeVisitor<NODE, T>
) {

    /**
     * Internal stack frame: the public [TreeNodeVisitor.StackContext] view plus the iterator
     * over the children of [node] that still have to be visited.
     */
    private class StackElement<NODE : TreeNodeVisitor.TreeNode<NODE>, T>(override val node: NODE) :
        TreeNodeVisitor.StackContext<NODE, T> {
        override var context: T? = null
        var childrenIterator: Iterator<NODE>? = null
    }

    /**
     * Traverses the tree rooted at [root], calling [TreeNodeVisitor.visitPre] when a node is
     * pushed on the stack and [TreeNodeVisitor.visitPost] right before it is popped.
     *
     * If any `visitPre` call returns [TreeNodeVisitor.VisitOutcome.EARLY_EXIT] the method returns
     * immediately, without calling `visitPost` for the nodes still on the stack.
     *
     * @param root the root node of the tree
     */
    fun walk(root: NODE) {
        val frames = mutableListOf<StackElement<NODE, T>>()
        if (!descend(frames, root)) return
        while (frames.isNotEmpty()) {
            // Next unvisited child of the node on top of the stack, if there is one.
            val nextChild = frames.last().childrenIterator?.takeIf { it.hasNext() }?.next()
            if (nextChild != null) {
                if (!descend(frames, nextChild)) return
            } else {
                // No children left (or subtree skipped): finish this node and pop it.
                visitor.visitPost(frames)
                frames.removeLast()
            }
        }
    }

    /**
     * Pushes [node] on [frames], runs the pre-order callback and records which children (if any)
     * have to be visited next.
     *
     * @return `false` when the visitor requested an early exit, `true` otherwise
     */
    private fun descend(frames: MutableList<StackElement<NODE, T>>, node: NODE): Boolean {
        val frame = StackElement<NODE, T>(node)
        frames.add(frame)
        return when (visitor.visitPre(frames)) {
            TreeNodeVisitor.VisitOutcome.CONTINUE -> {
                frame.childrenIterator = node.children()
                true
            }
            TreeNodeVisitor.VisitOutcome.SKIP -> {
                // Leaving the iterator null makes the main loop go straight to visitPost.
                frame.childrenIterator = null
                true
            }
            TreeNodeVisitor.VisitOutcome.EARLY_EXIT -> false
        }
    }
}

View File

@@ -1,7 +1,11 @@
package net.woggioni.klevtree
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test
import kotlin.test.Test
import kotlin.test.assertEquals
import kotlin.test.assertNotNull
import kotlin.test.assertNull
import kotlin.test.assertTrue
class LevtreeTest {
@@ -13,23 +17,23 @@ class LevtreeTest {
val word = "dailies"
run {
val pair = tree.add(word)
Assertions.assertTrue(pair.first)
assertTrue(pair.first)
val node = tree.search(word)
Assertions.assertNotNull(node)
Assertions.assertEquals(
assertNotNull(node)
assertEquals(
word,
node!!.linealDescendant().fold(StringBuilder(), StringBuilder::append).toString()
node.linealDescendant().fold(StringBuilder(), StringBuilder::append).toString()
)
val result = tree.fuzzySearch(word, 5)
Assertions.assertEquals(1, result.size)
Assertions.assertEquals(word to 0, result[0])
assertEquals(1, result.size)
assertEquals(word to 0, result[0])
}
run {
tree.remove(word)
val node = tree.search(word)
Assertions.assertNull(node)
assertNull(node)
val result = tree.fuzzySearch(word, 5)
Assertions.assertEquals(0, result.size)
assertEquals(0, result.size)
}
}
@@ -63,21 +67,21 @@ class LevtreeTest {
run {
val word = "fired"
val result = tree.fuzzySearch(word, 4)
Assertions.assertEquals(4, result.size)
Assertions.assertEquals("tired" to 1, result[0])
assertEquals(4, result.size)
assertEquals("tired" to 1, result[0])
}
run {
val word = "tierd"
val result = tree.fuzzySearch(word, 4)
Assertions.assertEquals(4, result.size)
Assertions.assertEquals("tired" to 2, result[0])
assertEquals(4, result.size)
assertEquals("tired" to 2, result[0])
}
run {
val word = "tierd"
tree.remove("tired")
val result = tree.fuzzySearch(word, 4)
Assertions.assertEquals(4, result.size)
Assertions.assertEquals("trail" to 4, result[0])
assertEquals(4, result.size)
assertEquals("trail" to 4, result[0])
}
}
@@ -89,22 +93,22 @@ class LevtreeTest {
run {
val word = "fired"
val result = tree.fuzzySearch(word, 4)
Assertions.assertEquals(4, result.size)
Assertions.assertEquals("tired" to 1, result[0])
assertEquals(4, result.size)
assertEquals("tired" to 1, result[0])
}
run {
val word = "capitvate"
val result = tree.fuzzySearch(word, 4)
Assertions.assertEquals(4, result.size)
Assertions.assertEquals("captivate" to 1, result[0])
Assertions.assertEquals("captivity" to 3, result[1])
assertEquals(4, result.size)
assertEquals("captivate" to 1, result[0])
assertEquals("captivity" to 3, result[1])
}
run {
tree.remove("captivate")
val word = "capitvate"
val result = tree.fuzzySearch(word, 4)
Assertions.assertEquals(4, result.size)
Assertions.assertEquals("captivity" to 3, result[0])
assertEquals(4, result.size)
assertEquals("captivity" to 3, result[0])
}
}
}

View File

@@ -1,4 +1,4 @@
module net.woggioni.klevtree {
requires net.woggioni.jwo;
requires kotlin.stdlib;
exports net.woggioni.klevtree;
}

View File

@@ -1,17 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<Configuration status="WARN">
<Appenders>
<Console name="Console" target="SYSTEM_ERR">
<PatternLayout pattern="%d{HH:mm:ss,SSS} %highlight{[%p]} (%t) %c: %m%n"/>
<Filters>
<ThresholdFilter level="ERROR" onMatch="ACCEPT" />
</Filters>
</Console>
</Appenders>
<Loggers>
<Root level="ALL">
<AppenderRef ref="Console"/>
</Root>
</Loggers>
</Configuration>