initial commit

This commit is contained in:
2019-11-30 10:38:38 +00:00
commit d6727ddad1
14 changed files with 55379 additions and 0 deletions

View File

@@ -0,0 +1,28 @@
// Gradle build script of the benchmark application (main class declared in the
// `application` block below), built against the library in the root project.
plugins {
kotlin("jvm")
application
}
group = "woggioni.net"
version = "0.1"
repositories {
mavenLocal()
mavenCentral()
jcenter()
}
dependencies {
implementation(kotlin("stdlib-jdk8"))
// The root project is the library under benchmark.
// NOTE(review): the benchmark source imports net.woggioni.jwo.Chronometer but no jwo
// dependency is declared here, so it presumably arrives transitively through the root
// project's `compile` configuration - confirm before migrating either script away from `compile`.
compile(rootProject)
// Disabled: would have put the root project's src/test/resources directory on the runtime classpath.
// runtime(files(rootProject.projectDir.toPath().resolve("src/test/resources")))
}
application {
// Fully qualified name of the class whose main() is launched by the `application` plugin.
mainClassName = "net.woggioni.klevtree.benchmark.BenchmarkKt"
}
java {
// Compile for, and emit bytecode compatible with, Java 8.
sourceCompatibility = JavaVersion.VERSION_1_8
targetCompatibility = JavaVersion.VERSION_1_8
}

View File

@@ -0,0 +1 @@
// Name given to the root project of this build.
rootProject.name = "klevtree-benchmark"

View File

@@ -0,0 +1,43 @@
package net.woggioni.klevtree.benchmark
import net.woggioni.klevtree.ILevTrie
import net.woggioni.klevtree.LevTrie
import java.io.BufferedReader
import java.io.InputStreamReader
import net.woggioni.jwo.Chronometer
/**
 * Benchmark entry point.
 *
 * Loads every line of the `/cracklib-small` classpath resource into a [LevTrie], switches the
 * trie to Damerau-Levenshtein distance and then:
 *  1. runs 50 rounds of fuzzy searches over a fixed set of keys, discarding the results;
 *  2. runs one more round, printing the six best matches for each key;
 *  3. prints the time reported by the [Chronometer], which is created after the dictionary
 *     has been loaded and read after the results have been printed.
 *
 * @param args command line arguments (unused)
 * @throws IllegalStateException if the dictionary resource is not on the classpath
 */
fun main(args: Array<String>) {
    // Fail with an explicit message instead of the NullPointerException that
    // InputStreamReader would raise when the resource is missing.
    val dictionary = checkNotNull(Chronometer::class.java.getResourceAsStream("/cracklib-small")) {
        "Resource /cracklib-small not found on the classpath"
    }
    val tree = LevTrie()
    tree.caseSensitive = false
    // `use` closes the reader on both the normal and the exceptional path.
    BufferedReader(InputStreamReader(dictionary)).use { reader ->
        for (line in reader.lines()) {
            tree.add(line.asIterable())
        }
    }
    tree.algorithm = ILevTrie.Algorithm.DAMERAU_LEVENSHTEIN

    val chr = Chronometer()
    val keys = arrayOf("camel", "coriolis", "mattel", "cruzer", "cpoper", "roublesoot")
    repeat(50) {
        for (searchKey in keys) {
            tree.fuzzySearch(searchKey, 6)
        }
    }
    for (searchKey in keys) {
        val standing = tree.fuzzySearch(searchKey, 6)
        for (res in standing) {
            println("distance: ${res.second}\t wordkey: ${res.first}")
        }
        println()
    }
    System.out.printf("Elapsed time: %.3f s\n", chr.elapsed(Chronometer.UnitOfMeasure.SECONDS))
    println("++++++++++++ End benchmark ++++++++++++")
}

File diff suppressed because it is too large Load Diff

29
build.gradle.kts Normal file
View File

@@ -0,0 +1,29 @@
// Gradle build script of the klevtree library (root project).
plugins {
kotlin("jvm") version "1.3.41"
}
group = "woggioni.net"
version = "0.1"
repositories {
mavenLocal()
mavenCentral()
jcenter()
}
dependencies {
implementation(kotlin("stdlib-jdk8"))
// The library sources import net.woggioni.jwo.tree.* (TreeWalker, TreeNodeVisitor, ...).
// NOTE(review): declared with `compile`, so jwo is also visible to projects that depend on
// this one; the benchmark subproject appears to rely on that - confirm before changing it.
compile("net.woggioni:jwo:1.0")
// Test-only dependencies: JUnit 4, the Kotlin test bindings for JUnit and Log4j 2.
testImplementation ("junit:junit:4.12")
testImplementation("org.jetbrains.kotlin:kotlin-test-junit:1.3.41")
testImplementation("org.apache.logging.log4j:log4j-core:2.12.1")
testImplementation("org.apache.logging.log4j:log4j-slf4j-impl:2.12.1")
}
java {
// Compile for, and emit bytecode compatible with, Java 8.
sourceCompatibility = JavaVersion.VERSION_1_8
targetCompatibility = JavaVersion.VERSION_1_8
}

1
gradle.properties Normal file
View File

@@ -0,0 +1 @@
# Use the official Kotlin coding conventions as the code style of this project.
kotlin.code.style=official

2
settings.gradle.kts Normal file
View File

@@ -0,0 +1,2 @@
// Root project name, followed by the subprojects that are part of this build.
rootProject.name = "klevtree"
include("benchmark")

View File

@@ -0,0 +1,39 @@
package net.woggioni.klevtree
import net.woggioni.klevtree.node.CharNode
import net.woggioni.klevtree.node.TrieNode
/**
 * A [Trie] whose keys are single characters, with convenience overloads that accept a whole
 * [String] instead of a sequence of characters.
 */
interface ICharTrie<PAYLOAD> : Trie<CharNode<PAYLOAD>, Char, PAYLOAD> {

    /**
     * Considers two keys equal when they are the same character once lower-cased.
     * Two `null` keys are equal; a `null` key never equals a character.
     *
     * (The bodies of the two checkers used to be swapped, so the class names did not
     * match what they did and `caseSensitive` had the opposite of its intended effect.)
     */
    class CaseInsensitiveKeyChecker : Trie.Keychecker<Char> {
        override fun check(key1: Char?, key2: Char?) = key1?.toLowerCase() == key2?.toLowerCase()
    }

    /**
     * Considers two keys equal only when they are exactly the same character
     * (or both `null`).
     */
    class CaseSensitiveKeyChecker : Trie.Keychecker<Char> {
        override fun check(key1: Char?, key2: Char?) = key1 == key2
    }

    /**
     * Whether key comparison performed through [keyChecker] distinguishes upper and lower case.
     * Implementations are expected to replace [keyChecker] when this value changes.
     */
    var caseSensitive: Boolean

    /** Adds [word] to the trie, one node per character. See [Trie.add] for the returned pair. */
    fun add(word: String) = super.add(word.asIterable())

    /** Returns the terminal node of [word], or `null` when the word is not stored. */
    fun search(word: String): TrieNode<Char, PAYLOAD>? = search(word.toList())

    /** Removes [word] from the trie; returns `false` when the word was not stored. */
    fun remove(word: String) = remove(word.toList())
}
/**
 * Default [ICharTrie] implementation: an initially empty trie whose root is a key-less [CharNode].
 */
class CharTrie<PAYLOAD> : ICharTrie<PAYLOAD> {

    override val root: TrieNode<Char, PAYLOAD> = CharNode(null)

    override val tails = mutableListOf<TrieNode<Char, PAYLOAD>>()

    override var keyChecker: Trie.Keychecker<Char> = ICharTrie.CaseSensitiveKeyChecker()

    /**
     * Assigning this property also replaces [keyChecker]: `true` installs a
     * [ICharTrie.CaseSensitiveKeyChecker], `false` a [ICharTrie.CaseInsensitiveKeyChecker].
     */
    override var caseSensitive: Boolean = true
        set(value) {
            val checker: Trie.Keychecker<Char> =
                if (value) ICharTrie.CaseSensitiveKeyChecker() else ICharTrie.CaseInsensitiveKeyChecker()
            keyChecker = checker
            field = value
        }
}

View File

@@ -0,0 +1,171 @@
package net.woggioni.klevtree
import net.woggioni.jwo.tree.StackContext
import net.woggioni.jwo.tree.TreeNodeVisitor
import net.woggioni.jwo.tree.TreeWalker
import net.woggioni.klevtree.node.CharNode
import net.woggioni.klevtree.node.TrieNode
/** A trie node whose payload is one row of the edit-distance matrix of the word being searched. */
typealias LevNode = TrieNode<Char, IntArray>

/**
 * Character trie that supports approximate lookups ([fuzzySearch]) based on an edit distance.
 *
 * Every node stores, as its payload, one row of the distance matrix between the searched word
 * and the characters on the path from the root to that node; entry `i` of a row refers to the
 * first `i` characters of the searched word. Rows are kept on the nodes and reused by later searches.
 */
interface ILevTrie : ICharTrie<IntArray> {

    /** Strategy that fills in the distance row of the node currently being visited. */
    interface DistanceCalculator {
        /**
         * Computes entries `1..wordkey.length` of the row of the last node in [stack] from the
         * rows of its ancestors, which must already have been computed.
         *
         * @param keyChecker decides whether a character of [wordkey] matches a node key
         * @param stack path from the root (first element) to the visited node (last element);
         *        it must contain at least two elements
         * @param wordkey the word being searched
         * @param worstCase the largest distance held by an already full result set,
         *        or a negative value when no pruning may take place
         * @return [TreeNodeVisitor.VisitOutcome.SKIP] when the subtree below the visited node
         *         is to be pruned, [TreeNodeVisitor.VisitOutcome.CONTINUE] otherwise
         */
        fun compute(keyChecker: Trie.Keychecker<Char>,
                    stack: List<StackContext<LevNode, Unit>>,
                    wordkey: String,
                    worstCase: Int): TreeNodeVisitor.VisitOutcome
    }

    /** Plain Levenshtein distance: insertion, deletion and substitution each cost 1. */
    object LevenshteinDistanceCalculator : DistanceCalculator {
        override fun compute(keyChecker: Trie.Keychecker<Char>,
                             stack: List<StackContext<LevNode, Unit>>,
                             wordkey: String,
                             worstCase: Int): TreeNodeVisitor.VisitOutcome {
            val previousStackElement = stack[stack.size - 2]
            val currentStackElement = stack.last()
            // Rows are allocated by fuzzySearch before a node is handed to the calculator.
            val previousRow: IntArray = previousStackElement.node.payload!!
            val currentRow: IntArray = currentStackElement.node.payload!!
            for (i in 1..wordkey.length) {
                currentRow[i] = if (keyChecker.check(wordkey[i - 1], currentStackElement.node.key)) {
                    // Matching character: no additional cost over the diagonal entry.
                    previousRow[i - 1]
                } else {
                    // Cheapest of insertion, substitution and deletion, plus one.
                    minOf(currentRow[i - 1], previousRow[i - 1], previousRow[i]) + 1
                }
            }
            // Prune when the smallest entry of the row is not below the worst distance kept so far.
            return if (worstCase >= 0 && worstCase <= currentRow.min()!!) {
                TreeNodeVisitor.VisitOutcome.SKIP
            } else {
                TreeNodeVisitor.VisitOutcome.CONTINUE
            }
        }
    }

    /**
     * Levenshtein distance extended with the transposition of two adjacent characters,
     * which also costs 1.
     */
    object DamerauLevenshteinDistanceCalculator : DistanceCalculator {
        override fun compute(keyChecker: Trie.Keychecker<Char>,
                             stack: List<StackContext<LevNode, Unit>>,
                             wordkey: String,
                             worstCase: Int): TreeNodeVisitor.VisitOutcome {
            val pse = stack[stack.size - 2]
            val cse = stack.last()
            // Rows are allocated by fuzzySearch before a node is handed to the calculator.
            val prow: IntArray = pse.node.payload!!
            val crow: IntArray = cse.node.payload!!
            for (i in 1..wordkey.length) {
                crow[i] = if (keyChecker.check(wordkey[i - 1], cse.node.key)) {
                    prow[i - 1]
                } else {
                    minOf(crow[i - 1], prow[i - 1], prow[i]) + 1
                }
                // Transposition: the last two characters of the word prefix equal the keys of
                // this node and of its parent, in swapped order. Needs a grandparent row.
                if (stack.size > 2 && i > 1 && keyChecker.check(wordkey[i - 2], cse.node.key)
                    && keyChecker.check(wordkey[i - 1], pse.node.key)) {
                    val ppse = stack[stack.size - 3]
                    val pprow: IntArray = ppse.node.payload!!
                    crow[i] = Math.min(crow[i], pprow[i - 2] + 1)
                }
            }
            // The bound is taken on the parent row rather than on the row just computed.
            // NOTE(review): presumably because the transposition term of a child reads this
            // node's parent row - confirm before "aligning" this with the Levenshtein calculator.
            return if (worstCase >= 0 && worstCase <= prow.min()!!) {
                TreeNodeVisitor.VisitOutcome.SKIP
            } else {
                TreeNodeVisitor.VisitOutcome.CONTINUE
            }
        }
    }

    enum class Algorithm {
        /**
         * Plain Levenshtein distance
         */
        LEVENSHTEIN,
        /**
         * Damerau-Levenshtein distance
         */
        DAMERAU_LEVENSHTEIN
    }

    /** The strategy used by [fuzzySearch] to compute distance rows. */
    var distanceCalculator: DistanceCalculator

    /**
     * Enum view of [distanceCalculator]. Reading it reports [Algorithm.LEVENSHTEIN] for any
     * calculator other than the two built-in ones; writing it installs the matching built-in
     * calculator.
     */
    var algorithm: Algorithm
        get() = when (distanceCalculator) {
            LevenshteinDistanceCalculator -> Algorithm.LEVENSHTEIN
            DamerauLevenshteinDistanceCalculator -> Algorithm.DAMERAU_LEVENSHTEIN
            else -> Algorithm.LEVENSHTEIN
        }
        set(value) {
            distanceCalculator = when (value) {
                Algorithm.LEVENSHTEIN -> LevenshteinDistanceCalculator
                Algorithm.DAMERAU_LEVENSHTEIN -> DamerauLevenshteinDistanceCalculator
            }
        }

    /**
     * Finds the stored words closest to [word] according to [distanceCalculator].
     *
     * @param word the word to look for
     * @param maxResult maximum number of matches to return; zero or a negative value yields
     *        an empty list
     * @return at most [maxResult] `(word, distance)` pairs, ordered by increasing distance and,
     *         for equal distances, by word
     */
    fun fuzzySearch(word: String, maxResult: Int): List<Pair<String, Int>> {
        // Nothing can be returned; bailing out here also avoids calling last() on an
        // empty result set further down (which used to throw for maxResult == 0).
        if (maxResult <= 0) return emptyList()

        val result = sortedSetOf<Pair<String, Int>>(compareBy({ it.second }, { it.first }))
        val requiredSize = word.length + 1

        // When the root row is missing or too short, rows have never been allocated for a word
        // this long: every visited node gets a fresh row and, as before, nothing is pruned
        // during such a pass.
        val rootRow = root.payload
        val allocationPass = rootRow == null || rootRow.size < requiredSize

        // Makes sure the node on top of the stack owns a row long enough for this search.
        // Outside an allocation pass this only triggers for nodes that have no usable row yet,
        // e.g. nodes added to the trie after an earlier search (they used to cause a
        // NullPointerException).
        fun ensureRow(stack: List<StackContext<LevNode, Unit>>) {
            val node = stack.last().node
            val row = node.payload
            if (allocationPass || row == null || row.size < requiredSize) {
                val depth = stack.size - 1
                node.payload = if (depth == 0) {
                    // Root row: distance between the empty string and each prefix of the word.
                    IntArray(requiredSize) { i -> i }
                } else {
                    // Entry 0 is the distance between the node's path and the empty prefix;
                    // the remaining entries are filled in by the distance calculator.
                    IntArray(requiredSize) { i -> if (i == 0) depth else 0 }
                }
            }
        }

        fun visitNode(stack: List<StackContext<LevNode, Unit>>): TreeNodeVisitor.VisitOutcome {
            if (stack.size <= 1) {
                // The root row is already complete.
                return TreeNodeVisitor.VisitOutcome.CONTINUE
            }
            val currentStackElement = stack.last()
            if (currentStackElement.node.key != null) {
                val worstCase = if (result.size == maxResult) result.last().second else -1
                return distanceCalculator.compute(keyChecker, stack, word, worstCase)
            }
            // A node without key terminates a stored word: the distance of that word is the
            // last relevant entry of its parent's row.
            val candidate = currentStackElement.node.linealDescendant().joinToString("")
            val distance = stack[stack.size - 2].node.payload!![word.length]
            result.add(candidate to distance)
            if (result.size > maxResult) {
                result.remove(result.last())
            }
            return TreeNodeVisitor.VisitOutcome.SKIP
        }

        val visitor = object : TreeNodeVisitor<LevNode, Unit> {
            override fun visitPre(stack: List<StackContext<LevNode, Unit>>): TreeNodeVisitor.VisitOutcome {
                ensureRow(stack)
                val outcome = visitNode(stack)
                return if (allocationPass) TreeNodeVisitor.VisitOutcome.CONTINUE else outcome
            }
        }
        val walker = TreeWalker<LevNode, Unit>(visitor)
        walker.walk(root)
        return result.toList()
    }
}
/**
 * Default [ILevTrie] implementation: an initially empty trie that uses
 * [ILevTrie.LevenshteinDistanceCalculator] until another calculator is installed.
 */
class LevTrie : ILevTrie {

    override val root: TrieNode<Char, IntArray> = CharNode(null)

    override val tails = mutableListOf<TrieNode<Char, IntArray>>()

    override var keyChecker: Trie.Keychecker<Char> = ICharTrie.CaseSensitiveKeyChecker()

    /**
     * Assigning this property also replaces [keyChecker]: `true` installs a
     * [ICharTrie.CaseSensitiveKeyChecker], `false` a [ICharTrie.CaseInsensitiveKeyChecker].
     */
    override var caseSensitive: Boolean = true
        set(value) {
            val checker: Trie.Keychecker<Char> =
                if (value) ICharTrie.CaseSensitiveKeyChecker() else ICharTrie.CaseInsensitiveKeyChecker()
            keyChecker = checker
            field = value
        }

    override var distanceCalculator: ILevTrie.DistanceCalculator = ILevTrie.LevenshteinDistanceCalculator
}

View File

@@ -0,0 +1,154 @@
package net.woggioni.klevtree
import net.woggioni.jwo.tree.StackContext
import net.woggioni.jwo.tree.TreeNodeVisitor
import net.woggioni.jwo.tree.TreeWalker
import net.woggioni.klevtree.node.TrieNode
/**
 * A prefix tree of words made of [KEY] elements.
 *
 * Each element of a word is held by one [TrieNode]; the children of a node form a doubly linked
 * sibling list (`child`, `next`, `prev`). The end of a stored word is marked by a child node
 * whose key is `null` (a "tail"). Every node counts in `refCount` how many stored words pass
 * through it, which lets [remove] unlink the nodes that are no longer needed.
 */
interface Trie<T : TrieNode<KEY, PAYLOAD>, KEY, PAYLOAD> {

    /** Equality predicate over keys; `null` stands for the key of a root or tail node. */
    interface Keychecker<KEY> {
        fun check(key1: KEY?, key2: KEY?): Boolean
    }

    var keyChecker: Keychecker<KEY>

    /** The key-less node every word starts from. */
    val root: TrieNode<KEY, PAYLOAD>

    /** The tail nodes of all stored words, in insertion order. */
    val tails: MutableList<TrieNode<KEY, PAYLOAD>>

    /**
     * The stored words, each as the list of its keys. The returned [Iterable] creates a fresh
     * iterator over [tails] on every call to `iterator()`, so it can be traversed repeatedly.
     */
    val words: Iterable<List<KEY>>
        get() = Iterable { tails.asSequence().map { it.linealDescendant() }.iterator() }

    /**
     * Creates a node holding [key] and links it as the last child of [parent].
     *
     * @param prev the current last child of [parent] when the caller already knows it;
     *        when `null` the sibling list is walked to find it
     */
    private fun addNode(key: KEY?, parent: TrieNode<KEY, PAYLOAD>, prev: TrieNode<KEY, PAYLOAD>? = null): TrieNode<KEY, PAYLOAD> {
        val result = TrieNode<KEY, PAYLOAD>(key)
        result.parent = parent
        var last: TrieNode<KEY, PAYLOAD>? = prev
        if (last == null) {
            last = parent.child
            while (last?.next != null) {
                last = last.next
            }
        }
        if (last == null) {
            parent.child = result
        } else {
            last.next = result
            result.prev = last
        }
        return result
    }

    /** Detaches [node] from its parent and from its siblings. */
    private fun unlink(node: TrieNode<KEY, PAYLOAD>) {
        val parent = node.parent
        if (parent != null && parent.child == node) {
            parent.child = node.next
        }
        node.prev?.next = node.next
        node.next?.prev = node.prev
        node.parent = null
    }

    /**
     * Stores the word described by [path].
     *
     * A word that is a prefix of an already stored word is stored as well (it used to be
     * silently ignored because no new node had to be created for it).
     *
     * @return `(true, tail)` when the word has been added, `tail` being its new terminal node;
     *         `(false, node)` when nothing changed, i.e. the word was already stored or [path]
     *         is empty, `node` being the node of the last key of [path] (the root for an
     *         empty path)
     */
    fun add(path: Iterable<KEY>): Pair<Boolean, TrieNode<KEY, PAYLOAD>?> {
        var created = false
        var pnode: TrieNode<KEY, PAYLOAD> = root
        var length = 0
        for (key in path) {
            ++length
            // Look for a child holding this key, remembering the last sibling seen so that a
            // new node can be appended without walking the list again.
            var match: TrieNode<KEY, PAYLOAD>? = null
            var lastSibling: TrieNode<KEY, PAYLOAD>? = null
            var sibling = pnode.child
            while (sibling != null) {
                if (sibling.key == key) {
                    match = sibling
                    break
                }
                lastSibling = sibling
                sibling = sibling.next
            }
            if (match != null) {
                pnode = match
            } else {
                pnode = addNode(key, pnode, lastSibling)
                created = true
            }
        }
        // An empty path stores nothing.
        if (length == 0) return Pair(false, pnode)
        // All keys were already there: the word is new only if it has no tail yet.
        if (!created && pnode.children().asSequence().any { it.key == null }) {
            return Pair(false, pnode)
        }
        val tail = addNode(null, pnode)
        tails.add(tail)
        // One more word passes through every node between the tail and the root.
        var node: TrieNode<KEY, PAYLOAD>? = tail
        while (node != null) {
            ++node.refCount
            node = node.parent
        }
        return Pair(true, tail)
    }

    /**
     * Removes the word described by [path], unlinking every node that no other word uses
     * and dropping its tail from [tails].
     *
     * @return `true` when the word was stored and has been removed, `false` otherwise
     */
    fun remove(path: List<KEY>): Boolean {
        val tail = search(path) ?: return false
        tails.remove(tail)
        var current: TrieNode<KEY, PAYLOAD>? = tail
        while (current != null) {
            // Read the parent first: unlink() clears it.
            val parent = current.parent
            if (--current.refCount == 0) {
                unlink(current)
            }
            current = parent
        }
        return true
    }

    /**
     * Looks up the word described by [path]. Keys are compared with `==`.
     *
     * @return the tail node of the word, or `null` when the word is not stored
     */
    fun search(path: List<KEY>): TrieNode<KEY, PAYLOAD>? {
        var node: TrieNode<KEY, PAYLOAD> = root
        for (key in path) {
            node = node.children().asSequence().firstOrNull { it.key == key } ?: return null
        }
        // The word is stored only if the node of its last key has a tail among its children,
        // wherever that tail sits in the sibling list.
        return node.children().asSequence().firstOrNull { it.key == null }
    }
}

View File

@@ -0,0 +1,40 @@
package net.woggioni.klevtree
import net.woggioni.klevtree.node.StringNode
import net.woggioni.klevtree.node.TrieNode
/**
 * A [Trie] whose keys are whole strings, with convenience overloads that split a text
 * into keys around a delimiter.
 */
interface IWordTrie<PAYLOAD> : Trie<StringNode<PAYLOAD>, String, PAYLOAD> {

    /**
     * Considers two keys equal when they are the same string once lower-cased.
     * Two `null` keys are equal; a `null` key never equals a string.
     *
     * (The bodies of the two checkers used to be swapped, so the class names did not
     * match what they did and `caseSensitive` had the opposite of its intended effect.)
     */
    class CaseInsensitiveKeyChecker : Trie.Keychecker<String> {
        override fun check(key1: String?, key2: String?) = key1?.toLowerCase() == key2?.toLowerCase()
    }

    /** Considers two keys equal only when they are exactly the same string (or both `null`). */
    class CaseSensitiveKeyChecker : Trie.Keychecker<String> {
        override fun check(key1: String?, key2: String?) = key1 == key2
    }

    /**
     * Whether key comparison performed through [keyChecker] distinguishes upper and lower case.
     * Implementations are expected to replace [keyChecker] when this value changes.
     */
    var caseSensitive: Boolean

    /** Splits [word] around [delimiter] and stores the resulting key sequence. */
    fun add(word: String, delimiter: String) = super.add(word.split(delimiter))

    /**
     * Splits [word] around [delimiter] and looks the resulting key sequence up.
     *
     * @return the terminal node of the sequence, or `null` when it is not stored
     */
    fun search(word: String, delimiter: String): TrieNode<String, PAYLOAD>? = search(word.split(delimiter))
}
/**
 * Default [IWordTrie] implementation: an initially empty trie whose root is a key-less [StringNode].
 */
class WordTrie<PAYLOAD> : IWordTrie<PAYLOAD> {

    override val root: TrieNode<String, PAYLOAD> = StringNode(null)

    override val tails = mutableListOf<TrieNode<String, PAYLOAD>>()

    override var keyChecker: Trie.Keychecker<String> = IWordTrie.CaseSensitiveKeyChecker()

    /**
     * Assigning this property also replaces [keyChecker]: `true` installs a
     * [IWordTrie.CaseSensitiveKeyChecker], `false` a [IWordTrie.CaseInsensitiveKeyChecker].
     */
    override var caseSensitive: Boolean = true
        set(value) {
            val checker: Trie.Keychecker<String> =
                if (value) IWordTrie.CaseSensitiveKeyChecker() else IWordTrie.CaseInsensitiveKeyChecker()
            keyChecker = checker
            field = value
        }
}

View File

@@ -0,0 +1,58 @@
package net.woggioni.klevtree.node
import net.woggioni.jwo.tree.TreeNode
/**
 * Node of a trie. The children of a node are kept as a doubly linked list: [child] is the
 * first child, [next] and [prev] are the neighbouring siblings.
 *
 * @param key the element held by this node; `null` for nodes that hold no element
 *        (the root and the nodes marking the end of a word)
 */
open class TrieNode<T, PAYLOAD>(val key: T?) : TreeNode<TrieNode<T, PAYLOAD>> {
    var parent: TrieNode<T, PAYLOAD>? = null
    var child: TrieNode<T, PAYLOAD>? = null
    var next: TrieNode<T, PAYLOAD>? = null
    var prev: TrieNode<T, PAYLOAD>? = null

    /** Arbitrary data attached to this node. */
    var payload: PAYLOAD? = null

    /** Reference counter maintained by the owning trie. */
    var refCount = 0

    /**
     * Iterates over the direct children of this node, following the sibling list from [child].
     * As required by the [Iterator] contract, `next()` throws [NoSuchElementException]
     * once the children are exhausted.
     */
    override fun children(): Iterator<TrieNode<T, PAYLOAD>> {
        return object : Iterator<TrieNode<T, PAYLOAD>> {
            private var nextChild: TrieNode<T, PAYLOAD>? = child

            override fun hasNext(): Boolean = nextChild != null

            override fun next(): TrieNode<T, PAYLOAD> {
                val result = nextChild ?: throw NoSuchElementException()
                nextChild = result.next
                return result
            }
        }
    }

    /**
     * Collects the non-null keys found on the way from the root down to this node.
     *
     * @return the keys in root-to-node order, as a reversed view of the list built while
     *         walking up the [parent] chain
     */
    fun linealDescendant(): List<T> {
        val keys = mutableListOf<T>()
        var node: TrieNode<T, PAYLOAD>? = this
        while (node != null) {
            val key = node.key
            if (key != null) {
                keys.add(key)
            }
            node = node.parent
        }
        return keys.asReversed()
    }
}
/** A [TrieNode] whose key is a single character. */
class CharNode<PAYLOAD>(key: Char?) : TrieNode<Char, PAYLOAD>(key)

/** A [TrieNode] whose key is a whole string. */
class StringNode<PAYLOAD>(key: String?) : TrieNode<String, PAYLOAD>(key)

View File

@@ -0,0 +1,33 @@
package net.woggioni.klevtree
import org.junit.Assert.assertEquals
import org.junit.Assert.assertNotNull
import org.junit.Assert.assertNull
import org.junit.Assert.assertTrue
import org.junit.Test
import java.io.BufferedReader
import java.io.InputStreamReader
/**
 * Exercises [LevTrie] against the `/cracklib-small` word list found on the test classpath.
 */
class LevtreeTest {

    /**
     * Loads the word list, then checks that a word can be added, found and removed, and that
     * a fuzzy search honours its result limit and returns matches ordered by distance.
     * The intermediate values are still printed, as they were before assertions were added.
     */
    @Test
    fun foo() {
        val dictionary = checkNotNull(javaClass.getResourceAsStream("/cracklib-small")) {
            "Resource /cracklib-small not found on the test classpath"
        }
        val tree = LevTrie()
        tree.caseSensitive = false
        // `use` closes the reader on both the normal and the exceptional path.
        BufferedReader(InputStreamReader(dictionary)).use { reader ->
            for (line in reader.lines()) {
                tree.add(line.asIterable())
            }
        }

        // The word may or may not be part of the list; either way it must be found afterwards.
        println(tree.add("dailies"))
        val found = tree.search("dailies")
        assertNotNull("a word that has been added must be found", found)
        println(found!!.linealDescendant())
        assertEquals("dailies", found.linealDescendant().joinToString(""))

        assertTrue("removing a stored word must succeed", tree.remove("dailies"))
        val afterRemoval = tree.search("dailies")
        println(afterRemoval)
        assertNull(afterRemoval)

        tree.algorithm = ILevTrie.Algorithm.DAMERAU_LEVENSHTEIN
        val result = tree.fuzzySearch("daiiles", 5)
        println(result)
        assertTrue("no more than the requested number of matches", result.size <= 5)
        val distances = result.map { it.second }
        assertEquals("matches must be ordered by increasing distance", distances.sorted(), distances)
    }
}

View File

@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Logging configuration in Log4j 2 XML format; presumably the one picked up by the tests,
     since Log4j is declared as a test dependency of the build. -->
<Configuration status="WARN">
<Appenders>
<!-- Single appender writing to the standard error stream. -->
<Console name="Console" target="SYSTEM_ERR">
<!-- Layout: time, highlighted level, thread name, logger name, message, line break. -->
<PatternLayout pattern="%d{HH:mm:ss,SSS} %highlight{[%p]} (%t) %c: %m%n"/>
<Filters>
<!-- Events whose level reaches the ERROR threshold are accepted by this appender.
     NOTE(review): no onMismatch is given, so other events follow the filter's default - confirm. -->
<ThresholdFilter level="ERROR" onMatch="ACCEPT" />
</Filters>
</Console>
</Appenders>
<Loggers>
<!-- The root logger lets every level through; filtering is left to the appender above. -->
<Root level="ALL">
<AppenderRef ref="Console"/>
</Root>
</Loggers>
</Configuration>