improved data architecture

This commit is contained in:
2024-02-09 22:43:31 +08:00
parent f77a5c3e0d
commit e3d99fa178
34 changed files with 1188 additions and 267 deletions

View File

@@ -4,9 +4,9 @@ plugins {
dependencies {
compileOnly catalog.jakarta.persistence.api
compileOnly catalog.slf4j.api
implementation catalog.xz
implementation catalog.slf4j.api
implementation catalog.jzstd
implementation catalog.jwo
implementation catalog.commons.compress

View File

@@ -4,6 +4,7 @@ module net.woggioni.jpacrepo.impl {
requires net.woggioni.jpacrepo.api;
requires net.woggioni.jwo;
requires net.woggioni.jzstd;
requires org.slf4j;
requires org.apache.commons.compress;
exports net.woggioni.jpacrepo.impl.model;

View File

@@ -1,164 +0,0 @@
package net.woggioni.jpacrepo.impl.model;
import lombok.AccessLevel;
import lombok.NoArgsConstructor;
import lombok.SneakyThrows;
import net.woggioni.jpacrepo.api.model.PkgData;
import net.woggioni.jpacrepo.api.model.PkgId;
import net.woggioni.jwo.CollectionUtils;
import net.woggioni.jwo.Fun;
import net.woggioni.jwo.Hash;
import net.woggioni.jwo.JWO;
import net.woggioni.jwo.Tuple2;
import net.woggioni.jwo.UncloseableInputStream;
import net.woggioni.jzstd.ZstdInputStream;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.compressors.xz.XZCompressorInputStream;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.file.Files;
import java.nio.file.Path;
import java.text.ParseException;
import java.time.Instant;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.TreeMap;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.zip.GZIPInputStream;
@NoArgsConstructor(access = AccessLevel.PRIVATE)
public class PkgDataImpl {
@SneakyThrows
public static PkgData parseFile(Path file, net.woggioni.jpacrepo.api.model.CompressionFormat compressionFormat) {
Fun<InputStream, InputStream> decompressorStreamConstructor;
switch (compressionFormat) {
case XZ:
decompressorStreamConstructor = XZCompressorInputStream::new;
break;
case Z_STANDARD:
decompressorStreamConstructor = ZstdInputStream::from;
break;
case GZIP:
decompressorStreamConstructor = GZIPInputStream::new;
break;
default:
throw JWO.newThrowable(ParseException.class,
"Unsupported compression format '%s'", compressionFormat);
}
try(TarArchiveInputStream is = new TarArchiveInputStream(
decompressorStreamConstructor.apply(
new BufferedInputStream(
Files.newInputStream(file))))) {
var archiveEntry = is.getNextEntry();
while (archiveEntry != null) {
if (Objects.equals(".PKGINFO", archiveEntry.getName())) {
try(BufferedReader reader =
new BufferedReader(
new InputStreamReader(
new UncloseableInputStream(is)))) {
Map<String, List<String>> metadata = reader.lines().map(String::trim)
.filter(Predicate.not(String::isEmpty))
.filter(line -> !line.startsWith("#"))
.map((Fun<String, Tuple2<String, String>>) line -> {
int equals = line.indexOf("=");
if (equals < 0) {
throw JWO.newThrowable(ParseException.class,
"Error parsing .PKGINFO file in '%s'", file);
} else {
return Tuple2.newInstance(
line.substring(0, equals).trim(),
line.substring(equals + 1).trim());
}
}).collect(
Collectors.groupingBy(
Tuple2<String, String>::get_1,
TreeMap::new,
Collectors.mapping(Tuple2<String, String>::get_2,
Collectors.toUnmodifiableList())));
PkgData data = new PkgData();
data.setId(new PkgId());
data.getId().setCompressionFormat(compressionFormat);
for (Map.Entry<String, List<String>> entry : metadata.entrySet()) {
String key = entry.getKey();
List<String> value = entry.getValue();
switch (key) {
case "size":
data.setSize(Long.parseLong(value.get(0)));
break;
case "arch":
data.getId().setArch(value.get(0));
break;
case "replaces":
data.setReplaces(value.stream().collect(CollectionUtils.toUnmodifiableTreeSet()));
break;
case "packager":
data.setPackager(value.get(0));
break;
case "url":
data.setUrl(value.get(0));
break;
case "pkgname":
data.getId().setName(value.get(0));
break;
case "builddate":
data.setBuildDate(
Instant.ofEpochSecond(Long.parseLong(value.get(0))));
break;
case "license":
data.setLicense(value.get(0));
break;
case "pkgver":
data.getId().setVersion(value.get(0));
break;
case "pkgdesc":
data.setDescription(value.get(0));
break;
case "provides":
data.setProvides(value.stream().collect(CollectionUtils.toUnmodifiableTreeSet()));
break;
case "conflict":
data.setConflict(value.stream().collect(CollectionUtils.toUnmodifiableTreeSet()));
break;
case "backup":
data.setBackup(value.stream().collect(CollectionUtils.toUnmodifiableTreeSet()));
break;
case "optdepend":
data.setOptdepend(value.stream().collect(CollectionUtils.toUnmodifiableTreeSet()));
break;
case "depend":
data.setDepend(value.stream().collect(CollectionUtils.toUnmodifiableTreeSet()));
break;
case "makedepend":
data.setMakedepend(value.stream().collect(CollectionUtils.toUnmodifiableTreeSet()));
break;
case "makepkgopt":
data.setMakeopkgopt(value.stream().collect(CollectionUtils.toUnmodifiableTreeSet()));
break;
case "pkgbase":
data.setBase(value.get(0));
break;
default:
break;
}
}
try(InputStream fis = Files.newInputStream(file)) {
data.setMd5sum(Hash.hash(Hash.Algorithm.MD5, fis).toString());
}
data.setFileName(file.getFileName().toString());
return data;
}
} else {
archiveEntry = is.getNextEntry();
}
}
throw JWO.newThrowable(ParseException.class, ".PKGINFO file not found in '%s'", file);
}
}
}

View File

@@ -0,0 +1,296 @@
package net.woggioni.jpacrepo.impl.model;
import jakarta.persistence.EntityManager;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import net.woggioni.jpacrepo.api.model.CompressionFormat;
import net.woggioni.jpacrepo.api.model.Dependency;
import net.woggioni.jpacrepo.api.model.License;
import net.woggioni.jpacrepo.api.model.NamedEntity;
import net.woggioni.jpacrepo.api.model.NamedEntity_;
import net.woggioni.jpacrepo.api.model.Packager;
import net.woggioni.jpacrepo.api.model.PkgBase;
import net.woggioni.jpacrepo.api.model.PkgData;
import net.woggioni.jpacrepo.api.model.PkgId;
import net.woggioni.jwo.CollectionUtils;
import net.woggioni.jwo.Fun;
import net.woggioni.jwo.Hash;
import net.woggioni.jwo.JWO;
import net.woggioni.jwo.Tuple2;
import net.woggioni.jwo.UncloseableInputStream;
import net.woggioni.jzstd.ZstdInputStream;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.compressors.xz.XZCompressorInputStream;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.file.Files;
import java.nio.file.Path;
import java.text.ParseException;
import java.time.Instant;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.TreeMap;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.zip.GZIPInputStream;
@Slf4j
@RequiredArgsConstructor(access = AccessLevel.PRIVATE)
class NamedEntityFinder<T extends NamedEntity> {
private final Map<String, T> cache;
private final EntityManager em;
private final Class<T> cls;
private final Function<String, T> ctor;
public NamedEntityFinder(EntityManager em, Class<T> cls, Function<String, T> ctor) {
this(createLruCache(0x40000), em, cls, ctor);
}
private static <K, V> Map<K, V> createLruCache(int maxSize) {
return new LinkedHashMap<>() {
@Override
protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
return size() >= maxSize;
}
};
}
private Optional<T> queryNamedEntity(String name) {
if(log.isDebugEnabled()) {
log.debug("Querying named entity '{}' with name {}", cls.getName(), name);
}
var cb = em.getCriteriaBuilder();
var tq = cb.createQuery(cls);
var root = tq.from(cls);
tq.select(root).where(cb.equal(root.get(NamedEntity_.NAME), name));
var query = em.createQuery(tq);
query.setMaxResults(1);
return Optional.of(query.getResultList())
.filter(JWO.not(List::isEmpty))
.map(it -> it.get(0));
}
public T getByName(String name) {
return
cache.computeIfAbsent(
name,
it -> queryNamedEntity(it)
.orElseGet(() -> {
var newEntity = ctor.apply(it);
em.persist(newEntity);
return newEntity;
})
);
}
}
public class PkgDataParser {
private final NamedEntityFinder<Dependency> dependencyFinder;
private final NamedEntityFinder<Packager> packagerFinder;
private final NamedEntityFinder<License> licenseFinder;
private final NamedEntityFinder<PkgBase> pkgBaseFinder;
public PkgDataParser(EntityManager em) {
dependencyFinder = new NamedEntityFinder<>(em, Dependency.class, Dependency::of);
packagerFinder = new NamedEntityFinder<>(em, Packager.class, Packager::of);
licenseFinder = new NamedEntityFinder<>(em, License.class, License::of);
pkgBaseFinder = new NamedEntityFinder<>(em, PkgBase.class, PkgBase::of);
}
private static <T extends NamedEntity> T hydrate(
NamedEntityFinder<T> namedEntityFinder, T entity
) {
return Optional.ofNullable(entity)
.map(NamedEntity::getName)
.map(namedEntityFinder::getByName)
.orElse(null);
}
private static <T extends NamedEntity> Set<T> hydrate(
NamedEntityFinder<T> namedEntityFinder, Set<T> entites) {
return entites
.stream()
.map(it -> hydrate(namedEntityFinder, it))
.filter(Objects::nonNull)
.collect(Collectors.toUnmodifiableSet());
}
public PkgData hydrateJPA(PkgData pkgData) {
pkgData.setBase(hydrate(pkgBaseFinder, pkgData.getBase()));
pkgData.setDepend(hydrate(dependencyFinder, pkgData.getDepend()));
pkgData.setOptdepend(hydrate(dependencyFinder, pkgData.getOptdepend()));
pkgData.setProvides(hydrate(dependencyFinder, pkgData.getProvides()));
pkgData.setReplaces(hydrate(dependencyFinder, pkgData.getReplaces()));
pkgData.setBackup(hydrate(dependencyFinder, pkgData.getBackup()));
pkgData.setConflict(hydrate(dependencyFinder, pkgData.getConflict()));
pkgData.setMakedepend(hydrate(dependencyFinder, pkgData.getMakedepend()));
pkgData.setMakepkgopt(hydrate(dependencyFinder, pkgData.getMakepkgopt()));
pkgData.setPackager(hydrate(packagerFinder, pkgData.getPackager()));
pkgData.setLicense(hydrate(licenseFinder, pkgData.getLicense()));
return pkgData;
}
@SneakyThrows
public static PkgData parseFile(Path file, CompressionFormat compressionFormat) {
Fun<InputStream, InputStream> decompressorStreamConstructor = switch (compressionFormat) {
case XZ -> XZCompressorInputStream::new;
case Z_STANDARD -> ZstdInputStream::from;
case GZIP -> GZIPInputStream::new;
default -> throw JWO.newThrowable(ParseException.class,
"Unsupported compression format '%s'", compressionFormat);
};
try (TarArchiveInputStream is = new TarArchiveInputStream(
decompressorStreamConstructor.apply(
new BufferedInputStream(
Files.newInputStream(file))))) {
var archiveEntry = is.getNextEntry();
while (archiveEntry != null) {
if (Objects.equals(".PKGINFO", archiveEntry.getName())) {
try (BufferedReader reader =
new BufferedReader(
new InputStreamReader(
new UncloseableInputStream(is)))) {
Map<String, List<String>> metadata = reader.lines().map(String::trim)
.filter(Predicate.not(String::isEmpty))
.filter(line -> !line.startsWith("#"))
.map((Fun<String, Tuple2<String, String>>) line -> {
int equals = line.indexOf("=");
if (equals < 0) {
throw JWO.newThrowable(ParseException.class,
"Error parsing .PKGINFO file in '%s'", file);
} else {
return Tuple2.newInstance(
line.substring(0, equals).trim(),
line.substring(equals + 1).trim());
}
}).collect(
Collectors.groupingBy(
Tuple2<String, String>::get_1,
TreeMap::new,
Collectors.mapping(Tuple2<String, String>::get_2,
Collectors.toUnmodifiableList())));
PkgData data = new PkgData();
data.setPkgId(new PkgId());
data.getPkgId().setCompressionFormat(compressionFormat);
data.setDepend(Collections.emptySet());
data.setOptdepend(Collections.emptySet());
data.setMakedepend(Collections.emptySet());
data.setMakepkgopt(Collections.emptySet());
data.setProvides(Collections.emptySet());
data.setConflict(Collections.emptySet());
data.setLicense(Collections.emptySet());
data.setReplaces(Collections.emptySet());
data.setBackup(Collections.emptySet());
for (Map.Entry<String, List<String>> entry : metadata.entrySet()) {
String key = entry.getKey();
List<String> value = entry.getValue();
switch (key) {
case "size":
data.setSize(Long.parseLong(value.get(0)));
break;
case "arch":
data.getPkgId().setArch(value.get(0));
break;
case "replaces":
data.setReplaces(value.stream()
.map(Dependency::of)
.collect(CollectionUtils.toUnmodifiableTreeSet()));
break;
case "packager":
data.setPackager(Packager.of(value.get(0)));
break;
case "url":
data.setUrl(value.get(0));
break;
case "pkgname":
data.getPkgId().setName(value.get(0));
break;
case "builddate":
data.setBuildDate(
Instant.ofEpochSecond(Long.parseLong(value.get(0))));
break;
case "license":
data.setLicense(value.stream()
.map(License::of)
.collect(CollectionUtils.toUnmodifiableTreeSet()));
break;
case "pkgver":
data.getPkgId().setVersion(value.get(0));
break;
case "pkgdesc":
data.setDescription(value.get(0));
break;
case "provides":
data.setProvides(value.stream()
.map(Dependency::of)
.collect(CollectionUtils.toUnmodifiableTreeSet()));
break;
case "conflict":
data.setConflict(value
.stream()
.map(Dependency::of)
.collect(CollectionUtils.toUnmodifiableTreeSet()));
break;
case "backup":
data.setBackup(value
.stream()
.map(Dependency::of)
.collect(CollectionUtils.toUnmodifiableTreeSet()));
break;
case "optdepend":
data.setOptdepend(value
.stream()
.map(Dependency::of)
.collect(CollectionUtils.toUnmodifiableTreeSet()));
break;
case "depend":
data.setDepend(value.stream()
.map(Dependency::of)
.collect(CollectionUtils.toUnmodifiableTreeSet()));
break;
case "makedepend":
data.setMakedepend(value
.stream()
.map(Dependency::of)
.collect(CollectionUtils.toUnmodifiableTreeSet()));
break;
case "makepkgopt":
data.setMakepkgopt(value
.stream()
.map(Dependency::of)
.collect(CollectionUtils.toUnmodifiableTreeSet()));
break;
case "pkgbase":
data.setBase(PkgBase.of(value.get(0)));
break;
default:
break;
}
}
try (InputStream fis = Files.newInputStream(file)) {
data.setMd5sum(Hash.hash(Hash.Algorithm.MD5, fis).getBytes());
}
data.setFileName(file.getFileName().toString());
return data;
}
} else {
archiveEntry = is.getNextEntry();
}
}
throw JWO.newThrowable(ParseException.class, ".PKGINFO file not found in '%s'", file);
}
}
}