From e81a30fa76a8d2589e641468290bfac2a4add525 Mon Sep 17 00:00:00 2001
From: Kai Reinhard <K.Reinhard@micromata.de>
Date: Mon, 10 Dec 2018 19:02:15 +0000
Subject: [PATCH] Own cache for archive content files (for millions of file system items) including gzip.

---
 borgbutler-core/src/main/java/de/micromata/borgbutler/cache/ArchiveFilelistCache.java     |   89 ++++++++++++++++++++++
 borgbutler-core/src/main/java/de/micromata/borgbutler/cache/JCSCache.java                 |    8 +
 borgbutler-core/src/test/java/de/micromata/borgbutler/cache/ArchiveFilelistCacheTest.java |   58 ++++++++++++++
 borgbutler-core/out/test/resources/log4j.properties                                       |    2 
 borgbutler-core/src/main/java/de/micromata/borgbutler/cache/ButlerCache.java              |   48 +++++-------
 borgbutler-core/src/test/java/de/micromata/borgbutler/cache/CacheTest.java                |    6 
 borgbutler-core/src/test/resources/log4j.properties                                       |    2 
 7 files changed, 177 insertions(+), 36 deletions(-)

diff --git a/borgbutler-core/out/test/resources/log4j.properties b/borgbutler-core/out/test/resources/log4j.properties
index be0a5f2..7a86484 100644
--- a/borgbutler-core/out/test/resources/log4j.properties
+++ b/borgbutler-core/out/test/resources/log4j.properties
@@ -5,4 +5,4 @@
 log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
 
 # Pattern to output the caller's file name and line number.
-log4j.appender.stdout.layout.ConversionPattern=%5p [%t] (%F:%L) - %m%n
+log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %5p [%t] (%F:%L) - %m%n
diff --git a/borgbutler-core/src/main/java/de/micromata/borgbutler/cache/ArchiveFilelistCache.java b/borgbutler-core/src/main/java/de/micromata/borgbutler/cache/ArchiveFilelistCache.java
new file mode 100644
index 0000000..0f51789
--- /dev/null
+++ b/borgbutler-core/src/main/java/de/micromata/borgbutler/cache/ArchiveFilelistCache.java
@@ -0,0 +1,140 @@
+package de.micromata.borgbutler.cache;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import de.micromata.borgbutler.config.BorgRepoConfig;
+import de.micromata.borgbutler.json.borg.Archive;
+import de.micromata.borgbutler.json.borg.FilesystemItem;
+import de.micromata.borgbutler.utils.ReplaceUtils;
+import lombok.Getter;
+import org.apache.commons.collections4.CollectionUtils;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.*;
+import java.util.List;
+
+/**
+ * File based cache for the content (the list of file system items) of borg archives.
+ * <p>
+ * Every archive gets its own gzip compressed file inside the cache directory. The file is written with
+ * Java object serialization: first the number of items, then the items themselves, then the marker "EOF".
+ */
+class ArchiveFilelistCache {
+    private static final Logger log = LoggerFactory.getLogger(ArchiveFilelistCache.class);
+    public static final String CACHE_ARCHIVE_LISTS_BASENAME = "archive-content-";
+    private final File cacheDir;
+
+    @JsonIgnore
+    @Getter
+    private Archive archive;
+    @JsonProperty
+    private List<FilesystemItem> content;
+
+    /**
+     * Writes the given items to the cache file of the given archive. Does nothing if the list is null or
+     * empty. I/O errors are logged instead of thrown; an incompletely written file is removed so that
+     * {@link #load(BorgRepoConfig, Archive)} doesn't find a truncated list later.
+     *
+     * @param repoConfig      Repository of the archive (its repo name is part of the file name).
+     * @param archive         Archive the items belong to (its name is part of the file name).
+     * @param filesystemItems Items to store.
+     */
+    public void save(BorgRepoConfig repoConfig, Archive archive, List<FilesystemItem> filesystemItems) {
+        if (CollectionUtils.isEmpty(filesystemItems)) {
+            return;
+        }
+        File file = getFile(repoConfig, archive);
+        log.info("Saving archive content as file list: " + file.getAbsolutePath());
+        // Each stream is a resource of its own, so the file handle is closed even if a wrapping constructor throws.
+        try (FileOutputStream fileOut = new FileOutputStream(file);
+             GzipCompressorOutputStream gzipOut = new GzipCompressorOutputStream(fileOut);
+             ObjectOutputStream outputStream = new ObjectOutputStream(gzipOut)) {
+            outputStream.writeObject(filesystemItems.size());
+            for (FilesystemItem item : filesystemItems) {
+                outputStream.writeObject(item);
+            }
+            outputStream.writeObject("EOF");
+        } catch (IOException ex) {
+            log.error("Error while writing file list '" + file.getAbsolutePath() + "': " + ex.getMessage(), ex);
+            // All streams are closed at this point; drop the incomplete file.
+            if (file.exists() && !file.delete()) {
+                log.warn("Can't delete incomplete file list '" + file.getAbsolutePath() + "'.");
+            }
+            return;
+        }
+        log.info("Saving done.");
+    }
+
+    /**
+     * Reads the cached items of the given archive.
+     *
+     * @param repoConfig Repository of the archive.
+     * @param archive    Archive whose content is requested.
+     * @return The cached items, or null if there is no cache file or the file can't be read completely.
+     */
+    public FilesystemItem[] load(BorgRepoConfig repoConfig, Archive archive) {
+        File file = getFile(repoConfig, archive);
+        if (!file.exists()) {
+            return null;
+        }
+        log.info("Loading archive content as file list from: " + file.getAbsolutePath());
+        FilesystemItem[] list;
+        try (FileInputStream fileIn = new FileInputStream(file);
+             GzipCompressorInputStream gzipIn = new GzipCompressorInputStream(fileIn);
+             ObjectInputStream inputStream = new ObjectInputStream(gzipIn)) {
+            Object obj = inputStream.readObject();
+            if (!(obj instanceof Integer)) {
+                log.error("Can't load archive content. Integer expected, but received: " + classOf(obj));
+                return null;
+            }
+            int size = (Integer) obj;
+            if (size < 0) {
+                log.error("Can't load archive content. Invalid number of items: " + size);
+                return null;
+            }
+            list = new FilesystemItem[size];
+            for (int i = 0; i < size; i++) {
+                obj = inputStream.readObject();
+                if (!(obj instanceof FilesystemItem)) {
+                    log.error("Can't load archive content. FilesystemItem expected, but received: " + classOf(obj)
+                            + " at position " + i + ".");
+                    return null;
+                }
+                list[i] = (FilesystemItem) obj;
+            }
+        } catch (IOException | ClassNotFoundException ex) {
+            log.error("Error while reading file list '" + file.getAbsolutePath() + "': " + ex.getMessage(), ex);
+            // Don't hand out a partially filled array.
+            return null;
+        }
+        log.info("Loading done.");
+        return list;
+    }
+
+    /**
+     * Builds the cache file for the given archive from the repo name and the archive name. The basename
+     * already ends with "-", so the name passed to the encoder contains a double dash.
+     */
+    private File getFile(BorgRepoConfig repoConfig, Archive archive) {
+        return new File(cacheDir, ReplaceUtils.encodeFilename(CACHE_ARCHIVE_LISTS_BASENAME + "-"
+                + repoConfig.getRepo() + "-" + archive.getArchive() + ".gz", true));
+    }
+
+    /**
+     * @return The class of the given object, or null for a null object (used for log messages only).
+     */
+    private static Object classOf(Object obj) {
+        return obj != null ? obj.getClass() : null;
+    }
+
+    /**
+     * @param cacheDir Directory the cache files are stored in.
+     */
+    ArchiveFilelistCache(File cacheDir) {
+        this.cacheDir = cacheDir;
+    }
+}
+
diff --git a/borgbutler-core/src/main/java/de/micromata/borgbutler/cache/ButlerCache.java b/borgbutler-core/src/main/java/de/micromata/borgbutler/cache/ButlerCache.java
index 0b1a425..fb1e288 100644
--- a/borgbutler-core/src/main/java/de/micromata/borgbutler/cache/ButlerCache.java
+++ b/borgbutler-core/src/main/java/de/micromata/borgbutler/cache/ButlerCache.java
@@ -1,10 +1,10 @@
 package de.micromata.borgbutler.cache;
 
-import com.fasterxml.jackson.annotation.JsonIgnore;
 import de.micromata.borgbutler.BorgCommands;
 import de.micromata.borgbutler.config.BorgRepoConfig;
 import de.micromata.borgbutler.config.ConfigurationHandler;
 import de.micromata.borgbutler.json.borg.*;
+import org.apache.commons.collections4.CollectionUtils;
 import org.apache.commons.jcs.JCS;
 import org.apache.commons.jcs.access.CacheAccess;
 import org.apache.commons.lang3.StringUtils;
@@ -17,16 +17,13 @@
 
 public class ButlerCache {
     private static Logger log = LoggerFactory.getLogger(ButlerCache.class);
-    public static final String CACHE_DIR_NAME = "caches";
+    public static final String CACHE_DIR_NAME = "cache";
     private static ButlerCache instance = new ButlerCache();
 
-    private JCSCache jcsCache = JCSCache.getInstance();
+    private JCSCache jcsCache;
     private CacheAccess<String, RepoInfo> repoInfoCacheAccess;
     private CacheAccess<String, RepoList> repoListCacheAccess;
-    private CacheAccess<String, List<FilesystemItem>> archiveContentCacheAccess;
-
-    @JsonIgnore
-    private File cacheDir;
+    private ArchiveFilelistCache archiveFilelistCache;
 
     public static ButlerCache getInstance() {
         return instance;
@@ -79,41 +76,36 @@
         return repoList;
     }
 
-    public List<FilesystemItem> getArchiveContent(BorgRepoConfig repoConfig, Archive archive) {
+    public FilesystemItem[] getArchiveContent(BorgRepoConfig repoConfig, Archive archive) {
         if (archive == null || StringUtils.isBlank(archive.getArchive())) {
             return null;
         }
-        String repoArchiveId = getRepoArchiveId(repoConfig.getRepo(), archive.getId());
-        List<FilesystemItem> content = archiveContentCacheAccess.get(repoArchiveId);
-        if (content == null) {
-            content = BorgCommands.listArchiveContent(repoConfig, archive);
-            archiveContentCacheAccess.put(repoArchiveId, content);
-            archiveContentCacheAccess.getStatistics();
+        FilesystemItem[] items = archiveFilelistCache.load(repoConfig, archive);
+        if (items == null) {
+            List<FilesystemItem> list = BorgCommands.listArchiveContent(repoConfig, archive);
+            if (CollectionUtils.isNotEmpty(list)) {
+                archiveFilelistCache.save(repoConfig, archive, list);
+                items = list.toArray(new FilesystemItem[0]);
+            }
         }
-        log.info("archiveContentCacheAccess.stats: " + this.archiveContentCacheAccess.getStats());
-        if (content == null) {
+        if (items == null) {
             log.warn("Repo::archiv with name '" + repoConfig.getRepo() + "::" + archive.getArchive() + "' not found.");
         }
-        return content;
-    }
-
-    public String getRepoArchiveId(String repo, String archiveId) {
-        return repo + "::" + archiveId;
+        return items;
     }
 
     public void shutdown() {
-        log.info("archiveContentCacheAccess.stats: " + this.archiveContentCacheAccess.getStats());
         JCS.shutdown();
     }
 
+    public File getCacheDir() {
+        return jcsCache.getCacheDir();
+    }
+
     private ButlerCache() {
-        cacheDir = new File(ConfigurationHandler.getInstance().getWorkingDir(), CACHE_DIR_NAME);
-        if (!cacheDir.exists()) {
-            log.info("Creating cache dir: " + cacheDir.getAbsolutePath());
-            cacheDir.mkdir();
-        }
+        this.jcsCache = JCSCache.getInstance();
         this.repoInfoCacheAccess = jcsCache.getJCSCache("repoInfo");
         this.repoListCacheAccess = jcsCache.getJCSCache("repoList");
-        this.archiveContentCacheAccess = jcsCache.getJCSCache("archiveContent");
+        this.archiveFilelistCache = new ArchiveFilelistCache(getCacheDir());
     }
 }
diff --git a/borgbutler-core/src/main/java/de/micromata/borgbutler/cache/JCSCache.java b/borgbutler-core/src/main/java/de/micromata/borgbutler/cache/JCSCache.java
index 9dbadb6..f5466de 100644
--- a/borgbutler-core/src/main/java/de/micromata/borgbutler/cache/JCSCache.java
+++ b/borgbutler-core/src/main/java/de/micromata/borgbutler/cache/JCSCache.java
@@ -1,7 +1,7 @@
 package de.micromata.borgbutler.cache;
 
-import de.micromata.borgbutler.config.Configuration;
 import de.micromata.borgbutler.config.ConfigurationHandler;
+import lombok.Getter;
 import org.apache.commons.jcs.JCS;
 import org.apache.commons.jcs.access.CacheAccess;
 import org.slf4j.Logger;
@@ -22,13 +22,15 @@
         return instance;
     }
 
+    @Getter
+    private File cacheDir;
+
     public <K, V> CacheAccess<K, V> getJCSCache(String region) {
         return JCS.getInstance(region);
     }
 
     private JCSCache() {
-        Configuration configuration = ConfigurationHandler.getConfiguration();
-        File cacheDir = new File(ConfigurationHandler.getInstance().getWorkingDir(), CACHE_DIR_NAME);
+        cacheDir = new File(ConfigurationHandler.getInstance().getWorkingDir(), CACHE_DIR_NAME);
         if (!cacheDir.exists()) {
             log.info("Creating cache dir: " + cacheDir.getAbsolutePath());
             cacheDir.mkdir();
diff --git a/borgbutler-core/src/test/java/de/micromata/borgbutler/cache/ArchiveFilelistCacheTest.java b/borgbutler-core/src/test/java/de/micromata/borgbutler/cache/ArchiveFilelistCacheTest.java
new file mode 100644
index 0000000..993043f
--- /dev/null
+++ b/borgbutler-core/src/test/java/de/micromata/borgbutler/cache/ArchiveFilelistCacheTest.java
@@ -0,0 +1,95 @@
+package de.micromata.borgbutler.cache;
+
+import de.micromata.borgbutler.config.BorgRepoConfig;
+import de.micromata.borgbutler.json.borg.Archive;
+import de.micromata.borgbutler.json.borg.FilesystemItem;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.lang.reflect.Field;
+import java.nio.file.Files;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+
+/**
+ * Round trip test (save, then load) for {@link ArchiveFilelistCache} with one million items.
+ */
+public class ArchiveFilelistCacheTest {
+    private static final Logger log = LoggerFactory.getLogger(ArchiveFilelistCacheTest.class);
+
+    /**
+     * Saves the items to a fresh temporary cache directory, loads them again and compares size and paths.
+     */
+    @Test
+    void readWriteTest() throws Exception {
+        List<FilesystemItem> list = new ArrayList<>();
+        for (int i = 0; i < 1000000; i++) {
+            list.add(create(i));
+        }
+        // A relative "out" directory exists only in some working directories; a temp dir is always usable.
+        File cacheDir = Files.createTempDirectory("borgbutler-filelist-cache").toFile();
+        try {
+            ArchiveFilelistCache cache = new ArchiveFilelistCache(cacheDir);
+            BorgRepoConfig repoConfig = new BorgRepoConfig();
+            repoConfig.setRepo("repo");
+            Archive archive = new Archive();
+            set(archive, "archive", "archive-2018-12-10");
+            log.info("Saving " + list.size() + " items to " + cacheDir.getAbsolutePath() + ".");
+            cache.save(repoConfig, archive, list);
+            log.info("Saving done.");
+            log.info("Loading items from " + cacheDir.getAbsolutePath() + ".");
+            FilesystemItem[] filesystemItems = cache.load(repoConfig, archive);
+            // load() returns null if no cache file was found, so fail with a clear message instead of an NPE.
+            assertNotNull(filesystemItems, "No items loaded, see log for the I/O error.");
+            log.info("Loading " + filesystemItems.length + " items done.");
+            assertEquals(list.size(), filesystemItems.length);
+            for (int i = 0; i < filesystemItems.length; i++) {
+                assertEquals(list.get(i).getPath(), filesystemItems[i].getPath());
+            }
+        } finally {
+            cleanUp(cacheDir);
+        }
+    }
+
+    /**
+     * Creates a test item whose path contains the given index; fields are set via reflection.
+     */
+    private FilesystemItem create(int i) throws Exception {
+        FilesystemItem item = new FilesystemItem();
+        set(item, "type", "-").set(item, "mode", "drwxr-xr-x")
+                .set(item, "user", "kai").set(item, "group", "user")
+                .set(item, "path", "/Users/kai/Test" + i + ".java").set(item, "size", 1000);
+        return item;
+    }
+
+    /**
+     * Sets the declared field of the object's class via reflection, bypassing access checks.
+     *
+     * @return this for chaining.
+     */
+    private ArchiveFilelistCacheTest set(Object obj, String field, Object value) throws Exception {
+        Field f1 = obj.getClass().getDeclaredField(field);
+        f1.setAccessible(true);
+        f1.set(obj, value);
+        return this;
+    }
+
+    /**
+     * Deletes the files directly inside the given directory and then the directory itself (best effort).
+     */
+    private void cleanUp(File dir) {
+        File[] files = dir.listFiles();
+        if (files != null) {
+            for (File file : files) {
+                file.delete();
+            }
+        }
+        dir.delete();
+    }
+}
diff --git a/borgbutler-core/src/test/java/de/micromata/borgbutler/cache/CacheTest.java b/borgbutler-core/src/test/java/de/micromata/borgbutler/cache/CacheTest.java
index ebf96e3..a4d0a87 100644
--- a/borgbutler-core/src/test/java/de/micromata/borgbutler/cache/CacheTest.java
+++ b/borgbutler-core/src/test/java/de/micromata/borgbutler/cache/CacheTest.java
@@ -69,10 +69,10 @@
         }
         {
             if (archive != null) {
-                List<FilesystemItem> content = ButlerCache.getInstance().getArchiveContent(repoConfig, archive);
-                log.info("Number of items (content) of archive: " + content.size());
+                FilesystemItem[] content = ButlerCache.getInstance().getArchiveContent(repoConfig, archive);
+                log.info("Number of items (content) of archive: " + content.length);
                 content = ButlerCache.getInstance().getArchiveContent(repoConfig, archive);
-                log.info("Number of items (content) of archive: " + content.size());
+                log.info("Number of items (content) of archive: " + content.length);
             }
         }
         ButlerCache.getInstance().shutdown();
diff --git a/borgbutler-core/src/test/resources/log4j.properties b/borgbutler-core/src/test/resources/log4j.properties
index be0a5f2..7a86484 100644
--- a/borgbutler-core/src/test/resources/log4j.properties
+++ b/borgbutler-core/src/test/resources/log4j.properties
@@ -5,4 +5,4 @@
 log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
 
 # Pattern to output the caller's file name and line number.
-log4j.appender.stdout.layout.ConversionPattern=%5p [%t] (%F:%L) - %m%n
+log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %5p [%t] (%F:%L) - %m%n

--
Gitblit v1.10.0