Revisions for untitled paste

View the changes made to this paste.

unlisted ⁨2⁩ ⁨files⁩ 2023-07-27 16:31:06 UTC

Cargo.toml

@@ -0,0 +1,10 @@

+[package]
+name = "rust"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+regex = "1.8.4"
+serde_json = { version = "1.0.79", features = ["preserve_order"] }
+serde = { version = "1.0.136", features = ["derive"] }
+libc = "0.2.147"

src/main.rs

@@ -0,0 +1,217 @@

+#![feature(exit_status_error)]
+
+// "[()] = [...]" is needed because of a clippy bug (https://github.com/rust-lang/rust-clippy/issues/9048). So when you see "[()] = [f()]", read it as "() = f()".
+
+/// Runs a `format!`-style shell snippet under `bash -c` and evaluates to `true` when it exits
+/// with status 0, `false` for any other exit code.
+///
+/// The snippet is prefixed with a strict-mode prelude (`set -e`, `set -u`, `pipefail`,
+/// `inherit_errexit`, `nullglob`).
+///
+/// Panics if bash cannot be spawned or if it terminates without an exit code.
+macro_rules! if_cmd {
+    ($($cmd:tt)+) => {{
+        // Strict-mode prelude followed by the caller's formatted snippet.
+        let script = format!("set -e; set -u; set -o pipefail; shopt -s inherit_errexit; shopt -s nullglob; {}", format!($($cmd)+));
+        let status = std::process::Command::new("bash").arg("-c").arg(script).status().unwrap();
+        status.code().unwrap() == 0
+    }}
+}
+
+/// Runs a `format!`-style shell snippet under `bash -c` and panics unless it succeeds.
+///
+/// The snippet is prefixed with a strict-mode prelude (`set -e`, `set -u`, `pipefail`,
+/// `inherit_errexit`, `nullglob`). The expansion has type `()`, so it can be used both as a
+/// statement and as a `match` arm.
+macro_rules! cmd {
+    ($($cmd:tt)+) => {{
+        // Strict-mode prelude followed by the caller's formatted snippet.
+        let script = format!("set -e; set -u; set -o pipefail; shopt -s inherit_errexit; shopt -s nullglob; {}", format!($($cmd)+));
+        let status = std::process::Command::new("bash").arg("-c").arg(script).status().unwrap();
+        // "[()] = [...]" instead of "() = ..." works around the clippy bug referenced above.
+        [()] = [status.exit_ok().unwrap()]
+    }}
+}
+
+/// Benchmarks several compressing / deduplicating archivers on the same sequence of inputs.
+///
+/// For every configured `Method` the program:
+/// 1. creates a fresh repository directory at `REPO` and initializes it,
+/// 2. decompresses each input `stoz/NN.zst`, stores it with the method and records how long the
+///    store (including `sync -f` of the repository) took,
+/// 3. measures the repository size with `du --bytes --apparent-size`,
+/// 4. extracts every input again, records the extraction time and compares the extracted data
+///    with the original input,
+/// 5. prints the results as a debug line and as one JSON line (durations in microseconds).
+///
+/// Any failing command, unexpected `du` output or extraction mismatch aborts with a panic.
+fn main() {
+    // Change the working directory to `/`.
+    std::env::set_current_dir("/").unwrap();
+
+    // Abort when running as uid 0; the privileged steps below invoke `sudo` themselves.
+    cmd!("[ \"$(id -u)\" != 0 ]");
+
+    // Init
+    {
+        // Recreate the cset shield on CPUs 1-4 and move this process into the `user` set.
+        cmd!("sudo cset shield --reset > /dev/null || :");
+        cmd!("sudo cset shield -c 1-4 -k on > /dev/null");
+        let pid = std::process::id();
+        assert!(pid > 1);
+        cmd!("sudo cset proc --move --pid={pid} --toset=user > /dev/null");
+
+        // Write 0 to randomize_va_space (address space layout randomization off).
+        cmd!("sudo bash -c 'echo 0 > /proc/sys/kernel/randomize_va_space'");
+
+        // Write 1 to intel_pstate/no_turbo.
+        cmd!("sudo bash -c 'echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo'");
+
+        // Select the `performance` scaling governor on every CPU.
+        cmd!("sudo bash -c 'for i in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor; do echo performance > $i; done'");
+
+        cmd!("sudo sysctl vm.overcommit_memory=2 > /dev/null");
+
+        cmd!("sudo swapoff -a");
+    }
+
+    /// Directory that holds the repository of the method currently under test.
+    const REPO: &str = "/home/user/dedup-bench/fs/repo";
+
+    /// A tool under test together with the parameters passed to its command line.
+    #[derive(Debug)]
+    enum Method {
+        ZStd,
+        Borg { chunker_params: String, compression: String, encryption: String, },
+        Casync { chunker_params: String, },
+        Zpaq,
+        Azwyon { chunk_size: usize, zstd: i32, digest: String, check_extracted: bool },
+        Desync { chunker_params: String, },
+    }
+
+    /// Measurements collected for one method.
+    #[derive(Debug)]
+    struct Run {
+        method: Method,
+        /// Repository size in bytes as reported by `du` after all inputs were stored.
+        size: usize,
+        /// Store time per input, in input order.
+        compression: Vec<std::time::Duration>,
+        /// Extraction time per input, in input order.
+        decompression: Vec<std::time::Duration>,
+        total_compression: std::time::Duration,
+        total_decompression: std::time::Duration,
+    }
+
+    /// JSON form of `Run`: the method as its debug string, durations as microseconds.
+    #[derive(Debug, serde::Serialize)]
+    struct JsonRun {
+        method: String,
+        size: usize,
+        compression: Vec<u128>,
+        decompression: Vec<u128>,
+        total_compression: u128,
+        total_decompression: u128,
+    }
+
+    // Indices of the inputs `stoz/NN.zst` to process.
+    let range = 0..=11;
+    // let range = 0..=2;
+    // let range = 0..=5;
+
+    // Start without leftovers from a previous run.
+    cmd!("rm -rf {REPO}");
+
+    // Build the list of configurations; every array below is one axis of the parameter grid.
+    let mut methods = vec![];
+    methods.push(Method::ZStd);
+    for chunker_params in [
+        /*"buzhash,19,23,21,4095", // borg default
+        "buzhash,10,23,16,4095", // borg alternative
+        "buzhash,14,18,16,4095", // casync default
+        "buzhash,19,23,21,48", // borg default (casync's window size)
+        "buzhash,10,23,16,48", // borg alternative
+        "buzhash,14,18,16,48", // casync default*/
+        "fixed,4194304",
+    ] {
+        for compression in [
+            "zstd,3",
+        ] {
+            for encryption in [
+                "authenticated-blake2",
+            ] {
+                methods.push(Method::Borg { chunker_params: chunker_params.to_owned(), compression: compression.to_owned(), encryption: encryption.to_owned() });
+            }
+        }
+    }
+    for chunker_params in [
+        //format!("{}:{}:{}", 1 << 19, 1 << 21, 1 << 23), // borg default
+        //format!("{}:{}:{}", 1 << 10, 1 << 16, 1 << 23), // borg alternative
+        format!("{}:{}:{}", 1 << 14, 1 << 16, 1 << 18), // casync default
+    ] {
+        methods.push(Method::Casync { chunker_params });
+    }
+    methods.push(Method::Zpaq);
+    for chunk_size in [4194304] {
+        for zstd in [3] {
+            for digest in ["blake3"] {
+                for check_extracted in [false] {
+                    methods.push(Method::Azwyon { chunk_size, zstd, digest: digest.to_owned(), check_extracted });
+                }
+            }
+        }
+    }
+    for chunker_params in [
+        //format!("{}:{}:{}", 1 << 9, 1 << 11, 1 << 13), // borg default
+        //format!("{}:{}:{}", 1 << 0, 1 << 6, 1 << 13), // borg alternative
+        format!("{}:{}:{}", 1 << 4, 1 << 6, 1 << 8), // casync default
+    ] {
+        methods.push(Method::Desync { chunker_params });
+    }
+
+    // Expected shape of the `du` output: "<bytes>\t<absolute path>\n".
+    // "(?P<name>...)" is used because "(?<name>...)" is only accepted by regex >= 1.9, while
+    // Cargo.toml allows 1.8.4.
+    let du_line = regex::Regex::new(r"^(?P<s>[0-9]+)\t/[-/a-z]+\n$").unwrap();
+
+    for method in methods {
+        cmd!("mkdir {REPO}");
+
+        // Initializing
+        match &method {
+            Method::ZStd => {},
+            Method::Borg { encryption, .. } => cmd!("BORG_PASSPHRASE=password borg2 rcreate --encryption={encryption} --repo {REPO}"),
+            Method::Casync { .. } => cmd!("mkdir {REPO}/index {REPO}/storage.castr"),
+            Method::Zpaq => {},
+            Method::Azwyon { .. } => cmd!("mkdir {REPO}/chunks {REPO}/index"),
+            Method::Desync { .. } => cmd!("mkdir {REPO}/index {REPO}/storage.castr"),
+        }
+
+        let mut compression_times = vec![];
+
+        // Create the `drop_caches` marker file and wait until it disappears again.
+        // NOTE(review): presumably an external watcher drops the caches and removes the file - confirm.
+        cmd!(": > /home/user/dedup-bench/vm/drop_caches; while [ -f /home/user/dedup-bench/vm/drop_caches ]; do sleep 0.1; done");
+        for i in range.clone() {
+            // Materialize the input; this happens before the timer starts.
+            cmd!("zstd -d < /home/user/dedup-bench/stoz/{i:02}.zst > /home/user/dedup-bench/input-fs/{i:02}");
+            println!("**** {method:?} {i} compression...");
+            let now = std::time::Instant::now();
+            match &method {
+                Method::ZStd => cmd!("zstd -3 -T0 < /home/user/dedup-bench/input-fs/{i:02} > {REPO}/{i:02}.zst"),
+                Method::Borg { chunker_params, compression, encryption: _ } => cmd!("cd /home/user/dedup-bench/input-fs; BORG_PASSPHRASE=password borg2 create --repo {REPO} --chunker-params {chunker_params} --compression {compression} {i:02} {i:02}"),
+                Method::Casync { chunker_params } => cmd!("casync make --compression=zstd --chunk-size={chunker_params} --store={REPO}/storage.castr {REPO}/index/{i:02}.caibx /home/user/dedup-bench/input-fs/{i:02} > /dev/null"),
+                Method::Zpaq => cmd!("cd /home/user/dedup-bench/input-fs; zpaq add {REPO}/repo.zpaq {i:02} > /dev/null"),
+                Method::Azwyon { chunk_size, zstd, digest, check_extracted: _ } => cmd!("/home/user/dedup-bench/azwyon-dedup/target/release/azwyon-dedup make --chunk-size={chunk_size} --store={REPO} --zstd={zstd} --id={i:02} --digest={digest} < /home/user/dedup-bench/input-fs/{i:02}"),
+                Method::Desync { chunker_params } => cmd!("/desync make -m {chunker_params} -s {REPO}/storage.castr {REPO}/index/{i:02}.caibx /home/user/dedup-bench/input-fs/{i:02}"),
+            }
+            // The sync is part of the measured time.
+            cmd!("sync -f {REPO}/");
+            // Measure once so the logged and the recorded duration are the same value.
+            let elapsed = now.elapsed();
+            println!("**** {method:?} {i} compression: {elapsed:?}");
+            compression_times.push(elapsed);
+            cmd!("rm /home/user/dedup-bench/input-fs/{i:02}");
+        }
+
+        // Repository size in bytes; `du` must succeed silently and print exactly one line.
+        let output = std::process::Command::new("bash").arg("-ec").arg(format!("du --bytes --apparent-size -s {REPO}")).output().unwrap();
+        [()] = [output.status.exit_ok().unwrap()];
+        assert_eq!(output.stderr.len(), 0);
+        let size: usize = {
+            let stdout = String::from_utf8(output.stdout).unwrap();
+            let caps = du_line.captures(&stdout).expect("unexpected `du` output");
+            caps["s"].parse().unwrap()
+        };
+        {
+            // Print the size with `_` between groups of three digits, e.g. 1_234_567.
+            let grouped = size
+                .to_string()
+                .chars()
+                .collect::<Vec<_>>()
+                .rchunks(3)
+                .map(|digits| digits.iter().collect::<String>())
+                .rev()
+                .collect::<Vec<_>>()
+                .join("_");
+            println!("size: {grouped}");
+        }
+
+        let mut decompression_times = vec![];
+        for i in range.clone() {
+            // Same marker-file handshake as above, here repeated before every extraction.
+            cmd!(": > /home/user/dedup-bench/vm/drop_caches; while [ -f /home/user/dedup-bench/vm/drop_caches ]; do sleep 0.1; done");
+            println!("**** {method:?} {i} decompression...");
+            let now = std::time::Instant::now();
+            match &method {
+                Method::ZStd => cmd!("zstd -d -T0 < {REPO}/{i:02}.zst > /home/user/dedup-bench/input-fs/data"),
+                Method::Borg { .. } => cmd!("cd /home/user/dedup-bench/input-fs; BORG_PASSPHRASE=password borg2 extract --repo {REPO} {i:02} {i:02}; mv -i {i:02} data"),
+                Method::Casync { .. } => cmd!("casync extract --store={REPO}/storage.castr {REPO}/index/{i:02}.caibx /home/user/dedup-bench/input-fs/data"),
+                Method::Zpaq => cmd!("cd /home/user/dedup-bench/input-fs; zpaq extract {REPO}/repo.zpaq {i:02} > /dev/null; mv -i {i:02} data"),
+                Method::Azwyon { digest, check_extracted, .. } => cmd!("/home/user/dedup-bench/azwyon-dedup/target/release/azwyon-dedup extract --store={REPO} --id={i:02} --to=/home/user/dedup-bench/input-fs/data --digest={digest} --check-extracted={check_extracted:?}"),
+                Method::Desync { .. } => cmd!("/desync extract -s {REPO}/storage.castr {REPO}/index/{i:02}.caibx /home/user/dedup-bench/input-fs/data"),
+            }
+            // Measure once so the logged and the recorded duration are the same value.
+            let elapsed = now.elapsed();
+            println!("**** {method:?} {i} decompression: {elapsed:?}");
+            decompression_times.push(elapsed);
+            // Verify the extracted data against the original input (outside the measured time).
+            cmd!("zstd -d < /home/user/dedup-bench/stoz/{i:02}.zst | cmp - /home/user/dedup-bench/input-fs/data");
+            cmd!("rm /home/user/dedup-bench/input-fs/data");
+            //if i == 0 {
+                //break;
+            //}
+        }
+
+        cmd!("sudo rm -r {REPO}");
+
+        let total_compression: std::time::Duration = compression_times.iter().sum();
+        let total_decompression: std::time::Duration = decompression_times.iter().sum();
+        let run = Run {
+            method,
+            size,
+            compression: compression_times,
+            decompression: decompression_times,
+            total_compression,
+            total_decompression,
+        };
+        println!("{:?}", run);
+
+        // Machine-readable result: one JSON object per method.
+        let Run { method, size, compression, decompression, total_compression, total_decompression } = run;
+        println!("{}", serde_json::to_string(&JsonRun {
+            method: format!("{:?}", method),
+            size,
+            compression: compression.into_iter().map(|x| x.as_micros()).collect(),
+            decompression: decompression.into_iter().map(|x| x.as_micros()).collect(),
+            total_compression: total_compression.as_micros(),
+            total_decompression: total_decompression.as_micros(),
+        }).unwrap());
+    }
+}