untitled paste

unlisted 2 files 2023-07-27 16:31:06 UTC

Cargo.toml

Raw
[package]
name = "rust"
version = "0.1.0"
edition = "2021"

[dependencies]
regex = "1.8.4"
serde_json = { version = "1.0.79", features = ["preserve_order"] }
serde = { version = "1.0.136", features = ["derive"] }
libc = "0.2.147"

src/main.rs

Raw
#![feature(exit_status_error)]

// "[()] = [...]" is needed because of a clippy bug: https://github.com/rust-lang/rust-clippy/issues/9048 . So when you see "[()] = [f()]", just read it as "() = f()".

// Runs a shell snippet under `bash -c` and evaluates to `true` when it exits with code 0.
//
// The arguments are passed straight to `format!`, so callers may interpolate values.
// The snippet is prefixed with a fixed set of strict-mode options (`set -e`, `set -u`,
// `pipefail`, `inherit_errexit`, `nullglob`).
//
// Panics if bash cannot be spawned, or if the child reports no exit code
// (`ExitStatus::code()` returned `None`).
macro_rules! if_cmd {
    ($($cmd:tt)+) => {{
        let mut script = String::from("set -e; set -u; set -o pipefail; shopt -s inherit_errexit; shopt -s nullglob; ");
        script.push_str(&format!($($cmd)+));
        let status = std::process::Command::new("bash")
            .args(["-c", script.as_str()])
            .status()
            .unwrap();
        let exit_code = status.code().unwrap();
        exit_code == 0
    }}
}

// Runs a shell snippet under `bash -c` and panics unless it succeeds; evaluates to `()`.
//
// The arguments are passed straight to `format!`, so callers may interpolate values.
// The snippet is prefixed with a fixed set of strict-mode options (`set -e`, `set -u`,
// `pipefail`, `inherit_errexit`, `nullglob`).
//
// On failure the panic message contains both the exit status and the fully interpolated
// snippet, so the failing step can be identified from the log alone.
//
// Panics if bash cannot be spawned or the child does not exit successfully
// (non-zero exit code or killed by a signal).
macro_rules! cmd {
    ($($cmd:tt)+) => {{
        // Keep the caller's snippet separately so it can be reported on failure.
        let script = format!($($cmd)+);
        let status = std::process::Command::new("bash")
            .arg("-c")
            .arg(format!("set -e; set -u; set -o pipefail; shopt -s inherit_errexit; shopt -s nullglob; {}", script))
            .status()
            .expect("failed to spawn bash");
        assert!(status.success(), "command failed ({}): {}", status, script);
    }}
}

fn main() {
    assert_eq!(unsafe { libc::chdir(b"/\x00".as_ptr() as *const std::ffi::c_char) }, 0);

    {
        cmd!("[ \"$(id -u)\" != 0 ]");
    }

    // Init
    {
        cmd!("sudo cset shield --reset > /dev/null || :");
        cmd!("sudo cset shield -c 1-4 -k on > /dev/null");
        let pid = unsafe { libc::getpid() };
        assert!(pid > 1);
        cmd!("sudo cset proc --move --pid={pid} --toset=user > /dev/null");

        cmd!("sudo bash -c 'echo 0 > /proc/sys/kernel/randomize_va_space'");

        cmd!("sudo bash -c 'echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo'");

        cmd!("sudo bash -c 'for i in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor; do echo performance > $i; done'");

        cmd!("sudo sysctl vm.overcommit_memory=2 > /dev/null");

        cmd!("sudo swapoff -a");
    }

    const REPO: &str = "/home/user/dedup-bench/fs/repo";
    #[derive(Debug)]
    enum Method {
        ZStd,
        Borg { chunker_params: String, compression: String, encryption: String, },
        Casync { chunker_params: String, },
        Zpaq,
        Azwyon { chunk_size: usize, zstd: i32, digest: String, check_extracted: bool },
        Desync { chunker_params: String, },
    }
    let range = 0..=11;
    // let range = 0..=2;
    // let range = 0..=5;
    cmd!("rm -rf {REPO}");
    #[derive(Debug)]
    struct Run {
        method: Method,
        size: usize,
        compression: Vec<std::time::Duration>,
        decompression: Vec<std::time::Duration>,
        total_compression: std::time::Duration,
        total_decompression: std::time::Duration,
    }
    let mut methods = vec![];
    for _ in [()] {
        methods.push(Method::ZStd);
        for chunker_params in [
            /*"buzhash,19,23,21,4095", // borg default
            "buzhash,10,23,16,4095", // borg alternative
            "buzhash,14,18,16,4095", // casync default
            "buzhash,19,23,21,48", // borg default (casync's window size)
            "buzhash,10,23,16,48", // borg alternative
            "buzhash,14,18,16,48", // casync default*/
            "fixed,4194304",
        ] {
            for compression in [
                "zstd,3",
            ] {
                for encryption in [
                    "authenticated-blake2",
                ] {
                    methods.push(Method::Borg { chunker_params: chunker_params.to_owned(), compression: compression.to_owned(), encryption: encryption.to_owned() });
                }
            }
        }
        for chunker_params in [
            //format!("{}:{}:{}", 1 << 19, 1 << 21, 1 << 23), // borg default
            //format!("{}:{}:{}", 1 << 10, 1 << 16, 1 << 23), // borg alternative
            format!("{}:{}:{}", 1 << 14, 1 << 16, 1 << 18), // casync default
        ] {
            methods.push(Method::Casync { chunker_params: chunker_params.to_owned(), });
        }
        methods.push(Method::Zpaq);
        for chunk_size in [4194304] {
            for zstd in [3] {
                for digest in ["blake3"] {
                    for check_extracted in [false] {
                        methods.push(Method::Azwyon { chunk_size, zstd, digest: digest.to_owned(), check_extracted });
                    }
                }
            }
        }
        for chunker_params in [
            //format!("{}:{}:{}", 1 << 9, 1 << 11, 1 << 13), // borg default
            //format!("{}:{}:{}", 1 << 0, 1 << 6, 1 << 13), // borg alternative
            format!("{}:{}:{}", 1 << 4, 1 << 6, 1 << 8), // casync default
        ] {
            methods.push(Method::Desync { chunker_params: chunker_params.to_owned(), });
        }
    }
    for method in methods {
        cmd!("mkdir {REPO}");

        // Initializing
        match &method {
            Method::ZStd => {},
            Method::Borg { encryption, .. } => cmd!("BORG_PASSPHRASE=password borg2 rcreate --encryption={encryption} --repo {REPO}"),
            Method::Casync { .. } => cmd!("mkdir {REPO}/index {REPO}/storage.castr"),
            Method::Zpaq => {},
            Method::Azwyon { .. } => cmd!("mkdir {REPO}/chunks {REPO}/index"),
            Method::Desync { .. } => cmd!("mkdir {REPO}/index {REPO}/storage.castr"),
        }

        let mut compression = vec![];

        cmd!(": > /home/user/dedup-bench/vm/drop_caches; while [ -f /home/user/dedup-bench/vm/drop_caches ]; do sleep 0.1; done");
        for i in range.clone() {
            cmd!("zstd -d < /home/user/dedup-bench/stoz/{i:02}.zst > /home/user/dedup-bench/input-fs/{i:02}");
            println!("**** {method:?} {i} compression...");
            let now = std::time::Instant::now();
            match &method {
                Method::ZStd => cmd!("zstd -3 -T0 < /home/user/dedup-bench/input-fs/{i:02} > {REPO}/{i:02}.zst"),
                Method::Borg { chunker_params, compression, encryption: _ } => cmd!("cd /home/user/dedup-bench/input-fs; BORG_PASSPHRASE=password borg2 create --repo {REPO} --chunker-params {chunker_params} --compression {compression} {i:02} {i:02}"),
                Method::Casync { chunker_params } => cmd!("casync make --compression=zstd --chunk-size={chunker_params} --store={REPO}/storage.castr {REPO}/index/{i:02}.caibx /home/user/dedup-bench/input-fs/{i:02} > /dev/null"),
                Method::Zpaq => cmd!("cd /home/user/dedup-bench/input-fs; zpaq add {REPO}/repo.zpaq {i:02} > /dev/null"),
                Method::Azwyon { chunk_size, zstd, digest, check_extracted: _ } => cmd!("/home/user/dedup-bench/azwyon-dedup/target/release/azwyon-dedup make --chunk-size={chunk_size} --store={REPO} --zstd={zstd} --id={i:02} --digest={digest} < /home/user/dedup-bench/input-fs/{i:02}"),
                Method::Desync { chunker_params } => cmd!("/desync make -m {chunker_params} -s {REPO}/storage.castr {REPO}/index/{i:02}.caibx /home/user/dedup-bench/input-fs/{i:02}"),
            }
            cmd!("sync -f {REPO}/");
            compression.push(now.elapsed());
            println!("**** {method:?} {i} compression: {:?}", now.elapsed());
            cmd!("rm /home/user/dedup-bench/input-fs/{i:02}");
        }

        let output = std::process::Command::new("bash").arg("-ec").arg(format!("du --bytes --apparent-size -s {REPO}")).output().unwrap();
        [()] = [output.status.exit_ok().unwrap()];
        assert_eq!(output.stderr.len(), 0);
        let size: usize;
        {
            let x = String::from_utf8(output.stdout).unwrap();
            let caps = regex::Regex::new(r##"^(?<s>[0-9]+)\t/[-/a-z]+\n$"##).unwrap().captures_iter(&x).collect::<Vec<_>>();
            assert!(caps.len() == 1);
            size = caps[0]["s"].parse().unwrap();
        }
        {
            let len = size
                .to_string()
                .chars()
                .collect::<Vec<_>>()
                .rchunks(3)
                .map(|x|x.into_iter().collect::<String>())
                .rev()
                .collect::<Vec<_>>()
                .join("_");
            println!("size: {len}");
        }

        let mut decompression = vec![];
        for i in range.clone() {
            cmd!(": > /home/user/dedup-bench/vm/drop_caches; while [ -f /home/user/dedup-bench/vm/drop_caches ]; do sleep 0.1; done");
            println!("**** {method:?} {i} decompression...");
            let now = std::time::Instant::now();
            match method {
                Method::ZStd => cmd!("zstd -d -T0 < {REPO}/{i:02}.zst > /home/user/dedup-bench/input-fs/data"),
                Method::Borg { .. } => cmd!("cd /home/user/dedup-bench/input-fs; BORG_PASSPHRASE=password borg2 extract --repo {REPO} {i:02} {i:02}; mv -i {i:02} data"),
                Method::Casync { .. } => cmd!("casync extract --store={REPO}/storage.castr {REPO}/index/{i:02}.caibx /home/user/dedup-bench/input-fs/data"),
                Method::Zpaq => cmd!("cd /home/user/dedup-bench/input-fs; zpaq extract {REPO}/repo.zpaq {i:02} > /dev/null; mv -i {i:02} data"),
                Method::Azwyon { chunk_size: _, zstd: _, ref digest, check_extracted } => cmd!("/home/user/dedup-bench/azwyon-dedup/target/release/azwyon-dedup extract --store={REPO} --id={i:02} --to=/home/user/dedup-bench/input-fs/data --digest={digest} --check-extracted={check_extracted:?}"),
                Method::Desync { .. } => cmd!("/desync extract -s {REPO}/storage.castr {REPO}/index/{i:02}.caibx /home/user/dedup-bench/input-fs/data"),
            }
            decompression.push(now.elapsed());
            println!("**** {method:?} {i} decompression: {:?}", now.elapsed());
            cmd!("zstd -d < /home/user/dedup-bench/stoz/{i:02}.zst | cmp - /home/user/dedup-bench/input-fs/data");
            cmd!("rm /home/user/dedup-bench/input-fs/data");
            //if i == 0 {
                //break;
            //}
        }

        cmd!("sudo rm -r {REPO}");

        let total_compression   = compression.iter().sum();
        let total_decompression = decompression.iter().sum();
        let run = Run { method, size, compression, decompression, total_compression, total_decompression };
        println!("{:?}", run);
        #[derive(Debug, serde::Serialize)]
        struct JsonRun {
            method: String,
            size: usize,
            compression: Vec<u128>,
            decompression: Vec<u128>,
            total_compression: u128,
            total_decompression: u128,
        }
        let Run { method, size, compression, decompression, total_compression, total_decompression } = run;
        println!("{}", serde_json::to_string(&JsonRun {
            method: format!("{:?}", method),
            size,
            compression: compression.into_iter().map(|x|x.as_micros()).collect(),
            decompression: decompression.into_iter().map(|x|x.as_micros()).collect(),
            total_compression: total_compression.as_micros(),
            total_decompression: total_decompression.as_micros(),
        }).unwrap());
    }
}