Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "rust"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+regex = "1.8.4"
+serde_json = { version = "1.0.79", features = ["preserve_order"] }
+serde = { version = "1.0.136", features = ["derive"] }
+libc = "0.2.147"
src/main.rs
@@ -0,0 +1,217 @@
+#![feature(exit_status_error)]
+
+// "[()] = [...]" is needed because of clippy bug: https://github.com/rust-lang/rust-clippy/issues/9048 . So when you see "[()] = [f()]", just assume "() = f()"
+
+/// Runs a shell snippet through `bash -c` and evaluates to `true` when it exits with status 0.
+///
+/// The arguments are passed to `format!` unchanged, so they may use inline captures such as
+/// `{REPO}`. The resulting script is prefixed with a strict-mode prelude (`set -e`, `set -u`,
+/// `pipefail`, `inherit_errexit`, `nullglob`).
+///
+/// # Panics
+///
+/// Panics if `bash` cannot be spawned, or if the child has no exit code (`code()` is `None`).
+macro_rules! if_cmd {
+    ($($cmd:tt)+) => {{
+        let script = format!(
+            "set -e; set -u; set -o pipefail; shopt -s inherit_errexit; shopt -s nullglob; {}",
+            format!($($cmd)+)
+        );
+        let status = std::process::Command::new("bash").arg("-c").arg(script).status().unwrap();
+        status.code().unwrap() == 0
+    }}
+}
+
+/// Runs a shell snippet through `bash -c` and panics unless it exits successfully.
+///
+/// The arguments are passed to `format!` unchanged, so they may use inline captures such as
+/// `{REPO}` or `{i:02}`. The resulting script is prefixed with the same strict-mode prelude as
+/// `if_cmd!`. The expansion has the shape `[()] = [...]`, which stands for `() = ...` (clippy
+/// workaround, see the note at the top of the file).
+///
+/// # Panics
+///
+/// Panics if `bash` cannot be spawned or if `exit_ok()` reports a failure.
+macro_rules! cmd {
+    ($($cmd:tt)+) => {
+        [()] = [{
+            let script = format!(
+                "set -e; set -u; set -o pipefail; shopt -s inherit_errexit; shopt -s nullglob; {}",
+                format!($($cmd)+)
+            );
+            std::process::Command::new("bash").arg("-c").arg(script).status().unwrap().exit_ok().unwrap()
+        }]
+    }
+}
+
+/// Benchmark driver: for every configured [`Method`] it stores inputs `00..=11` one by one into a
+/// fresh repository, measures the repository size with `du`, extracts every input again and
+/// compares it against the original, then prints the timings both as a `Debug` dump and as one
+/// JSON line per method.
+///
+/// All external work is done through `cmd!`, so any failing command aborts the run with a panic.
+/// The host tweaks applied in the "Init" section (ASLR, turbo, governor, overcommit, swap) are
+/// not reverted by this program.
+fn main() {
+    // Work from "/" so relative paths never depend on where the binary was started.
+    std::env::set_current_dir("/").unwrap();
+
+    // Refuse to run as root; privileged steps go through `sudo` explicitly.
+    cmd!("[ \"$(id -u)\" != 0 ]");
+
+    // Init
+    {
+        // (Re)create a cset shield on CPUs 1-4 and move this process into the shielded set.
+        cmd!("sudo cset shield --reset > /dev/null || :");
+        cmd!("sudo cset shield -c 1-4 -k on > /dev/null");
+        let pid = std::process::id();
+        assert!(pid > 1);
+        cmd!("sudo cset proc --move --pid={pid} --toset=user > /dev/null");
+
+        // Disable address space layout randomization.
+        cmd!("sudo bash -c 'echo 0 > /proc/sys/kernel/randomize_va_space'");
+
+        // Disable turbo boost (intel_pstate).
+        cmd!("sudo bash -c 'echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo'");
+
+        // Pin every CPU to the "performance" frequency governor.
+        cmd!("sudo bash -c 'for i in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor; do echo performance > $i; done'");
+
+        cmd!("sudo sysctl vm.overcommit_memory=2 > /dev/null");
+
+        cmd!("sudo swapoff -a");
+    }
+
+    /// Directory that holds the repository of the method currently under test.
+    const REPO: &str = "/home/user/dedup-bench/fs/repo";
+
+    /// One tool/parameter combination to benchmark.
+    #[derive(Debug)]
+    enum Method {
+        ZStd,
+        Borg { chunker_params: String, compression: String, encryption: String },
+        Casync { chunker_params: String },
+        Zpaq,
+        Azwyon { chunk_size: usize, zstd: i32, digest: String, check_extracted: bool },
+        Desync { chunker_params: String },
+    }
+
+    /// Measurements collected for one method.
+    #[derive(Debug)]
+    struct Run {
+        method: Method,
+        size: usize,
+        compression: Vec<std::time::Duration>,
+        decompression: Vec<std::time::Duration>,
+        total_compression: std::time::Duration,
+        total_decompression: std::time::Duration,
+    }
+
+    /// JSON form of [`Run`]: the method is its `Debug` string, durations are microseconds.
+    #[derive(Debug, serde::Serialize)]
+    struct JsonRun {
+        method: String,
+        size: usize,
+        compression: Vec<u128>,
+        decompression: Vec<u128>,
+        total_compression: u128,
+        total_decompression: u128,
+    }
+
+    // Indices of the input images; each one lives at stoz/{i:02}.zst.
+    let range = 0..=11;
+    // let range = 0..=2;
+    // let range = 0..=5;
+
+    // Start from a clean slate in case a previous run was interrupted.
+    cmd!("rm -rf {REPO}");
+
+    // Build the list of methods. The loops are kept so that more parameter values can be added
+    // to any array to widen the sweep.
+    let mut methods: Vec<Method> = Vec::new();
+    {
+        methods.push(Method::ZStd);
+
+        for chunker_params in [
+            // "buzhash,19,23,21,4095", // borg default
+            // "buzhash,10,23,16,4095", // borg alternative
+            // "buzhash,14,18,16,4095", // casync default
+            // "buzhash,19,23,21,48", // borg default (casync's window size)
+            // "buzhash,10,23,16,48", // borg alternative
+            // "buzhash,14,18,16,48", // casync default
+            "fixed,4194304",
+        ] {
+            for compression in ["zstd,3"] {
+                for encryption in ["authenticated-blake2"] {
+                    methods.push(Method::Borg {
+                        chunker_params: chunker_params.to_owned(),
+                        compression: compression.to_owned(),
+                        encryption: encryption.to_owned(),
+                    });
+                }
+            }
+        }
+
+        for chunker_params in [
+            //format!("{}:{}:{}", 1 << 19, 1 << 21, 1 << 23), // borg default
+            //format!("{}:{}:{}", 1 << 10, 1 << 16, 1 << 23), // borg alternative
+            format!("{}:{}:{}", 1 << 14, 1 << 16, 1 << 18), // casync default
+        ] {
+            methods.push(Method::Casync { chunker_params });
+        }
+
+        methods.push(Method::Zpaq);
+
+        for chunk_size in [4194304] {
+            for zstd in [3] {
+                for digest in ["blake3"] {
+                    for check_extracted in [false] {
+                        methods.push(Method::Azwyon { chunk_size, zstd, digest: digest.to_owned(), check_extracted });
+                    }
+                }
+            }
+        }
+
+        for chunker_params in [
+            //format!("{}:{}:{}", 1 << 9, 1 << 11, 1 << 13), // borg default
+            //format!("{}:{}:{}", 1 << 0, 1 << 6, 1 << 13), // borg alternative
+            format!("{}:{}:{}", 1 << 4, 1 << 6, 1 << 8), // casync default
+        ] {
+            methods.push(Method::Desync { chunker_params });
+        }
+    }
+
+    // Matches the single line printed by `du -s`: "<bytes>\t<path>\n". Compiled once for all runs.
+    let du_re = regex::Regex::new(r##"^(?<s>[0-9]+)\t/[-/a-z]+\n$"##).unwrap();
+
+    for method in methods {
+        cmd!("mkdir {REPO}");
+
+        // Initializing
+        match &method {
+            Method::ZStd => {},
+            Method::Borg { encryption, .. } => cmd!("BORG_PASSPHRASE=password borg2 rcreate --encryption={encryption} --repo {REPO}"),
+            Method::Casync { .. } => cmd!("mkdir {REPO}/index {REPO}/storage.castr"),
+            Method::Zpaq => {},
+            Method::Azwyon { .. } => cmd!("mkdir {REPO}/chunks {REPO}/index"),
+            Method::Desync { .. } => cmd!("mkdir {REPO}/index {REPO}/storage.castr"),
+        }
+
+        let mut compression = vec![];
+
+        // Create the drop_caches marker file and wait until it disappears again.
+        // NOTE(review): presumably an external helper drops the page cache and removes the file — confirm.
+        cmd!(": > /home/user/dedup-bench/vm/drop_caches; while [ -f /home/user/dedup-bench/vm/drop_caches ]; do sleep 0.1; done");
+        for i in range.clone() {
+            // Materialize the input outside the timed region.
+            cmd!("zstd -d < /home/user/dedup-bench/stoz/{i:02}.zst > /home/user/dedup-bench/input-fs/{i:02}");
+            println!("**** {method:?} {i} compression...");
+            let now = std::time::Instant::now();
+            match &method {
+                Method::ZStd => cmd!("zstd -3 -T0 < /home/user/dedup-bench/input-fs/{i:02} > {REPO}/{i:02}.zst"),
+                Method::Borg { chunker_params, compression, encryption: _ } => cmd!("cd /home/user/dedup-bench/input-fs; BORG_PASSPHRASE=password borg2 create --repo {REPO} --chunker-params {chunker_params} --compression {compression} {i:02} {i:02}"),
+                Method::Casync { chunker_params } => cmd!("casync make --compression=zstd --chunk-size={chunker_params} --store={REPO}/storage.castr {REPO}/index/{i:02}.caibx /home/user/dedup-bench/input-fs/{i:02} > /dev/null"),
+                Method::Zpaq => cmd!("cd /home/user/dedup-bench/input-fs; zpaq add {REPO}/repo.zpaq {i:02} > /dev/null"),
+                Method::Azwyon { chunk_size, zstd, digest, check_extracted: _ } => cmd!("/home/user/dedup-bench/azwyon-dedup/target/release/azwyon-dedup make --chunk-size={chunk_size} --store={REPO} --zstd={zstd} --id={i:02} --digest={digest} < /home/user/dedup-bench/input-fs/{i:02}"),
+                Method::Desync { chunker_params } => cmd!("/desync make -m {chunker_params} -s {REPO}/storage.castr {REPO}/index/{i:02}.caibx /home/user/dedup-bench/input-fs/{i:02}"),
+            }
+            // The filesystem sync is part of the measured time.
+            cmd!("sync -f {REPO}/");
+            // Take one measurement so the logged value is exactly the recorded one.
+            let elapsed = now.elapsed();
+            compression.push(elapsed);
+            println!("**** {method:?} {i} compression: {elapsed:?}");
+            cmd!("rm /home/user/dedup-bench/input-fs/{i:02}");
+        }
+
+        // Repository size in bytes, as reported by `du`.
+        let output = std::process::Command::new("bash").arg("-ec").arg(format!("du --bytes --apparent-size -s {REPO}")).output().unwrap();
+        [()] = [output.status.exit_ok().unwrap()];
+        assert_eq!(output.stderr.len(), 0);
+        let size: usize = {
+            let stdout = String::from_utf8(output.stdout).unwrap();
+            // The pattern is anchored at both ends, so there is at most one match.
+            let caps = du_re.captures(&stdout).expect("unexpected `du` output");
+            caps["s"].parse().unwrap()
+        };
+        {
+            // Pretty-print the size with `_` between groups of three digits.
+            let len = size
+                .to_string()
+                .chars()
+                .collect::<Vec<_>>()
+                .rchunks(3)
+                .map(|group| group.iter().collect::<String>())
+                .rev()
+                .collect::<Vec<_>>()
+                .join("_");
+            println!("size: {len}");
+        }
+
+        let mut decompression = vec![];
+        for i in range.clone() {
+            // Same marker-file handshake as before compression, here repeated before every extraction.
+            cmd!(": > /home/user/dedup-bench/vm/drop_caches; while [ -f /home/user/dedup-bench/vm/drop_caches ]; do sleep 0.1; done");
+            println!("**** {method:?} {i} decompression...");
+            let now = std::time::Instant::now();
+            match &method {
+                Method::ZStd => cmd!("zstd -d -T0 < {REPO}/{i:02}.zst > /home/user/dedup-bench/input-fs/data"),
+                Method::Borg { .. } => cmd!("cd /home/user/dedup-bench/input-fs; BORG_PASSPHRASE=password borg2 extract --repo {REPO} {i:02} {i:02}; mv -i {i:02} data"),
+                Method::Casync { .. } => cmd!("casync extract --store={REPO}/storage.castr {REPO}/index/{i:02}.caibx /home/user/dedup-bench/input-fs/data"),
+                Method::Zpaq => cmd!("cd /home/user/dedup-bench/input-fs; zpaq extract {REPO}/repo.zpaq {i:02} > /dev/null; mv -i {i:02} data"),
+                Method::Azwyon { chunk_size: _, zstd: _, digest, check_extracted } => cmd!("/home/user/dedup-bench/azwyon-dedup/target/release/azwyon-dedup extract --store={REPO} --id={i:02} --to=/home/user/dedup-bench/input-fs/data --digest={digest} --check-extracted={check_extracted:?}"),
+                Method::Desync { .. } => cmd!("/desync extract -s {REPO}/storage.castr {REPO}/index/{i:02}.caibx /home/user/dedup-bench/input-fs/data"),
+            }
+            let elapsed = now.elapsed();
+            decompression.push(elapsed);
+            println!("**** {method:?} {i} decompression: {elapsed:?}");
+            // Verify the round trip against the pristine input (outside the timed region).
+            cmd!("zstd -d < /home/user/dedup-bench/stoz/{i:02}.zst | cmp - /home/user/dedup-bench/input-fs/data");
+            cmd!("rm /home/user/dedup-bench/input-fs/data");
+            //if i == 0 {
+            //break;
+            //}
+        }
+
+        cmd!("sudo rm -r {REPO}");
+
+        let total_compression: std::time::Duration = compression.iter().sum();
+        let total_decompression: std::time::Duration = decompression.iter().sum();
+        let run = Run { method, size, compression, decompression, total_compression, total_decompression };
+        println!("{:?}", run);
+
+        // Emit the same data as one machine-readable JSON line.
+        let Run { method, size, compression, decompression, total_compression, total_decompression } = run;
+        println!("{}", serde_json::to_string(&JsonRun {
+            method: format!("{:?}", method),
+            size,
+            compression: compression.into_iter().map(|x| x.as_micros()).collect(),
+            decompression: decompression.into_iter().map(|x| x.as_micros()).collect(),
+            total_compression: total_compression.as_micros(),
+            total_decompression: total_decompression.as_micros(),
+        }).unwrap());
+    }
+}