No description
- Rust 92.7%
- Python 7.3%
| local-s3 | ||
| perf-charts | ||
| results | ||
| rust | ||
| spark | ||
| .gitignore | ||
| README.md | ||
| rust-toolchain | ||
Ingest many JSON files
How to run
On machine A (192.168.1.15 on my network):
# Start the local S3 server from the local-s3 crate. Chaining with && keeps
# cargo from running in the wrong directory if the cd fails.
# NOTE(review): BIND=0.0.0.0 presumably makes the server listen on all
# interfaces so that machine B can reach it - confirm against local-s3.
cd local-s3 && BIND=0.0.0.0 cargo run --release -- serve
On machine B:
# Install tools
cargo install tally
sudo apt-get install dstat
# Run stat collection
# dstat is given a 1-second delay and no count, so it keeps sampling until it
# is interrupted: run it in its own terminal and stop it with Ctrl-C once the
# benchmarks below have finished.
mkdir -p data # the CSV (and later data/log.txt) live here
# -o already writes the CSV file. Discard the terminal table instead of
# redirecting it into the same file, which would corrupt the CSV.
dstat -tcdnm -o data/dstat.csv 1 > /dev/null
# Generate files
# Uploads 10000 generated files under the files/10000/ prefix.
# Set S3_HOST to point at a different server (default: machine A's address).
(
  cd local-s3 || exit 1 # leave the subshell rather than run cargo elsewhere
  s3_host="${S3_HOST:-http://192.168.1.15:9000}"
  cargo run --release -- generate-files --host "$s3_host" \
    --num-files 10000 --prefix files/10000/
)
# Use Spark
# Runs the Spark ingest of files/10000/ once per partition setting, timing each
# run with tally. Before every run a "Spark p<N> <HH:MM:SS>" marker is appended
# to data/log.txt.
# Set S3_HOST to point at a different server (default: machine A's address).
(
  cd spark || exit 1 # leave the subshell rather than run from the wrong dir
  mkdir -p ../data   # the log redirection below fails if data/ is missing
  s3_host="${S3_HOST:-http://192.168.1.15:9000}"
  # 0 stands for the p0 run, which passes no --partitions flag at all.
  for partitions in 0 3 6 10; do
    partition_args=()
    if ((partitions > 0)); then
      partition_args=(--partitions "$partitions")
    fi
    sleep 3
    echo "Spark p${partitions} $(date +%T)" >> ../data/log.txt
    tally uv run main.py --ingest --host "$s3_host" --files files/10000/ \
      --table "delta-spark/10000-p${partitions}/" "${partition_args[@]}"
  done
)
# Use Rust
# Builds the Rust ingester, then runs it over files/10000/ once per
# --target-size-mib value, timing each run with tally. Before every run a
# "Rust ts<N> <HH:MM:SS>" marker is appended to data/log.txt.
# Set S3_HOST to point at a different server (default: machine A's address).
(
  cd rust || exit 1               # leave the subshell rather than build elsewhere
  mkdir -p ../data                # the log redirection below fails if data/ is missing
  cargo build --release || exit 1 # never benchmark a stale or missing binary
  s3_host="${S3_HOST:-http://192.168.1.15:9000}"
  for target_size_mib in 512 256 128; do
    sleep 3
    echo "Rust ts${target_size_mib} $(date +%T)" >> ../data/log.txt
    tally ./target/release/rust --host "$s3_host" --files files/10000/ \
      --table "delta-rust/10000-ts${target_size_mib}/" \
      --target-size-mib "$target_size_mib"
  done
)
# Check
# Runs main.py --show against every table written by the Spark and Rust runs.
# Set S3_HOST to point at a different server (default: machine A's address).
(
  cd spark || exit 1 # leave the subshell rather than run from the wrong dir
  s3_host="${S3_HOST:-http://192.168.1.15:9000}"
  for table in \
    delta-spark/10000-p0/ \
    delta-spark/10000-p3/ \
    delta-spark/10000-p6/ \
    delta-spark/10000-p10/ \
    delta-rust/10000-ts512/ \
    delta-rust/10000-ts256/ \
    delta-rust/10000-ts128/; do
    uv run main.py --show --host "$s3_host" --table "$table"
  done
)
# Uninstall dstat. tally, installed with cargo above, is left in place;
# run `cargo uninstall tally` to remove it as well.
sudo apt-get remove dstat