# Scripts
# Collection
# prerequisites
Set up a Bitcoin node.
On Linux, you can follow these steps:
# Download
wget https://bitcoincore.org/bin/bitcoin-core-0.21.1/bitcoin-0.21.1-x86_64-linux-gnu.tar.gz
# Untar
tar xzf bitcoin-0.21.1-x86_64-linux-gnu.tar.gz
# Install
sudo install -m 0755 -o root -g root -t /usr/local/bin bitcoin-0.21.1/bin/*
# Configure
In your home directory, create a .bitcoin folder containing a bitcoin.conf file:
mkdir ~/.bitcoin && touch ~/.bitcoin/bitcoin.conf
Use your favorite editor to configure your node:
# Enable Remote Procedure Call
server=1
# Default RPC port
rpcport=8332
# Needed for RPC Auth
rpcuser=<Username>
# Needed for RPC Auth
rpcpassword=<Strong>
# Usage
Run your node using bitcoind:
bitcoind --daemon
NOTE: The first time you run this command, you must wait for the node to fully synchronize with the Bitcoin network; this takes hours and ~300 GB of storage.
# extract
Please install the blockchain-ekstrakto tool to extract the blockchain dataset.
Follow the instructions in its README.md.
nohup python3 blockchain-ekstrakto.py --source 674000 2> blockchain.err > blockchain.DSC &
NOTE: time ~58 hours, output ~2.1 TB.
# reverse
nohup tac blockchain.DSC | python3 add_addresses.py | gzip -c > blockchain.ASC.gz &
NOTE: time ~28 hours, output ~548 GB.
# Indexing
# prerequisites
export LC_ALL=C
# step 1
nohup zcat blockchain.ASC.gz | python3 make_list.py 2> step1.err | gzip -c > step1.gz &
NOTE: time ~22 hours, output ~150 GB.
# step 2
nohup zcat blockchain.ASC.gz | python3 get_addresses.py 2> step2.err | sort -T. -S10g --parallel=24 -k1,1 -k2,2n | awk 'BEGIN{old="none";}{if ($1!=old) print $0; old=$1;}' | sort -T. -S 10g --parallel=24 -nk2,2 | awk '{print "-1",$1,NR-1;}' | gzip -c > step2.gz &
NOTE: time ~19 hours, output ~22 GB.
# step 3
nohup zcat -c step1.gz step2.gz | sort -S 10g -T . -r -k2,3 --parallel=24 | gzip -c > step3.gz &
NOTE: time ~6 hours, output ~99 GB.
# step 4
nohup zcat step3.gz | python3 list_translate.py 2> step4.err | gzip -c > step4.gz &
NOTE: time ~2 hours, output ~12 GB.
# step 5
nohup zcat step4.gz | sort -S 10g -T . -nk1,1 --parallel=24 | gzip -c > step5.gz &
NOTE: time ~3 hours, output ~37 GB.
# step 6
nohup zcat blockchain.ASC.gz | python3 json_translate.py --file step5.gz 2> step6.err | gzip -c > blockchain.indexed.gz &
NOTE: time ~? hours, output ~581 GB.
# usage
nohup zcat blockchain.indexed.gz | python3 get_indexes.py 2> get_indexes.err &
gzip index_txids & gzip index_tios & gzip index_addresses &
NOTE: time ~6 hours, output ~75 GB.
# Distillation
# prerequisites
tail -n 1 step6.err | gzip -c > blockchain.header.gz
# {"blocks": 674001, "vouts": 1673052718, "addresses": 797002334, "txids": 623483734}
# addresses
nohup gunzip -c blockchain.header.gz blockchain.indexed.gz | python3 distillation_addresses.py 2> distillation_addresses.err | gzip -c > blockchain.addresses.distilled.gz &
NOTE: time ~25 hours, output ~12 GB.
# amounts
nohup gunzip -c blockchain.header.gz blockchain.indexed.gz | python3 distillation_amounts.py 2> distillation_amounts.err | gzip -c > blockchain.amounts.distilled.gz &
NOTE: time ~400 hours.
# tios
nohup gunzip -c blockchain.header.gz blockchain.indexed.gz | python3 distillation_tios.py 2> distillation_tios.err | gzip -c > blockchain.tios.distilled.gz &
# Application : address clustering
# Heuristic
nohup zcat blockchain.distilled.gz | python3 heuristic.py 2> heuristic.err | gzip -c > heuristic.gz &
NOTE: time ~22 minutes, output ~4 GB.
# Union-Find
nohup zcat heuristic.gz | python3 union-find-clusters.py -n 797002334 -i 0 -o 0 2> union-find-clusters.err | gzip -c > clusters.gz &
NOTE: time ~15 minutes, output ~3.8 GB.