# Scripts

# Collection

# prerequisites

Set up a Bitcoin node.

On Linux, you can follow these steps:

# Download

wget https://bitcoincore.org/bin/bitcoin-core-0.21.1/bitcoin-0.21.1-x86_64-linux-gnu.tar.gz

# Untar

tar xzf bitcoin-0.21.1-x86_64-linux-gnu.tar.gz

# Install

sudo install -m 0755 -o root -g root -t /usr/local/bin bitcoin-0.21.1/bin/*

# Configure

In your home directory, create a .bitcoin folder containing a bitcoin.conf file:

mkdir ~/.bitcoin && touch ~/.bitcoin/bitcoin.conf

Use your favorite editor to configure your node

# Enable Remote Procedure Call
server=1
# Default RPC port
rpcport=8332
# Needed for RPC Auth
rpcuser=<Username>
# Needed for RPC Auth
rpcpassword=<StrongPassword>

# Usage

Run your node using bitcoind

bitcoind --daemon

NOTE: The first time you run this command, you need to wait for full synchronization with the Bitcoin network; it takes hours and requires ~300 GB of disk space.

# extract

Please install the blockchain-ekstrakto tool to extract the blockchain dataset.
Follow the instructions in its README.md.

nohup python3 blockchain-ekstrakto.py --source 674000 2> blockchain.err > blockchain.DSC &

NOTE: time ~58 hours, output ~2.1 TB.

# reverse

nohup tac blockchain.DSC | python3 add_addresses.py | gzip -c > blockchain.ASC.gz &

NOTE: time ~28 hours, output ~548 GB.

# Indexing

# prerequisites

export LC_ALL=C

# step 1

nohup zcat blockchain.ASC.gz | python3 make_list.py 2> step1.err | gzip -c > step1.gz &

NOTE: time ~22 hours, output ~150 GB.

# step 2

nohup zcat blockchain.ASC.gz | python3 get_addresses.py 2> step2.err | sort -T. -S10g --parallel=24 -k1,1 -k2,2n | awk 'BEGIN{old="none";}{if ($1!=old) print $0; old=$1;}' | sort -T. -S 10g --parallel=24 -nk2,2 | awk '{print "-1",$1,NR-1;}' | gzip -c > step2.gz &

NOTE: time ~19 hours, output ~22 GB.

# step 3

nohup zcat -c step1.gz step2.gz | sort -S 10g -T . -r -k2,3 --parallel=24 | gzip -c > step3.gz &

NOTE: time ~6 hours, output ~99 GB.

# step 4

nohup zcat step3.gz | python3 list_translate.py 2> step4.err | gzip -c > step4.gz &

NOTE: time ~2 hours, output ~12 GB.

# step 5

nohup zcat step4.gz | sort -S 10g -T . -nk1,1 --parallel=24 | gzip -c > step5.gz &

NOTE: time ~3 hours, output ~37 GB.

# step 6

nohup zcat blockchain.ASC.gz | python3 json_translate.py --file step5.gz 2> step6.err | gzip -c > blockchain.indexed.gz &

NOTE: time ~? hours, output ~581 GB.

# usage

nohup zcat blockchain.indexed.gz | python3 get_indexes.py 2> get_indexes.err &
gzip index_txids & gzip index_tios & gzip index_addresses &

NOTE: time ~6 hours, output ~75 GB.

# Distillation

# prerequisites

tail -n 1 step6.err | gzip -c > blockchain.header.gz
# {"blocks": 674001, "vouts": 1673052718, "addresses": 797002334, "txids": 623483734}

# addresses

nohup gunzip -c blockchain.header.gz blockchain.indexed.gz | python3 distillation_addresses.py 2> distillation_addresses.err | gzip -c > blockchain.addresses.distilled.gz &

NOTE: time ~25 hours, output ~12 GB.

# amounts

nohup gunzip -c blockchain.header.gz blockchain.indexed.gz | python3 distillation_amounts.py 2> distillation_amounts.err | gzip -c > blockchain.amounts.distilled.gz &

NOTE: time ~400 hours.

# tios

nohup gunzip -c blockchain.header.gz blockchain.indexed.gz | python3 distillation_tios.py 2> distillation_tios.err | gzip -c > blockchain.tios.distilled.gz &

# Application : address clustering

# Heuristic

nohup zcat blockchain.distilled.gz | python3 heuristic.py 2> heuristic.err | gzip -c > heuristic.gz &

NOTE: time ~22 minutes, output ~4 GB.

# Union-Find

nohup zcat heuristic.gz | python3 union-find-clusters.py -n 797002334 -i 0 -o 0 2> union-find-clusters.err | gzip -c > clusters.gz &

NOTE: time ~15 minutes, output ~3.8 GB.