Anonymization
Why?
On production like database we can experiment with queries and migrations on a similar size dataset
How?
Postgresql Anonymizer + Postgres.ai
Heroku Postgres
Remora Backup
offsite, daily
Digital Ocean
anonymization
machine
Postgres.ai
Thin clones
Ask Joe
PG Anonymizer
PLAIN SQL/plpgsql
Write your own functions
SLOW
Anon chain
# download db backup and decrypt it
sshpass -f "/root/.remora" scp 9983@backup.remorabackup.com:db_backup.gpg .
gpg --passphrase-file ~/.remora_gpg decrypt --output db_backup db_backup.gpg
gpg --batch --passphrase-file ~/.remora_gpg --decrypt --output db_backup db_backup.gpg
rm db_backup.gpg
# extract
dropdb --if-exists -h localhost -p 5432 -U postgres d678q4cotr1hfd;
createdb -h localhost -p 5432 -U postgres d678q4cotr1hfd;
pg_restore -h localhost -U postgres -d d678q4cotr1hfd ~/anonymizer/db_backup
psql -h localhost -U postgres -d d678q4cotr1hfd \
-c 'TRUNCATE versions, email_histories, activity_logs, login_sessions, sessions'
# anonymize
psql -h localhost -U postgres -d d678q4cotr1hfd < ~/anonymizer/init.sql
psql -h localhost -U postgres -d d678q4cotr1hfd < ~/anonymizer/addresses.sql
psql -h localhost -U postgres -d d678q4cotr1hfd < ~/anonymizer/authors.sql
psql -h localhost -U postgres -d d678q4cotr1hfd < ~/anonymizer/bulk_invitations.sql
psql -h localhost -U postgres -d d678q4cotr1hfd < ~/anonymizer/email_histories.sql
...
# dump anonymized db
docker exec -it b5a15c7dc4e1 pg_dump_anon -h localhost -U postgres -d d678q4cotr1hfd -f /var/lib/postgresql/data/full_anon.sql
# restore anonymized
dropdb --if-exists -h localhost -p 5433 -U postgres anonymized;
createdb -h localhost -p 5433 -U postgres anonymized;
psql -h localhost -p 5433 -U postgres anonymized < /pg_data/full_anon.sql;
Example
SECURITY LABEL FOR anon ON COLUMN users.email
IS 'MASKED WITH FUNCTION anon.fake_email()';
SECURITY LABEL FOR anon ON COLUMN users.unconfirmed_email
IS 'MASKED WITH FUNCTION anon.fake_email()';
SECURITY LABEL FOR anon ON COLUMN users.name
IS 'MASKED WITH FUNCTION anon.fake_first_name()';
SECURITY LABEL FOR anon ON COLUMN users.note
IS 'MASKED WITH FUNCTION anon.lorem_ipsum(words := 2)';
SECURITY LABEL FOR anon ON COLUMN users.bio
IS 'MASKED WITH FUNCTION anon.lorem_ipsum(words := 10)';
SECURITY LABEL FOR anon ON COLUMN users.phone
IS 'MASKED WITH FUNCTION anon.random_phone()';
SECURITY LABEL FOR anon ON COLUMN users.paypal_user_id
IS 'MASKED WITH FUNCTION anon.random_phone(''1'')';
SECURITY LABEL FOR anon ON COLUMN users.current_sign_in_ip
IS 'MASKED WITH FUNCTION anon.randomize_ip(host(current_sign_in_ip))';
Example
CREATE OR REPLACE FUNCTION anon.randomize_ip(
source_ip TEXT DEFAULT NULL
)
RETURNS TEXT
AS $func$
BEGIN
IF source_ip = '' THEN
RETURN NULL;
ELSEIF LENGTH(source_ip) < 16 THEN
RETURN array_to_string(
ARRAY[
(string_to_array(source_ip, '.'))[1],
(string_to_array(source_ip, '.'))[2],
CAST(anon.random_int_between(1,254) AS TEXT),
CAST(anon.random_int_between(1,254) AS TEXT)
],
'.'
);
ELSE
RETURN '127.0.0.1';
END IF;
END;
$func$
LANGUAGE plpgsql
VOLATILE
RETURNS NULL ON NULL INPUT
SECURITY INVOKER
SET search_path=''
;
deck
By Mikhail Shirkov
deck
- 199