SecretFlow隐语PSI实验环境配置与测试 配置SecretFlow 并测试两方PSI的效率
官方文档 隐语PSI Benchmark白皮书 — SecretFlow 文档
配置环境 配置conda
sudo apt-get install wget
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
bash Miniconda3-latest-Linux-x86_64.sh
~/.miniconda3
Do you wish the installer to initialize Miniconda3
by running conda init? [yes|no]
[no] >>> yes
source ~/.bashrc
conda --version
新建conda环境
conda create -n sf-benchmark python=3.8
conda activate sf-benchmark
pip install -U secretflow
mkdir sf-benchmark
cd sf-benchmark
生成.csv集合数据脚本,调用
1 python3 gene_psi.py 1000000
脚本
"""Generate random id sets as CSV input for PSI experiments.

Usage::

    python3 gene_psi.py [SIZE [SIZE3]]

Three single-column files (header ``id``) are written to the current
directory:

* ``psi_1.csv`` -- ``SIZE`` random 38-digit ids (default 100).
* ``psi_2.csv`` -- ``SIZE`` ids, ``SIZE // 2`` of which are sampled from
  ``psi_1.csv`` (default: 10 shared ids out of 100).
* ``psi_3.csv`` -- ``SIZE3`` ids, ``SIZE3 // 2`` of which are sampled from
  ``psi_1.csv`` (default: 10 ids, all of them shared).

Ids are drawn independently and are not de-duplicated.
"""
import csv
import sys
from random import randint
from random import sample

# Number of decimal digits in every generated id.
ID_DIGITS = 38


def random_with_N_digits(n):
    """Return a random integer that has exactly ``n`` decimal digits."""
    range_start = 10 ** (n - 1)
    range_end = (10 ** n) - 1
    return randint(range_start, range_end)


def _random_rows(count):
    """Return ``count`` one-element CSV rows, each holding a fresh random id."""
    return [[random_with_N_digits(ID_DIGITS)] for _ in range(count)]


def _overlapping_rows(base_rows, size, shared):
    """Return rows made of ``shared`` rows sampled from ``base_rows`` plus fresh ids.

    Fresh random ids are appended until the result has ``size`` rows.
    ``shared`` is capped at ``len(base_rows)`` because ``random.sample``
    raises ``ValueError`` when asked for more items than the population has.
    """
    shared = min(shared, len(base_rows))
    return sample(base_rows, shared) + _random_rows(size - shared)


def _write_ids(path, rows):
    """Write ``rows`` to ``path`` as CSV beneath a single ``id`` header."""
    with open(path, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["id"])
        writer.writerows(rows)


def main(argv=None):
    """Parse the optional size arguments and write the three CSV files.

    Args:
        argv: Argument vector including the program name; defaults to
            ``sys.argv``.

    Raises:
        ValueError: If a size argument is not an integer.
    """
    argv = sys.argv if argv is None else argv

    len1, len2, len3, len4 = 10 ** 2, 10, 10, 10
    if len(argv) > 1:
        len1 = int(argv[1])
        len2 = len1 // 2  # integer division: no float round-trip
    if len(argv) > 2:
        len3 = int(argv[2])
        len4 = len3 // 2
    print(len1, len2)

    row_list = _random_rows(len1)
    row_list2 = _overlapping_rows(row_list, len1, len2)
    row_list3 = _overlapping_rows(row_list, len3, len4)
    print(len(row_list2))
    print(len(row_list3))

    _write_ids('psi_1.csv', row_list)
    _write_ids('psi_2.csv', row_list2)
    _write_ids('psi_3.csv', row_list3)


if __name__ == '__main__':
    main()
# Installation smoke test (original section heading: 测试安装是否成功).
#
# Runs a local two-party PSI on the iris dataset through SecretFlow and checks
# the result against a plain pandas inner join on the same key.
import os

import numpy as np
import pandas as pd
from sklearn.datasets import load_iris

import secretflow as sf

# Start a local SecretFlow cluster with three simulated parties.
sf.init(['alice', 'bob', 'carol'], address='local')

data, target = load_iris(return_X_y=True, as_frame=True)
data['uid'] = np.arange(len(data)).astype('str')

# One label per row: first half 'Jan', the rest 'Feb'. The previous
# `['Jan'] * (2 ^20)` used `^`, which is bitwise XOR in Python (2 ^ 20 == 22),
# so only 44 labels were produced and the column assignment failed whenever
# the frame did not have exactly 44 rows.
half = len(data) // 2
data['month'] = ['Jan'] * half + ['Feb'] * (len(data) - half)

# Each party receives its own random subset of the rows.
os.makedirs('./data', exist_ok=True)
da, db, dc = data.sample(frac=0.9), data.sample(frac=0.8), data.sample(frac=0.7)

da.to_csv('data/alice.csv', index=False)
db.to_csv('data/bob.csv', index=False)
dc.to_csv('data/carol.csv', index=False)

alice, bob = sf.PYU('alice'), sf.PYU('bob')
spu = sf.SPU(sf.utils.testing.cluster_def(['alice', 'bob']))

# Two-party PSI on the 'uid' column.
input_path = {alice: 'data/alice.csv', bob: 'data/bob.csv'}
output_path = {alice: 'data/alice_psi.csv', bob: 'data/bob_psi.csv'}
spu.psi_csv('uid', input_path, output_path, 'alice')

# Reference result: inner join of the two inputs on 'uid', keeping alice's
# columns, sorted by key.
df = da.join(db.set_index('uid'), on='uid', how='inner', rsuffix='_bob', sort=True)
expected = df[da.columns].astype({'uid': 'int64'}).reset_index(drop=True)

da_psi = pd.read_csv('data/alice_psi.csv')
db_psi = pd.read_csv('data/bob_psi.csv')

pd.testing.assert_frame_equal(da_psi, expected)
pd.testing.assert_frame_equal(db_psi, expected)
print(da_psi)
实际实验测试脚本 启动节点
RAY_DISABLE_REMOTE_CODE=true ray start --head --node-ip-address="192.168.31.128" --port="9394" --resources='{"alice": 2}' --include-dashboard=False

ray start --address="192.168.31.128:9394" --resources='{"bob": 2}'
通过更换reports的protocol参数进行3个两方PSI协议的测试,脚本
"""Time a single two-party PSI run on a SecretFlow cluster.

Connects to the Ray cluster at 192.168.31.128:9394, intersects
``./psi_1.csv`` (alice) with ``./psi_2.csv`` (bob) on the ``id`` column and
logs the elapsed time. Choose the protocol with ``--psi_protocol``.
"""
import logging
import sys
import time

from absl import app
from absl import flags
import spu
import secretflow as sf

FLAGS = flags.FLAGS
flags.DEFINE_string(
    'psi_protocol',
    'ECDH_PSI_2PC',
    'PSI protocol name passed to psi_csv, '
    'e.g. ECDH_PSI_2PC, KKRT_PSI_2PC or BC22_PSI_2PC.',
)

logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# SPU cluster layout: both parties are reachable on the same host, on
# different ports.
cluster_def = {
    'nodes': [
        {
            'party': 'alice',
            'id': 'local:0',
            'address': '192.168.31.128:12345',
            'listen_address': '0.0.0.0:12345',
        },
        {
            'party': 'bob',
            'id': 'local:1',
            'address': '192.168.31.128:12333',
            'listen_address': '0.0.0.0:12333',
        },
    ],
    'runtime_config': {
        'protocol': spu.spu_pb2.SEMI2K,
        'field': spu.spu_pb2.FM128,
    },
}


def main(_):
    """Run one PSI between alice and bob and report the wall-clock time.

    Args:
        _: Positional command-line arguments left over by absl; unused.
    """
    sf.init(parties=['alice', 'bob'], address='192.168.31.128:9394', log_to_driver=True)
    try:
        alice = sf.PYU('alice')
        bob = sf.PYU('bob')

        input_path = {
            alice: './psi_1.csv',
            bob: './psi_2.csv',
        }
        output_path = {
            alice: './psi_output.csv',
            bob: './psi_output.csv',
        }
        select_keys = {
            alice: ['id'],
            bob: ['id'],
        }

        # Named spu_device so that it does not shadow the imported `spu` module.
        spu_device = sf.SPU(cluster_def)

        # perf_counter is monotonic, so the measured interval is not affected
        # by system clock adjustments.
        start = time.perf_counter()
        reports = spu_device.psi_csv(
            key=select_keys,
            input_path=input_path,
            output_path=output_path,
            receiver='alice',
            protocol=FLAGS.psi_protocol,
            precheck_input=False,
            sort=False,
            broadcast_result=False,
        )
        print(f"psi reports: {reports} ")
        logging.info(f"cost time: {time.perf_counter() - start} ")
    finally:
        # Always release the cluster connection, even when the PSI run fails.
        sf.shutdown()


if __name__ == '__main__':
    app.run(main)
SKY-PSI测试 ./bin/PSI_test -r 0 -ss 20 -rs 20 -w 621 -h 20 -hash 10 & ./bin/PSI_test -r 1 -ss 20 -rs 20 -w 621 -h 20 -hash 10
./bin/PSI_test -r 0 -ss 22 -rs 22 -w 627 -h 22 -hash 10 & ./bin/PSI_test -r 1 -ss 22 -rs 22 -w 627 -h 22 -hash 10
./bin/PSI_test -r 0 -ss 23 -rs 23 -w 630 -h 23 -hash 10 & ./bin/PSI_test -r 1 -ss 23 -rs 23 -w 630 -h 23 -hash 10
./bin/PSI_test -r 0 -ss 24 -rs 24 -w 633 -h 24 -hash 11 & ./bin/PSI_test -r 1 -ss 24 -rs 24 -w 633 -h 24 -hash 11
./bin/PSI_test -r 0 -ss 25 -rs 25 -w 636 -h 25 -hash 11 & ./bin/PSI_test -r 1 -ss 25 -rs 25 -w 636 -h 25 -hash 11
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 alleysira@ubuntu:~/SKY-PSI$ cmake . alleysira@ubuntu:~/SKY-PSI$ make alleysira@ubuntu:~/SKY-PSI$ ./bin/PSI_test -r 0 -ss 20 -rs 20 -w 621 -h 20 -hash 10 -ip 127.0.0.1 -ck 0 -gm 0 & ./bin/PSI_test -r 1 -ss 20 -rs 20 -w 621 -h 20 -hash 10 -ip 127.0.0.1 -ck 0 -gm 0 alleysira@ubuntu:~/SKY-PSI$ ./bin/PSI_test -r 0 -ss 20 -rs 20 -w 621 -h 20 -hash 10 -ip 127.0.0.1 -ck 0 -gm 0 & ./bin/PSI_test -r 1 -ss 20 -rs 20 -w 621 -h 20 -hash 10 -ip 127.0.0.1 -ck 0 -gm 0 [1] 4988 Receiver matrix sent and transposed hash input computed Label Time (ms) diff (ms) __________________________________ Sender base OT finished 155.1 155.071 ****** Sender set transformed 645.4 490.352 ******* Sender transposed hash input computed 6863.1 6217.686 ********** Sender hash outputs computed and sent 8659.7 1796.587 ********* Receiver intersection computed,Intersection size:500000 Label Time (ms) diff (ms) __________________________________ Receiver base OT finished 135.9 135.929 ****** Receiver initialized 216.6 80.709 ***** Receiver set transformed 620.4 403.792 ******* Receiver matrix sent and transposed hash input computed 6829.3 6208.850 ********** Receiver intersection computed 9250.8 2421.508 ********* Receiver sent communication: 77.631 MB Receiver received communication: 4.813 MB Receiver total communication: 82.445 MB [1]+ Done ./bin/PSI_test -r 0 -ss 20 -rs 20 -w 621 -h 20 -hash 10 -ip 127.0.0.1 -ck 0 -gm 0
cm20 支持到 $2^{22}$、$2^{23}$
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 alleysira@ubuntu:~/OPRF-PSI-backup$ ./bin/PSI_test -r 0 -ss 22 -rs 22 -w 627 -h 22 -hash 11 & ./bin/PSI_test -r 1 -ss 22 -rs 22 -w 627 -h 22 -hash 11 -ip 127.0.0.1 [2] 5249 Receiver matrix sent and transposed hash input computed Label Time (ms) diff (ms) __________________________________ Sender base OT finished 688.9 688.936 ****** Sender set transformed 2053.2 1364.259 ******* Sender transposed hash input computed 28745.7 26692.532 ********** Sender hash outputs computed and sent 38459.4 9713.711 ********* Begin Receiver intersection computed,Intersection size:532 Proportion of intersection:0.382% correct! Label Time (ms) diff (ms) __________________________________ Receiver base OT finished 690.1 690.110 ****** Receiver initialized 874.8 184.698 ***** Receiver set transformed 2005.0 1130.157 ******* Receiver matrix sent and transposed hash input computed 28747.9 26742.969 ********** Receiver intersection computed 41609.8 12861.827 ********* Receiver sent communication: 313.506 MB Receiver received communication: 10.016 MB Receiver total communication: 323.522 MB alleysira@ubuntu:~/OPRF-PSI-backup$ ./bin/PSI_test -r 0 -ss 23 -rs 23 -w 627 -h 23 -hash 11 & ./bin/PSI_test -r 1 -ss 23 -rs 23 -w 627 -h 23 -hash 11 -ip 127.0.0.1 [1] 5273 Receiver matrix sent and transposed hash input computed Label Time (ms) diff (ms) __________________________________ Sender base OT finished 150.6 150.579 ***** Sender set transformed 2884.4 2733.777 ******* Sender transposed hash input computed 67371.3 64486.946 ********** Sender hash outputs computed and sent 90574.5 23203.154 ********* Begin Receiver intersection computed,Intersection size:536 Proportion of intersection:0.191% correct! 
Label Time (ms) diff (ms) __________________________________ Receiver base OT finished 148.4 148.435 ***** Receiver initialized 517.2 368.812 ***** Receiver set transformed 2835.7 2318.477 ******* Receiver matrix sent and transposed hash input computed 67371.8 64536.073 ********** Receiver intersection computed 97432.1 30060.339 ********* Receiver sent communication: 627.006 MB Receiver received communication: 20.016 MB Receiver total communication: 647.022 MB
实验环境 4Core/12GB
hard drive: 30GB,实验过程中硬盘空间不足,可能影响效率
cm20的元素大小为$2^{128}$,隐语默认为$10^{18}$(已修改为$10^{38}$)
cm20设置了交集个数为100(已修正为500000),隐语交集个数设置为集合大小一半
分别限制网络带宽为30Mbps 20ms、100Mbps 20ms和LAN进行测试,对secretflow的测试需要限制环回地址lo
ifconfig

sudo tc qdisc add dev lo root handle 1: tbf rate 100mbit burst 256kb latency 800ms

sudo tc qdisc add dev lo parent 1:1 handle 10: netem delay 20msec limit 8000

sudo tc qdisc del dev lo root

sudo tc qdisc show dev lo
30Mbps 20ms结果 实验结果
数量级
KKRT/s
ECDH/s
BC22/s
SKY-PSI/s
百万 1000000 $2^{20}$
(40.00+40.81)/2=40.405
(159.15+161.22)/2=160.185
(58.23+60.14)/2≈59.18
(26.47+26.76+26.81)/3=26.68
千万 10000000
369.75
1558.04
537.13
100Mbps 20ms结果 1 2 3 4 5 (base) jie@jie-virtual-machine:~$ sudo tc qdisc add dev lo root handle 1: tbf rate 100mbit burst 256kb latency 800ms (base) jie@jie-virtual-machine:~$ sudo tc qdisc add dev lo parent 1:1 handle 10: netem delay 20msec limit 8000 (base) jie@jie-virtual-machine:~$ sudo tc qdisc show dev lo qdisc tbf 1: root refcnt 2 rate 100Mbit burst 256Kb lat 800ms qdisc netem 10: parent 1:1 limit 8000 delay 20ms
数量级
KKRT/s
ECDH/s
BC22/s
SKY-PSI/s
百万 1000000 $2^{20}$
(18.80+18.29)/2=18.545
151.927
(16.86+16.176)/2=16.518
(10.52+10.51+10.57)/3≈10.53
千万 10000000
(138.21+139.47)/2=138.84
1544.09
(106.89+111.60)/2=109.245
LAN实验结果
数量级
KKRT/s
ECDH/s
BC22/s
SKY-PSI/s
百万 1000000 $2^{20}$
12.85+10.65+12.63+10.19+13.11+9.57+9.82+9.68+9.93,取其中最快5次的平均:(10.19+9.57+9.82+9.68+9.93)/5≈9.84(全部9次的平均约为10.94)
(137.29+142.37+141.59+147.97+143.38)/5=142.52
(10.33+10.318)/2=10.324
(9.2508+9.002+10.941+9.0912+9.036+9.663+10.935+10.154)/8≈9.75
千万 10000000
(76.24+72.58+74.83+70.75+70.83)/5=73.046
1374.78
(79.94+78.78+81.13+78.40+78.62)/5=79.374
蚂蚁给出的结果