Adding the values of two lists pairwise is a rather common task in day-to-day data processing. As Python is not known to be the best basis for high-throughput calculations, one should know Python’s limits.
Starting from the following script,
#!/usr/bin/env python
"""Micro-benchmark: pairwise addition of two 50-element integer lists.

Run as a script, it times five ways of adding ``l1`` and ``l2`` element by
element and prints one ``<label> <elapsed seconds>`` line per method.  The
elapsed time is the total for all repetitions, so the rate in additions per
second is ``number / elapsed``.

The setup strings import the data from ``__main__``, so the benchmarks only
work when this file is executed directly, not when it is imported.
"""
import timeit

import numpy

# Input data: two identical 50-element lists and their numpy counterparts.
l1 = [1, 2, 3, 4, 5] * 10
l2 = [1, 2, 3, 4, 5] * 10
np1 = numpy.array(l1)
np2 = numpy.array(l2)

# One entry per method: (label, timed statement, timeit setup code).
BENCHMARKS = (
    # Both operands are already numpy arrays.
    ('numpy', 'np1+np2',
     "import numpy; from __main__ import np1, np2"),
    # Both lists are converted to arrays inside the timed statement.
    ('numpy-conv', 'numpy.array(l1)+numpy.array(l2)',
     "import numpy; from __main__ import l1, l2"),
    # Only the second operand is converted inside the timed statement.
    ('numpy-2conv', 'np1+numpy.array(l2)',
     "import numpy; from __main__ import np1, l2"),
    # Pure Python: list comprehension over zipped pairs.
    ('zip', '[x[0]+x[1] for x in zip(l1, l2)]',
     "from __main__ import l1, l2"),
    # Pure Python: index loop.
    # NOTE: unlike the other statements this one adds l2 into l1 in place
    # instead of building a new list, so l1 changes from run to run.  It is
    # kept last so that it cannot affect the other measurements.
    ('for', 'for i in range(len(l1)): l1[i]+=l2[i]',
     "from __main__ import l1, l2"),
)


def run_benchmarks(number=500000):
    """Time every entry of BENCHMARKS and print ``<label> <seconds>``.

    number -- how many times each statement is executed; the printed value
              is the total elapsed time for all of these executions.
    """
    for label, stmt, setup in BENCHMARKS:
        elapsed = timeit.timeit(stmt, number=number, setup=setup)
        # A single pre-formatted argument prints the same text under both
        # the Python 2 print statement and the Python 3 print function.
        print('%s %s' % (label, elapsed))


if __name__ == '__main__':
    run_benchmarks()
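the measured times, converted to list additions per second (number of runs divided by the elapsed time the script prints), give us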
Method | list additions per second |
---|---|
numpy | 5.9e5 |
numpy-conv | 3.8e4 |
numpy-2conv | 6.6e4 |
zip | 1.0e5 |
for | 1.3e5 |
C (gcc -O1) | 1.8e7 |
C (gcc -O0) | 6.2e6 |
The C code is
/*
 * C reference benchmark: repeatedly adds two 50-element integer arrays
 * element by element into a third array. The program prints nothing and
 * always exits with status 0; it is meant to be timed externally.
 */
int main() {
    /* Inputs are volatile so that gcc has to perform every read and cannot
     * delete the whole loop when optimizing with -O1. */
    volatile int l1[] = {0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9};
    volatile int l2[] = {0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9};
    /* Result buffer; overwritten on every pass and never read back. */
    int l3[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
    int i, j = 0;
    /* Outer loop: 5,000,000 repetitions of the complete list addition. */
    for (i = 0; i < 5000000; i++) {
        /* Inner loop: one pairwise addition over the first 50 elements. */
        for (j = 0; j < 50; j++) {
            l3[j]=l1[j]+l2[j];
        }
    }
    return 0;
}
The volatile keyword is necessary in order to keep gcc from deleting the whole loop with -O1.