Numpy

Array

In [3]:
import numpy as np

# Build a 3x5 matrix holding 0..14 and inspect its basic attributes.
a = np.arange(15).reshape(3, 5)
print(a.ndim)   # number of axes
print(a.shape)  # length of every axis
print(a.size)   # total number of elements
print(a.dtype)  # type of the elements
print(len(a))   # length of the first axis: 3
print(4 in a)   # membership is tested against every element: True
2
(3, 5)
15
int64
3
True

Create Array

In [73]:
l = [1, 2, 3, 4]

# --- from Python sequences ---
a = np.array(l)  # list -> 1-D array; asarray() is similar but does not copy an existing ndarray
a2 = np.array(range(10), float)  # the second argument sets the dtype

b = np.array([(1, 2), (3, 4)])  # nested tuples -> two-dimensional array
b2 = np.array([[1, 2], [4, 5]])  # nested lists work the same way

c = np.array([((1, 2), (3, 4)), ((5, 6), (7, 8))], dtype=np.int16)  # three-dimensional array, shape (2, 2, 2)

# --- pre-filled arrays ---
d = np.zeros((3, 4))
d2 = np.zeros_like(d)  # zeros with the same shape and dtype as d

e = np.ones((3, 4))
e2 = np.ones_like(e)  # ones with the same shape and dtype as e

# empty() leaves the memory uninitialized: the values are arbitrary leftovers,
# not random numbers. It is faster than zeros()/ones() because nothing is filled in.
emptyArray = np.empty([2, 2])

# --- ranges ---
f = np.arange(10, 30, 5)  # [10 15 20 25]: start 10, step 5, stop 30 is excluded

g = np.linspace(0, 2, 9)  # 9 evenly spaced values from 0 to 2, both ends included

h = np.arange(15).reshape(3, 5)  # 3x5 array
h2 = np.array([1, 2, 3, 4, 5], ndmin=2)  # at least 2 dimensions -> shape (1, 5)

# --- identity-like matrices ---
i = np.identity(4, dtype=float)  # 4x4 identity matrix

k = np.eye(4, k=0, dtype=float)  # ones along the k-th diagonal (k=0 is the main diagonal)

# --- new arrays selected out of an existing one ---
l = np.array([2, 4, 6, 8], float)
m = np.array([0, 0, 1, 3, 2, 1], int)
n = l[m]  # index with an integer array: [2. 2. 4. 8. 6. 4.]
n = l.take(m)  # same selection through take()
l2 = l[l >= 6]  # boolean mask: keep the elements that satisfy the condition
l3 = l[np.logical_and(l > 3, l < 7)]  # compound condition
print(2 in l)  # True, checks whether a value occurs in the array

# --- from an iterator ---
it = iter(range(10))
x = np.fromiter(it, dtype=float)  # consume the iterator into a 1-D array
True

Array Slicing

Slicing returns a view of the array, not a copy.
In [ ]:
# --- 1-D slicing: a[start:stop:step], stop is exclusive ---
a = np.arange(15)
print(a)
print(a[1:4])    # [1 2 3]
print(a[1:8:2])  # [1 3 5 7]
print(a[:4])     # [0 1 2 3]
print(a[10:])    # [10 11 12 13 14]
print(a[::-1])   # a negative step walks backwards: reversed array

# --- 2-D slicing ---
b = np.arange(15).reshape(3, 5)
print(b)
print(b[1, 2])    # one element: row 1, column 2
print(b[1][2])    # same element: take the row first, then index into it
print(b[:, 1])    # column 1 as a 1-D array, (3,)
print(b[:, :1])   # column 0 kept two-dimensional, (3, 1)
print(b[:, -1])   # last column, (3,)
print(b[:, -1:])  # last column kept two-dimensional, (3, 1)

# --- assigning through a slice ---
a = np.arange(15).reshape(3, 5)
a[0, ::2] = -1  # the first row becomes [-1 1 -1 3 -1]
rev_arr = a[::-1, ::-1]  # reverse the rows and the columns
print(rev_arr)

# --- assigning through a list of indices ---
a = np.arange(10)
a[[1, 2, 3, 4]] = 10  # set the 2nd-5th elements to 10

# --- 3-D array ---
c = np.array([[[0, 1, 2], [10, 12, 13]], [[100, 101, 102], [110, 112, 113]]])
print(c, c[0, 1, 2])  # shape (2, 2, 3); the first axis selects the "page"
for page in c:  # iterating walks along the first axis
    print(page)
    print()

print(c[1, ...])  # ... stands for as many ':' as are needed to complete the index

Array Operations

In [ ]:
a = np.array([1, 2, 3, 4])  # [1, 2, 3, 4], (4,)
b = np.arange(1, 12, 3)  # [1, 4, 7, 10], (4,)

# Arithmetic operators and ufuncs are applied element by element.
print(a + b)  # addition, [ 2  6 10 14]
print(b - a)  # subtraction, [0 2 4 6]
print(a * b)  # elementwise product, [ 1  8 21 40]
print(b / a)  # division, [1.         2.         2.33333333 2.5       ]
print(b % a)  # mod, [0 0 1 2]
print(a ** 2)  # power, [ 1  4  9 16]
print(18 * np.sin(a))  # sin, [ 15.14647773  16.36735368   2.54016015 -13.62244492]
print(np.sqrt(a))  # sqrt, [1.         1.41421356 1.73205081 2.        ]
print(a < 3)  # less than, a boolean array, [ True  True False False]
print(a + 3)  # [4 5 6 7]
print(a * 3)  # [ 3  6  9 12]

a = np.array([1.1, 1.5, 1.9], float)
print(np.floor(a))  # floor, [1., 1., 1.]
print(np.ceil(a))  # ceil, [2., 2., 2.]
print(np.rint(a))  # nearest integer, [1., 2., 2.]

# random
e = np.random.random((2, 3))  # 2*3 array of uniform samples from [0, 1)
print(e)
print(e.sum())  # sum of all elements
print(e.max())  # max
print(e.min())  # min
print(e.sum(axis=0))  # sum of each column
print(e.min(axis=1))  # min of each row
ind = e.argmax(axis=0)  # row index of the maximum of each column
print(e.cumsum(axis=1))  # cumulative sum of each row
e.sort(axis=0)  # sort each column in place
print(e)

e = np.random.random((2, 3))
f = e.clip(0, 0.1)  # values are "clipped" to lie within the range [0, 0.1]
a = np.array([1, 1, 4, 5, 5, 5, 7], float)
print(np.unique(a))  # the sorted unique values, [1. 4. 5. 7.]

# Comparison
a = np.array([1, 2, 3, 4])
b = np.arange(0, 8, 2)
c = a > b  # [ True False False False]
# Use the array methods instead of the builtin any()/all(): the builtins only
# work on 1-D arrays. The results are not displayed here.
c.any()  # True
c.all()  # False
d = np.logical_and(a > 0, a < 3)  # [ True  True False False]
np.logical_not(c)  # [False,  True,  True,  True]
np.logical_or(c, d)  # [ True,  True, False, False]

a = np.array([[0, 1], [3, 0]], float)
print(a.nonzero())  # indices of the non-zero elements, one index array per axis

# np.nan / np.inf are the supported spellings; the np.NaN / np.Inf aliases
# were removed in NumPy 2.0.
a = np.array([1, np.nan, np.inf], float)
print(np.isnan(a))  # [False, True, False]
print(np.isfinite(a))  # [True, False, False]

Insertion / Deletion

In [ ]:
a = np.arange(15).reshape(3, 5)

# numpy.insert(arr, obj, values, axis=None)
#   obj    - index before which the values are inserted
#   values - array-like values to insert
#   axis   - axis along which to insert
#
# numpy.append(arr, values, axis)
#   values - array-like values to add at the end
#   axis   - axis along which to append
#
# Both return a new array; `a` itself is never modified.

# one row
np.insert(a, 3, [10, 20, 30, 40, 50], axis=0)  # insert a list
np.insert(a, 3, np.arange(5).reshape(1, -1), axis=0)  # insert a numpy array
np.append(a, [[1, 2, 3, 4, 5]], axis=0)  # append a 2d list
np.append(a, np.arange(5).reshape(1, -1), axis=0)  # append a 2d numpy array

# several rows
np.insert(a, 3, np.arange(10).reshape(2, -1), axis=0)  # insert a numpy array
np.append(a, np.arange(10).reshape(2, -1), axis=0)  # append a 2d numpy array

# one column
np.insert(a, 5, np.arange(3), axis=1)
np.append(a, np.arange(3).reshape(3, 1), axis=1)

# several columns
np.insert(a, 5, np.arange(6).reshape(2, 3), axis=1)
np.append(a, np.arange(6).reshape(3, 2), axis=1)
In [ ]:
a = np.arange(15).reshape(3, 5)
# numpy.delete(arr, obj, axis) returns a new array without the selected
# entries; `a` itself is left untouched.

# rows (axis 0)
np.delete(a, 1, axis=0)  # drop one row
np.delete(a, [0, 1], axis=0)  # drop several rows

# columns (axis 1)
np.delete(a, 1, axis=1)  # drop one column
np.delete(a, [0, 1], axis=1)  # drop several columns

Broadcasting

In [102]:
a = np.arange(10)
b = 10
print(a * b)  # the scalar b is broadcast against every element of a
[ 0 10 20 30 40 50 60 70 80 90]

Switching Columns

In [9]:
a = np.arange(15).reshape(3, 5)

# Indexing with an explicit order of indices rearranges columns or rows.
b = a[:, (1, 0, 2, 3, 4)]  # swap the first and the second column
c = a[(1, 0, 2), :]  # swap the first and the second row

a = np.ones((3, 4, 5, 6))  # (3, 4, 5, 6)
b = np.rollaxis(a, 1)  # (4, 3, 5, 6), roll the second axis to the front

b = np.swapaxes(a, 0, 1)  # (4, 3, 5, 6), swap the first and the second axis
(4, 3, 5, 6)

Shape Manipulation

In [110]:
a = np.array([1, 2, 3], float)  # (3,), vector
b = a[:, np.newaxis]  # (3, 1), column matrix
c = a[np.newaxis, :]  # (1, 3), row matrix
d = np.expand_dims(a, axis=0)  # (1, 3), new axis inserted at position 0
e = np.squeeze(d)  # (3,), axes of length one are removed

a = np.arange(30)
a.shape = 2, -1, 3  # -1 means "whatever is needed"
print(a.shape)  # (2, 5, 3)
(2, 5, 3)
In [ ]:
# A 2x5 array of random whole-number floats in [0, 10).
a = np.floor(10 * np.random.random(10))
a.shape = (2, 5)

print(a.flatten())  # (10,), always a copy
print(a.ravel())  # (10,), a view here
for e in a.flat:  # flat is an iterator over the elements
    print(e)

print(a.T)  # transpose, a view of a
print(a.reshape(5, 2))  # a reshaped view of a

a.shape = (5, 2)  # change the shape of a itself
a.resize(5, 2)  # resize a in place; with an unchanged size the effect is the same as setting a.shape
In [ ]:
a = np.arange(15).reshape(3, 5)
b = a.tolist()  # convert the numpy array to a (nested) Python list

a = np.array([1, 2, 3], float)
# tobytes() replaces the deprecated tostring(): it returns the raw bytes of the
# array data (not a text string).
s = a.tobytes()
# frombuffer() replaces the deprecated binary mode of fromstring(). The dtype
# has to match the one the bytes were written with. frombuffer() returns a
# read-only view of the bytes, so copy() to get a writable array like before.
b = np.frombuffer(s, dtype=a.dtype).copy()

Stacking

In [137]:
a = np.arange(4)
b = np.arange(1, 12, 3)

c = np.vstack((a, b))  # (2, 4), the vectors become rows

d = a[:, np.newaxis]  # (4, 1)
e = b[:, np.newaxis]  # (4, 1)

e = np.hstack((d, e))  # (4, 2), the columns are placed side by side

# r_ joins vectors and scalars along the first axis: [1 2 3 4 0 0 0 1 2 3]
f = np.r_[np.array([1, 2, 3, 4]), 0, 0, np.arange(4)]

# The string '0, 2, 1' controls how r_ joins its arguments:
#   first number  - the axis to concatenate along
#   second number - the minimum number of dimensions every entry is forced to
#   third number  - the axis that receives the start of entries with fewer dimensions
g = np.r_['0, 2, 1', np.array([1, 2, 3, 4]), np.arange(4)]
print(g)
[[1 2 3 4]
 [0 1 2 3]]
In [ ]:
a = np.array([[1, 2], [3, 4]], float)
b = np.array([[5, 6], [7, 8]], float)
print(np.concatenate((a, b), axis=0))  # (4, 2), vertical concatenation
print(np.concatenate((a, b), axis=1))  # (2, 4), horizontal concatenation
In [14]:
arr = np.arange(4).reshape(2, 2)
print(np.tile(arr, (1, 2)))  # tiling along one axis: repeat twice horizontally
print(np.tile(arr, (2, 2)))  # tiling along several axes: repeat twice in both directions
[[0 1 0 1]
 [2 3 2 3]]
[[0 1 0 1]
 [2 3 2 3]
 [0 1 0 1]
 [2 3 2 3]]

Splitting

In [ ]:
a = np.floor(10 * np.random.random((2, 12)))  # (2, 12)
print(a)

b = np.hsplit(a, 3)  # cut the columns into 3 equal subarrays
print(b[0])  # (2, 4)

c = np.hsplit(a, (3, 5))  # cut after the third and after the fifth column
print(c[0])  # (2, 3)
print(c[1])  # (2, 2)

d = np.vsplit(a, 2)  # cut the rows into two equal subarrays (vertical split)
print(d[0])  # (1, 12)

View and Copy

In [ ]:
a = np.arange(4)

# A view shares its data with the original array.
b = a.view()
b.shape = (2, 2)
b[1, 1] = 10  # changing b also changes a
print(a, b)
print(b.flags)  # OWNDATA : False, a view (or a slice) does not own its data

# A copy gets its own data.
c = a.copy()  # deep copy
c.shape = (2, 2)
c[1, 1] = 100  # changing c does not change a
print(a, c)
print(c.flags)  # OWNDATA : True, a copy owns its data

d = a.flatten()
print(d.flags)  # OWNDATA : True, flatten() returns a copy

e = a.ravel()
print(e.flags)  # OWNDATA : False, ravel() returns a view here

Set Value

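where(boolarray, truearray, falsearray): the where function builds a new array by choosing, element by element, from truearray where the Boolean filter is True and from falsearray otherwise; the three arguments must have the same shape or be broadcastable to a common shape. Called with the Boolean filter alone, where(boolarray) returns the indices of the True elements.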
In [ ]:
a = np.arange(15).reshape(3, 5)

a[np.where(a > 5)] = 0  # with a single argument, where() returns the indices of the True elements
print(a)

a = np.arange(15).reshape(3, 5)
temp = np.where(a > 5, True, False)  # True for elements greater than 5; False otherwise
print(a[temp])

b = np.arange(15).reshape(3, 5)
b = np.where(b <= 5, b, 0)  # keep the values up to 5, replace the others by 0
print(b)

c = np.arange(15).reshape(3, 5)
np.putmask(c, c > 5, 0)  # in place: write 0 wherever the mask is True
print(c)

a = np.array([0, 1, 2, 3, 4, 5], float)
a.put([0, 3], 10)  # set the elements at index 0 and index 3 to 10
print(a)  # [10., 1., 2., 10., 4., 5.]
In [172]:
a = np.arange(10) ** 2
b = np.array([1, 1, 4, 4])
# set the values at the positions listed in b (repeated indices are fine)
a[b] = 0
print(a)

# set the values selected by a boolean mask
a[a > 5] = 0
print(a)
[ 0  0  4  9  0 25 36 49 64 81]
[0 0 4 0 0 0 0 0 0 0]
In [173]:
a = np.array([1, 2, 3], float)
a.fill(0)  # overwrite every element in place
print(a)
[0. 0. 0.]
In [20]:
a = np.array([1.1, 2.5, 3.14], float)
b = a.astype(int)  # a deep copy cast to the given type; the fractions are dropped
a[0] = 10  # b is a copy, so it does not change
print(a, b)
print(b.dtype)  # the integer dtype
[10.    2.5   3.14] [1 2 3]
int64

Linear Algebra

In [186]:
a = np.arange(4).reshape(2, 2)
b = np.arange(1, 12, 3).reshape(2, 2)
print(a, b)

print(a.dot(b))  # matrix multiplication
print(np.matmul(a, b))  # matrix product of two arrays

print(np.vdot(a, b))  # dot product of the two inputs treated as flattened vectors

a = np.array([[4, 2, 0], [9, 3, 7], [1, 2, 1]], float)
print(np.linalg.det(a))  # determinant

a = np.arange(4).reshape(2, 2)
print(a.transpose())  # transpose, returns a view
print(a.T)  # same as above, returns a view
print(np.linalg.inv(a))  # inverse matrix
y = np.array([[5.], [7.]])
print(np.linalg.solve(a, y))  # solve the linear system a x = y
print(np.linalg.eig(a))  # eigenvalues and eigenvectors
[[0 1]
 [2 3]] [[ 1  4]
 [ 7 10]]
[[ 7 10]
 [23 38]]
[[ 7 10]
 [23 38]]
48
-48.00000000000003
[[0 2]
 [1 3]]
[[0 2]
 [1 3]]
[[-1.5  0.5]
 [ 1.   0. ]]
[[-4.]
 [ 5.]]
(array([-0.56155281,  3.56155281]), array([[-0.87192821, -0.27032301],
       [ 0.48963374, -0.96276969]]))

Array iteration

In [193]:
# Iterating over a 1-D array yields its elements.
a = np.array([1, 4, 5], int)
for e in a:
    print(e)  # 1, 4, 5

# Iterating over a 2-D array yields its rows.
a = np.array([[1, 2], [3, 4], [5, 6]], float)
for e in a:
    print(e)
# [1. 2.]
# [3. 4.]
# [5. 6.]

# Every row can be unpacked directly in the loop header.
a = np.array([[1, 2], [3, 4], [5, 6]], float)
for x, y in a:
    print(x * y)  # 2.0 12.0 30.0

# nditer visits every single element, following the memory layout of the array.
for x in np.nditer(a):
    print(x)
1
4
5
[1. 2.]
[3. 4.]
[5. 6.]
2.0
12.0
30.0
1.0
2.0
3.0
4.0
5.0
6.0

Matrix Operations

In [205]:
a = np.array([1, 2, 3, 4])  # (4,)
b = np.arange(1, 12, 3)  # (4,)

print(a.dot(b))  # 70, dot product
print(np.outer(a, b))  # outer product, (4, 4)
print(np.inner(a, b))  # inner product

a = np.array([1, 4, 0], float)
b = np.array([2, 2, 1], float)
print(np.cross(a, b))  # cross product

a = np.array([[0, 1], [2, 3]], float)
b = np.array([2, 3], float)
print(np.dot(b, a))  # vector times matrix

a = np.array([[4, 2, 0], [9, 3, 7], [1, 2, 1]], float)
b = np.linalg.inv(a)  # inverse matrix
print(b)

b = np.array([[-3, 4], [2, -1]])
vals, vecs = np.linalg.eig(b)  # eigenvalues and eigenvectors (one eigenvector per column)
print(vals)
print(vecs)

a = np.array([[1, 3, 4], [5, 2, 3]], float)
U, s, Vh = np.linalg.svd(a)  # singular value decomposition

a = np.array([[1, 2], [3, 4]], float)
print(a.diagonal())  # the main diagonal
70
[[ 1  4  7 10]
 [ 2  8 14 20]
 [ 3 12 21 30]
 [ 4 16 28 40]]
70
[ 4. -1. -6.]
[ 6. 11.]
[[ 0.22916667  0.04166667 -0.29166667]
 [ 0.04166667 -0.08333333  0.58333333]
 [-0.3125      0.125       0.125     ]]
[-5.  1.]
[[-0.89442719 -0.70710678]
 [ 0.4472136  -0.70710678]]
[1. 4.]

Polynomial Operations

In [223]:
import matplotlib.pyplot as plt

a = np.array([-1, -1])
print(np.poly(a))  # coefficients of the polynomial that has the given roots

a = np.array([1, 2, 1])
print(np.roots(a))  # roots of the polynomial that has the given coefficients

print(np.polyint([1, 1, 1, 1]))  # integrate

print(np.polyder([1./4., 1./3., 1./2., 1., 0.]))  # differentiate

print(np.polyval([1, -2, 0, 2], 4))  # evaluate a polynomial at a particular point

p = np.poly1d([1, 2, 3])  # construct the polynomial x^2 + 2x + 3
print(p)
print(p(0.5))  # value of the polynomial at this point

# Fit a polynomial of degree 2 to the points and draw both.
x = [1, 2, 3, 4, 5, 6, 7, 8]
y = [0, 2, 1, 3, 7, 10, 11, 19]
z = np.polyfit(x, y, 2)  # coefficients of the fitted polynomial
p = np.poly1d(z)  # polynomial object built from the fitted coefficients
plt.plot(x, y, '.')  # the data points
plt.plot(x, p(x), '-')  # the fitted curve
plt.show()
[1. 2. 1.]
[-1. -1.]
[0.25       0.33333333 0.5        1.         0.        ]
[1. 1. 1. 1.]
34
   2
1 x + 2 x + 3
4.25

Random

In [226]:
np.random.seed()  # no argument: reseed from fresh, unpredictable entropy, so every run differs
np.random.rand(3, 2)  # (3, 2) array of random floats in [0, 1)
np.random.randint(5, 10)  # one random integer from [5, 10)
np.random.randint(5, size=(2, 4))  # (2, 4) array of random integers from [0, 5)

mu, sigma = 0, 0.1  # mean and standard deviation
s = np.random.normal(mu, sigma, 1000)  # 1000 samples of the normal distribution
# histogram with 30 bins, normalized to a probability density
count, bins, ignored = plt.hist(s, 30, density=True)
plt.show()

Statistics

In [234]:
a = np.arange(10)

print(np.average(a))  # mean
print(np.median(a))  # median
print(np.std(a))  # standard deviation
print(np.var(a))  # variance

a = np.array([[1, 2, 1, 3], [5, 3, 1, 8]], float)
c = np.corrcoef(a)  # Pearson product-moment correlation coefficients
print(np.cov(a))  # covariance matrix, every row of a is one variable

a = np.array([[3, 7, 5], [8, 4, 3], [2, 4, 9]])
b = np.ptp(a, axis=1)  # the range (maximum - minimum) of the values along an axis
print(b)

a = np.arange(10)
b = np.percentile(a, 50)  # the value below which the given percentage of the data falls
print(b)

c = np.array([1, 4, 9, 10, 11])
# median: the middle element for an odd number of elements,
# the average of the two middle elements for an even number
d = np.median(c)
print(d)
4.5
4.5
2.8722813232690143
8.25
[[0.91666667 2.08333333]
 [2.08333333 8.91666667]]
[4 5 7]
4.5
9.0

Vectorizing Functions

In [235]:
def time10(a):
    """Return ``a`` multiplied by 10."""
    return 10 * a


# np.vectorize wraps a plain Python function so that it is applied
# to every element of an array.
vecTime10 = np.vectorize(time10)

a = np.arange(10)
b = vecTime10(a)
print(b)
[ 0 10 20 30 40 50 60 70 80 90]

Structured Array

In [240]:
# A structured array: every element is a record with the named fields
# foo (4-byte int), bar (4-byte float) and baz (byte string of up to 10 bytes).
x = np.array(
    [(1, 2., 'Hello'), (2, 3., "World")],
    dtype=[('foo', 'i4'), ('bar', 'f4'), ('baz', 'S10')],
)
print(x)
print(x['foo'])  # 1, 2
print(x['bar'])  # 2., 3.
print(x['baz'])  # b'Hello', b'World'

d = x.dtype
print(d)  # [('foo', '<i4'), ('bar', '<f4'), ('baz', 'S10')]
print(d.names)  # ('foo', 'bar', 'baz')
[(1, 2., b'Hello') (2, 3., b'World')]
[1 2]
[2. 3.]
[b'Hello' b'World']
[('foo', '<i4'), ('bar', '<f4'), ('baz', 'S10')]
('foo', 'bar', 'baz')

IO

delimiter=',', use ',' as the field delimiter
dtype=None, determine the data type of each column automatically
autostrip=True, automatically strip whitespace from the values
usecols=(0, -1), read only the first column and the last column
skip_header=n, skip the first n lines
skip_footer=n, skip the last n lines
In [244]:
# Read only the first and the last column of the CSV file.
# With dtype=None the columns get different types, so the result
# is a structured array.
a = np.genfromtxt(
    'ARG.csv',
    delimiter=',',
    dtype=None,
    autostrip=True,
    usecols=(0, -1),
    encoding=None,
)
print(a)
print(a.shape, a.dtype)  # (84123,), [('f0', '<U4'), ('f1', '<f8')]
print(a[0])  # ('1rw9', 267.79942322)
[('1rw9', 267.79942322) ('1rw9', 181.67858887) ('1rw9', 171.941     ) ...
 ('3nuo', 104.26      ) ('3nuo', 286.61052704) ('3nuo',  98.083     )]
(84123,) [('f0', '<U4'), ('f1', '<f8')]
('1rw9', 267.79942322)
In [248]:
a = np.arange(10)

# --- single array, binary .npy file ---
np.save('temp.npy', a)  # save a numpy array to a binary file
b = np.load('temp.npy')  # read a numpy array from a binary file

# --- several arrays, uncompressed .npz archive ---
x = np.arange(10)
y = np.sin(x)
np.savez('temp.npz', x, y)  # positional arrays are stored as arr_0, arr_1, ...
# np.load keeps the .npz archive open for lazy access; the with-block closes
# it again once the arrays have been read.
with np.load('temp.npz') as npzfile:
    print(npzfile.files)  # ['arr_0', 'arr_1']
    print(npzfile['arr_1'])  # the elements of y

# --- text file ---
np.savetxt('temp.txt', a)  # save a numpy array to a txt file
c = np.loadtxt('temp.txt')  # load a numpy array from a txt file (values come back as floats)
print(c)
['arr_0', 'arr_1']
[ 0.          0.84147098  0.90929743  0.14112001 -0.7568025  -0.95892427
 -0.2794155   0.6569866   0.98935825  0.41211849]
[0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]