numpy 提供了一组高效处理数组的工具.

这个图有点误导性. 当数组是二维的时候, axis0 表示行, axis1 表示列. 当数组是三维的时候, axis0 表示的则是不同的二维数组, axis1 表示行, axis2 表示列.

1 创建array 数组

import numpy as np

a = np.array([1,2,3]) #  1 D array
a

## array([1, 2, 3])

b = np.array([(1,2,3,4),(2,3,4,5)], dtype = float)  # 2 D array
b

## array([[1., 2., 3., 4.],
##        [2., 3., 4., 5.]])

c = np.array([[(1,2,3),(2,3,4)],[(1,1,1),(2,2,2)]], dtype = float) # 3 D array
c

## array([[[1., 2., 3.],
##         [2., 3., 4.]],
## 
##        [[1., 1., 1.],
##         [2., 2., 2.]]])

2 Create special array

These functions create commonly used arrays quickly.

np.zeros((2,2,3))   # Create an array of zeros

## array([[[0., 0., 0.],
##         [0., 0., 0.]],
## 
##        [[0., 0., 0.],
##         [0., 0., 0.]]])

np.ones((2,2,3)) # Create an array of ones

## array([[[1., 1., 1.],
##         [1., 1., 1.]],
## 
##        [[1., 1., 1.],
##         [1., 1., 1.]]])

np.arange(10,25,5) # 1 D  array

## array([10, 15, 20])

np.linspace(0,2,9) # 9 evenly spaced values from 0 to 2 (both endpoints included)

## array([0.  , 0.25, 0.5 , 0.75, 1.  , 1.25, 1.5 , 1.75, 2.  ])

np.full((2,2,2),7) # create an array filled with the value 7

## array([[[7, 7],
##         [7, 7]],
## 
##        [[7, 7],
##         [7, 7]]])

np.eye(3) # 3x3 identity matrix (2-D)

## array([[1., 0., 0.],
##        [0., 1., 0.],
##        [0., 0., 1.]])

np.random.random((2,2,2)) # array of random floats in [0, 1)

## array([[[0.54355924, 0.95804347],
##         [0.81281377, 0.67417269]],
## 
##        [[0.52049942, 0.6786279 ],
##         [0.7376216 , 0.08358359]]])

np.random.rand(10)

## array([0.9838564 , 0.7021055 , 0.44837238, 0.6726763 , 0.46093914,
##        0.25151976, 0.52308899, 0.16730325, 0.28905009, 0.11388428])

np.empty((2,2)) # uninitialized array; the contents are arbitrary

## array([[-2.00000000e+000, -2.00389937e+000],
##        [ 9.88131292e-324,  2.78134232e-309]])

3 I/O

Saving and Loading On Disk

np.save("my_array",a)
np.savez("array.npz",a,b)
np.load("my_array.npy")

Saving and loading text files

np.loadtxt("file.txt")

np.genfromtxt("file.csv", delimiter = ",")

np.savetxt("file.txt",a,delimiter= "")

4 Data types

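np.int64
np.float32
np.complex    # alias removed in NumPy 1.24; use the builtin complex or np.complex128
np.bool       # removed in NumPy 1.24, reintroduced in NumPy 2.0; np.bool_ works on all versions
np.object     # alias removed in NumPy 1.24; use the builtin object or np.object_
np.string_    # removed in NumPy 2.0; use np.bytes_
np.unicode_   # removed in NumPy 2.0; use np.str_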

5 inspecting array

## array([1, 2, 3])

a.shape # show array dimensions

## (3,)

len(a) # array length

## 3

a.ndim # number of array dimensions

## 1

a.size # number of array elements

## 3

a.dtype.name

## 'int64'

a.astype(int) # return a copy of the array converted to the given dtype

## array([1, 2, 3])

6 Asking for help

np.info(np.random)
help(np.random)
?np.random (in Rstudio)

7 Array Mathematics

Arithmetic (算术) operations


a = np.random.random((2,2,2))
b = np.random.random((2,2,2))

a-b # subtraction 减法

## array([[[ 0.07485318,  0.21861378],
##         [ 0.37748246,  0.50106951]],
## 
##        [[ 0.04316713, -0.5588189 ],
##         [-0.27114428,  0.38546714]]])

np.subtract(a,b)

## array([[[ 0.07485318,  0.21861378],
##         [ 0.37748246,  0.50106951]],
## 
##        [[ 0.04316713, -0.5588189 ],
##         [-0.27114428,  0.38546714]]])

a+b

## array([[[1.52765731, 0.90709931],
##         [0.69422455, 0.91728109]],
## 
##        [[1.3934106 , 0.74942806],
##         [0.48733621, 1.58593893]]])

np.add(a,b) # addition

## array([[[1.52765731, 0.90709931],
##         [0.69422455, 0.91728109]],
## 
##        [[1.3934106 , 0.74942806],
##         [0.48733621, 1.58593893]]])

a/b # division

## array([[[1.10304648, 1.63505699],
##         [3.38353206, 3.4077634 ]],
## 
##        [[1.06393977, 0.14569814],
##         [0.28503294, 1.64219274]]])

np.divide(a,b)

## array([[[1.10304648, 1.63505699],
##         [3.38353206, 3.4077634 ]],
## 
##        [[1.06393977, 0.14569814],
##         [0.28503294, 1.64219274]]])

a*b # multiplication 乘法

## array([[[0.58203347, 0.19375929],
##         [0.08486368, 0.14758349]],
## 
##        [[0.48493243, 0.06234096],
##         [0.04099434, 0.59165434]]])

np.multiply(a,b)

# other useful functions (shown after the np.multiply output below)

## array([[[0.58203347, 0.19375929],
##         [0.08486368, 0.14758349]],
## 
##        [[0.48493243, 0.06234096],
##         [0.04099434, 0.59165434]]])

np.exp(a)

## array([[[2.22833628, 1.75568052],
##         [1.70890618, 2.03231451]],
## 
##        [[2.0509208 , 1.09999384],
##         [1.11415466, 2.67969514]]])

np.sqrt(a)

## array([[[0.89512862, 0.75023766],
##         [0.73202015, 0.84212546]],
## 
##        [[0.84751924, 0.30871439],
##         [0.3287795 , 0.99282578]]])

np.sin(a)

## array([[[0.71823006, 0.53360425],
##         [0.5105751 , 0.65120813]],
## 
##        [[0.65809727, 0.09516037],
##         [0.10788557, 0.83366057]]])

np.log10(a)

## array([[[-0.09622911, -0.24960228],
##         [-0.27095392, -0.1492464 ]],
## 
##        [[-0.14370087, -1.02088624],
##         [-0.96619053, -0.00625391]]])

comparison

a == b

## array([[[False, False],
##         [False, False]],
## 
##        [[False, False],
##         [False, False]]])

a < 0.5

## array([[[False, False],
##         [False, False]],
## 
##        [[False,  True],
##         [ True, False]]])

np.array_equal(a,b)

## False

Aggregate functions

a.sum()

## 4.516533030702009

a.min()

## 0.09530457703340622

a.max(axis=1) # maximum taken along axis 1, so a (2,2,2) array gives a (2,2) result

## array([[0.80125525, 0.7091753 ],
##        [0.71828886, 0.98570303]])

a.cumsum(axis =0)

## array([[[0.80125525, 0.56285654],
##         [0.53585351, 0.7091753 ]],
## 
##        [[1.51954411, 0.65816112],
##         [0.64394947, 1.69487833]]])

a.mean()

## 0.5645666288377511

np.median(a)

## 0.636015921336252

a.std()

## 0.2973464471883372

Copying arrays

h = a.view() # create a view of the array with the same data
np.copy(a) # create a copy of the array

## array([[[0.80125525, 0.56285654],
##         [0.53585351, 0.7091753 ]],
## 
##        [[0.71828886, 0.09530458],
##         [0.10809596, 0.98570303]]])

h = a.copy() # create a deep copy of the array

sorting Arrays

a.sort() # sorts the array in place (along the last axis) and returns None

8 Subsetting, Slicing, Indexing

Subsetting

a = np.array((1,2,3))
b = np.array([(1,2,3),(4,5,6)])


a[0]  # select the element at index 0 (the first element)

## 1

b[0,0] # select the element at row 0, column 0 (first row, first column)

## 1

Slicing

a[0:2] # select items at index 0 to 1

## array([1, 2])

b[0:2,1] # select the items in rows 0 and 1 of column 1 (the second column)

## array([2, 5])

b[:1] # select the first row; same as b[0:1,:]

## array([[1, 2, 3]])

a[a<2] # bool indexing

## array([1])

9 array manipulation

transposing array

i = np.transpose(b) # permute (置换) the array dimensions
i.T # transposing again gives back the original b

## array([[1, 2, 3],
##        [4, 5, 6]])

Changing Array Shape

b.ravel() # flatten the array

## array([1, 2, 3, 4, 5, 6])

b.reshape(3,2) # reshape without changing the data

## array([[1, 2],
##        [3, 4],
##        [5, 6]])

Adding/Removing Elements

np.append(a,1)

## array([1, 2, 3, 1])

np.insert(a,1,5)

## array([1, 5, 2, 3])

np.delete(a,0)

## array([2, 3])

Combining Arrays

concatenate arrays 连接数组

np.concatenate((a,a),axis=0)

## array([1, 2, 3, 1, 2, 3])

np.concatenate((b,b),axis=0)

## array([[1, 2, 3],
##        [4, 5, 6],
##        [1, 2, 3],
##        [4, 5, 6]])

stack arrays vertically (row wise 按行排列)


a = np.array((1,2,3))
b = np.array([(1,2,3),(4,5,6)])

np.vstack((b.T,b.T))

## array([[1, 4],
##        [2, 5],
##        [3, 6],
##        [1, 4],
##        [2, 5],
##        [3, 6]])

np.row_stack((b.T,b.T)) # alias of np.vstack; deprecated since NumPy 2.0, prefer np.vstack

## array([[1, 4],
##        [2, 5],
##        [3, 6],
##        [1, 4],
##        [2, 5],
##        [3, 6]])

stack arrays horizontally (column wise)

np.hstack((b,b))

## array([[1, 2, 3, 1, 2, 3],
##        [4, 5, 6, 4, 5, 6]])

np.column_stack((b,b))

## array([[1, 2, 3, 1, 2, 3],
##        [4, 5, 6, 4, 5, 6]])

Splitting Arrays

np.hsplit(a,3)

## [array([1]), array([2]), array([3])]

np.hsplit(b,3)

## [array([[1],
##        [4]]), array([[2],
##        [5]]), array([[3],
##        [6]])]

np.vsplit(b,2)

## [array([[1, 2, 3]]), array([[4, 5, 6]])]

Tutorial of numpy