numpy 提供了一组高效处理数组的工具.

这个图有点误导性. 当数组是二维的时候, axis0 表示行, axis1 表示列. 当数组是三维的时候, axis0 表示的则是不同的二维数组, axis1 表示行, axis2 表示列.

1 创建array 数组

import numpy as np

a = np.array([1, 2, 3])  # one-dimensional array built from a flat list
a
## array([1, 2, 3])
b = np.array([[1, 2, 3, 4], [2, 3, 4, 5]], dtype=float)  # two rows -> two-dimensional array
b
## array([[1., 2., 3., 4.],
##        [2., 3., 4., 5.]])
c = np.array(
    [[[1, 2, 3], [2, 3, 4]], [[1, 1, 1], [2, 2, 2]]],
    dtype=float,
)  # two 2x3 matrices -> three-dimensional array
c
## array([[[1., 2., 3.],
##         [2., 3., 4.]],
## 
##        [[1., 1., 1.],
##         [2., 2., 2.]]])

2 Create special array

These functions create commonly used arrays quickly.

np.zeros((2,2,3))   # array of shape (2, 2, 3) filled with zeros
## array([[[0., 0., 0.],
##         [0., 0., 0.]],
## 
##        [[0., 0., 0.],
##         [0., 0., 0.]]])
np.ones((2,2,3)) # array of shape (2, 2, 3) filled with ones
## array([[[1., 1., 1.],
##         [1., 1., 1.]],
## 
##        [[1., 1., 1.],
##         [1., 1., 1.]]])
np.arange(10,25,5) # 1-D array: start 10, stop 25 (excluded), step 5
## array([10, 15, 20])
np.linspace(0,2,9) # 9 evenly spaced values from 0 to 2, both ends included
## array([0.  , 0.25, 0.5 , 0.75, 1.  , 1.25, 1.5 , 1.75, 2.  ])
np.full((2,2,2),7) # array of shape (2, 2, 2) with every element set to 7
## array([[[7, 7],
##         [7, 7]],
## 
##        [[7, 7],
##         [7, 7]]])
np.eye(3) # 3x3 identity matrix (always 2-D)
## array([[1., 0., 0.],
##        [0., 1., 0.],
##        [0., 0., 1.]])
np.random.random((2,2,2)) # random floats in [0, 1); the values differ on every run
## array([[[0.54355924, 0.95804347],
##         [0.81281377, 0.67417269]],
## 
##        [[0.52049942, 0.6786279 ],
##         [0.7376216 , 0.08358359]]])
np.random.rand(10) # 10 random floats in [0, 1); takes the dimensions as separate arguments
## array([0.9838564 , 0.7021055 , 0.44837238, 0.6726763 , 0.46093914,
##        0.25151976, 0.52308899, 0.16730325, 0.28905009, 0.11388428])
np.empty((2,2)) # allocated but NOT initialized: the contents are arbitrary leftover memory
## array([[-2.00000000e+000, -2.00389937e+000],
##        [ 9.88131292e-324,  2.78134232e-309]])

3 I/O

Saving and Loading On Disk

np.save("my_array",a) # write one array in binary .npy format; the ".npy" suffix is appended automatically
np.savez("array.npz",a,b) # bundle several arrays into a single .npz archive
np.load("my_array.npy") # read the saved array back from disk

Saving and loading text files

np.loadtxt("file.txt")  # parse a whitespace-delimited text file into an array

np.genfromtxt("file.csv", delimiter = ",")  # like loadtxt, but can cope with missing values

# A single space is used as delimiter: an empty string would glue the columns
# of a 2-D array together and make the file unreadable for np.loadtxt.
np.savetxt("file.txt",a,delimiter= " ")

4 Data types

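  1. np.int64
  2. np.float32
  3. np.complex128 (the old alias np.complex was removed in NumPy 1.24)
  4. np.bool_ (the old alias np.bool was removed in NumPy 1.24; NumPy 2.0 offers np.bool again)
  5. np.object_ (the old alias np.object was removed in NumPy 1.24)
  6. np.bytes_ (the old name np.string_ was removed in NumPy 2.0)
  7. np.str_ (the old name np.unicode_ was removed in NumPy 2.0)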

5 inspecting array

 
a
## array([1, 2, 3])
a.shape # tuple with the size of every dimension
## (3,)
len(a) # size of the first dimension
## 3
a.ndim # number of array dimensions
## 1
a.size # total number of elements
## 3
a.dtype.name # name of the element data type
## 'int64'
a.astype(int) # returns a new array converted to the given type; a itself is unchanged
## array([1, 2, 3])

6 Asking for help

  1. np.info(np.random)
  2. help(np.random)
  3. ?np.random (in Rstudio)

7 Array Mathematics

Arithmetic (算术) operations


a = np.random.random((2,2,2))  # unseeded, so the printed values below change on every run
b = np.random.random((2,2,2))

a-b # element-wise subtraction
## array([[[ 0.07485318,  0.21861378],
##         [ 0.37748246,  0.50106951]],
## 
##        [[ 0.04316713, -0.5588189 ],
##         [-0.27114428,  0.38546714]]])
np.subtract(a,b) # function form of a-b
## array([[[ 0.07485318,  0.21861378],
##         [ 0.37748246,  0.50106951]],
## 
##        [[ 0.04316713, -0.5588189 ],
##         [-0.27114428,  0.38546714]]])
a+b # element-wise addition
## array([[[1.52765731, 0.90709931],
##         [0.69422455, 0.91728109]],
## 
##        [[1.3934106 , 0.74942806],
##         [0.48733621, 1.58593893]]])
np.add(a,b) # function form of a+b
## array([[[1.52765731, 0.90709931],
##         [0.69422455, 0.91728109]],
## 
##        [[1.3934106 , 0.74942806],
##         [0.48733621, 1.58593893]]])
a/b # element-wise division
## array([[[1.10304648, 1.63505699],
##         [3.38353206, 3.4077634 ]],
## 
##        [[1.06393977, 0.14569814],
##         [0.28503294, 1.64219274]]])
np.divide(a,b) # function form of a/b
## array([[[1.10304648, 1.63505699],
##         [3.38353206, 3.4077634 ]],
## 
##        [[1.06393977, 0.14569814],
##         [0.28503294, 1.64219274]]])
a*b # element-wise multiplication (not a matrix product)
## array([[[0.58203347, 0.19375929],
##         [0.08486368, 0.14758349]],
## 
##        [[0.48493243, 0.06234096],
##         [0.04099434, 0.59165434]]])
np.multiply(a,b) # function form of a*b
## array([[[0.58203347, 0.19375929],
##         [0.08486368, 0.14758349]],
## 
##        [[0.48493243, 0.06234096],
##         [0.04099434, 0.59165434]]])

# other useful functions, all applied element-wise
np.exp(a) # e raised to the power of each element
## array([[[2.22833628, 1.75568052],
##         [1.70890618, 2.03231451]],
## 
##        [[2.0509208 , 1.09999384],
##         [1.11415466, 2.67969514]]])
np.sqrt(a) # square root of each element
## array([[[0.89512862, 0.75023766],
##         [0.73202015, 0.84212546]],
## 
##        [[0.84751924, 0.30871439],
##         [0.3287795 , 0.99282578]]])
np.sin(a) # sine of each element (taken as radians)
## array([[[0.71823006, 0.53360425],
##         [0.5105751 , 0.65120813]],
## 
##        [[0.65809727, 0.09516037],
##         [0.10788557, 0.83366057]]])
np.log10(a) # base-10 logarithm of each element
## array([[[-0.09622911, -0.24960228],
##         [-0.27095392, -0.1492464 ]],
## 
##        [[-0.14370087, -1.02088624],
##         [-0.96619053, -0.00625391]]])

comparison

a == b # element-wise equality; the result is a boolean array
## array([[[False, False],
##         [False, False]],
## 
##        [[False, False],
##         [False, False]]])
a < 0.5 # element-wise comparison against a scalar
## array([[[False, False],
##         [False, False]],
## 
##        [[False,  True],
##         [ True, False]]])
np.array_equal(a,b) # one bool for the whole arrays: True only if shape and all elements match
## False

Aggregate functions

a.sum() # sum of all elements
## 4.516533030702009
a.min() # smallest element of the whole array
## 0.09530457703340622
a.max(axis=1) # maximum along axis 1: that axis is collapsed, so shape (2,2,2) becomes (2,2)
## array([[0.80125525, 0.7091753 ],
##        [0.71828886, 0.98570303]])
a.cumsum(axis =0) # running sum along axis 0; the shape stays (2,2,2)
## array([[[0.80125525, 0.56285654],
##         [0.53585351, 0.7091753 ]],
## 
##        [[1.51954411, 0.65816112],
##         [0.64394947, 1.69487833]]])
a.mean() # arithmetic mean of all elements
## 0.5645666288377511
np.median(a) # median of all elements
## 0.636015921336252
a.std() # standard deviation of all elements
## 0.2973464471883372

Copying arrays

h = a.view() # new array object that shares a's data: writing through h also changes a
np.copy(a) # independent copy of the array and its data
## array([[[0.80125525, 0.56285654],
##         [0.53585351, 0.7091753 ]],
## 
##        [[0.71828886, 0.09530458],
##         [0.10809596, 0.98570303]]])
h = a.copy() # also an independent copy; h no longer shares data with a

sorting Arrays

a.sort() # sorts a in place along the last axis and returns None (np.sort(a) returns a sorted copy instead)

8 Subsetting, Slicing, Indexing

Subsetting

a = np.array((1,2,3))
b = np.array([(1,2,3),(4,5,6)])


a[0]  # element at index 0, i.e. the first element (indices start at 0)
## 1
b[0,0] # element at row index 0 and column index 0
## 1

Slicing

a[0:2] # items at index 0 and 1 (the stop index 2 is excluded)
## array([1, 2])
b[0:2,1] # rows 0 and 1, taken from the column at index 1
## array([2, 5])
b[:1] # first row only, result stays 2-D; same as b[0:1,:]
## array([[1, 2, 3]])
a[a<2] # boolean indexing: keeps the elements where the mask is True
## array([1])

9 array manipulation

transposing array

i = np.transpose(b) # reverse the order of the axes: shape (2,3) becomes (3,2)
i.T # transposing a second time gives back the layout of b, as the output shows
## array([[1, 2, 3],
##        [4, 5, 6]])

Changing Array Shape

b.ravel() # flatten the array to 1-D
## array([1, 2, 3, 4, 5, 6])
b.reshape(3,2) # same data arranged as 3 rows x 2 columns; b itself keeps its shape
## array([[1, 2],
##        [3, 4],
##        [5, 6]])

Adding/Removing Elements

np.append(a,1) # new array with 1 appended at the end; a is not modified
## array([1, 2, 3, 1])
np.insert(a,1,5) # new array with the value 5 inserted before index 1
## array([1, 5, 2, 3])
np.delete(a,0) # new array without the element at index 0
## array([2, 3])

Combining Arrays

concatenate arrays 连接数组

np.concatenate((a,a),axis=0) # 1-D inputs are joined end to end
## array([1, 2, 3, 1, 2, 3])
np.concatenate((b,b),axis=0) # 2-D inputs joined along axis 0: the rows of the second array are appended
## array([[1, 2, 3],
##        [4, 5, 6],
##        [1, 2, 3],
##        [4, 5, 6]])

stack arrays vertically (row wise 按行排列)


a = np.array((1,2,3))
b = np.array([(1,2,3),(4,5,6)])

np.vstack((b.T,b.T)) # stack along axis 0: the rows of the second array go below the first
## array([[1, 4],
##        [2, 5],
##        [3, 6],
##        [1, 4],
##        [2, 5],
##        [3, 6]])
# np.row_stack((b.T,b.T)) used to return exactly the same result, but row_stack
# is only an alias of vstack and is deprecated since NumPy 2.0 -- use np.vstack.

stack arrays horizontally (column wise)

np.hstack((b,b)) # stack side by side: the columns of the second array are appended
## array([[1, 2, 3, 1, 2, 3],
##        [4, 5, 6, 4, 5, 6]])
np.column_stack((b,b)) # same result for 2-D input; 1-D inputs would first be turned into columns
## array([[1, 2, 3, 1, 2, 3],
##        [4, 5, 6, 4, 5, 6]])

Splitting Arrays

np.hsplit(a,3) # split the 1-D array into 3 equal parts; the result is a list of arrays
## [array([1]), array([2]), array([3])]
np.hsplit(b,3) # split b column-wise into 3 equal parts
## [array([[1],
##        [4]]), array([[2],
##        [5]]), array([[3],
##        [6]])]
np.vsplit(b,2) # split b row-wise into 2 equal parts
## [array([[1, 2, 3]]), array([[4, 5, 6]])]