Basic and tricky numpy and linear algebra functions

Main preprocessing techniques for Numpy package

In [2]:

import numpy as np

read array from text
save as npy
read from npy

In [ ]:

# Remote comma-separated text file (UCI Iris data) that is read straight from its URL.
text_path = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
# One location shared by the save and the load below, so the round trip no
# longer depends on the notebook running from a particular working directory.
npy_path = 'file.npy'

# dtype='object' loads every field into an object array.
arr = np.genfromtxt(text_path, delimiter=',', dtype='object')
np.save(npy_path, arr)
# Object arrays are serialized with pickle, hence allow_pickle=True on load.
# NOTE: unpickling can execute arbitrary code -- only load files you trust
# (this one was written by the np.save call just above).
np.load(npy_path, allow_pickle=True)

stack an array to matrix vertically and horizontally

In [ ]:

# a is a 3x3 matrix; b is a single row of 3 values.
a = np.random.randint(0,100,size=(3,3))
b = np.random.randint(0,100,size=(1,3))

# vstack stacks along rows (vertically): b becomes a new last row -> shape (4, 3).
stacked_vertically = np.vstack([a,b])
# hstack stacks along columns (horizontally): b.T has shape (3, 1) and becomes
# a new last column -> shape (3, 4).
stacked_horizontally = np.hstack([a,b.T])

print(stacked_vertically) # vertical
stacked_horizontally # horizontal

replace items that satisfy a condition with another value

In [ ]:

# Draw a 10x10 grid of integers in [0, 100) and show it.
a = np.random.randint(low=0, high=100, size=(10, 10))
print(a)

# Boolean mask that flags every entry strictly greater than 20.
above_threshold = a > 20
print(above_threshold)

# Overwrite the flagged entries in place with -1, then show the result.
a[above_threshold] = -1
print(a)

get the intersection of two NumPy arrays

In [ ]:

# a holds 0..9, b holds the even numbers 0, 2, 4, 6, 8.
a = np.arange(10)
b = np.arange(0, 10, 2)

# Sorted, unique values that appear in both arrays.
intersection = np.intersect1d(a, b)
print(intersection)

[0 2 4 6 8]

remove from one array the items that also appear in another array

In [ ]:

# a holds 0..9, b holds the even numbers 0, 2, 4, 6, 8.
a = np.arange(10)
b = np.arange(0, 10, 2)

# Sorted, unique values of a that do not occur in b.
differences = np.setdiff1d(a, b)
print(differences)

extract all elements within a given range

In [ ]:

a = np.arange(10)

# Element-wise test for 5 <= value <= 10 (both bounds inclusive).
in_range = np.logical_and(a >= 5, a <= 10)

# Boolean indexing keeps only the entries where the mask is True.
b = a[in_range]
print(b)

change columns order in a 2d numpy array

In [ ]:

# 3x4 matrix holding 0..11 in row-major order.
a = np.arange(12).reshape(3, 4)
print(a)

# Position i of the result takes column new_index[i] of the original,
# so the last column moves to the front.
new_index = [3,0,1,2]
reordered = a[:, new_index]
print(reordered)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[ 3  0  1  2]
 [ 7  4  5  6]
 [11  8  9 10]]

count elements

In [ ]:

a = np.random.randint(0,100,size=(100,100))

# Run np.unique once and unpack both results, instead of repeating the
# sort-and-count over all 10,000 entries just to index the returned tuple.
# values: the sorted distinct entries; counts: occurrences of each value.
values, counts = np.unique(a, return_counts=True)
print(values) #data
print(counts) #count

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
 96 97 98 99]
[103 108  94  85  99 110 103 101  84 100  99 116 120 110  95  81 100 108
 114 109  99  93 112  99 105 103 120  90  97 107  75  97 116 107 107 111
 103  99  95 116  94 111  90 106  90 117 120  94 109 102 117 102  91 112
  86 105 120 104  91  96  95  91  92 108 104  83 108  90 103  99  92  84
  92 105  97  92 103  73  92  99 105 101  96 104 107  81  81  85  99  92
  95 115 106 101  92 116  86 103 103  83]

sort by column

In [ ]:

a = np.random.randint(low=0, high=100, size=(10, 10))

# Row permutation that puts the values of column 3 in ascending order.
row_order = np.argsort(a[:, 3])

# Reorder whole rows with that permutation, i.e. sort the matrix by column 3.
print(a[row_order])

Out[ ]:

array([[42, 71, 42,  4, 94, 21, 30, 44, 49, 52],
       [16, 18, 37, 11, 77, 24, 28, 88, 83, 41],
       [ 0, 72, 59, 22, 24, 40, 54,  7, 34, 22],
       [76, 12, 68, 57, 39, 91, 34, 38, 84, 77],
       [67, 54, 74, 59, 36, 48, 65, 88, 93, 90],
       [81, 71, 32, 71, 97, 35, 31, 85, 13, 78],
       [34, 26, 52, 73, 16, 64, 67, 45, 58, 92],
       [30, 48,  8, 77, 35,  5, 75,  8, 99, 73],
       [44, 35, 88, 95, 29, 80, 61, 19, 48, 40],
       [79, 51, 85, 97, 35, 76, 88,  1, 93, 41]])

generate one hot encoding

In [ ]:

a = np.random.randint(low=0, high=100, size=(10, 10))

# One class per integer from 0 up to the largest value present in a.
num_classes = np.max(a) + 1

# Row k of the identity matrix is the one-hot vector for class k; indexing the
# identity with a swaps every entry for its vector, appending a trailing axis.
identity = np.eye(num_classes)
one_hot = identity[a]
print(one_hot)

NumPy broadcasting: given two 1-D arrays with shapes [a] and [b], compute the distance (absolute difference) between every pair of elements, one taken from each array, using broadcasting.

In [22]:

# Two 1-D integer vectors of lengths 7 and 5.
a = np.random.randint(0, 10, size=7)
b = np.random.randint(0, 10, size=5)

# View a as a (1, 7) row and b as a (5, 1) column; broadcasting expands the
# subtraction to a (5, 7) grid where entry [i, j] is |a[j] - b[i]|.
result = np.abs(a[np.newaxis, :] - b[:, np.newaxis])

arithmetic operation

In [ ]:

a = np.random.randint(low=0, high=100, size=(10, 10))
# b is drawn from [1, 100), so the division below never divides by zero.
b = np.random.randint(low=1, high=100, size=(10, 10))

# Element-wise sum, difference, product and true (floating-point) quotient,
# printed in that order.
for elementwise_op in (np.add, np.subtract, np.multiply, np.true_divide):
    print(elementwise_op(a, b))

[[ 90 178 107  50  65 143 131 186 127  83]
 [ 84  27  11  62  92  19 149 109 135 103]
 [105  45  81  84  57 110  79  76  64 111]
 [ 95  38  15 140  57 155 172 135  45  65]
 [134 157 108 133  15  94 151 100  63  41]
 [ 68  62  95  41  95  25  86 122  50 139]
 [100 115  95 119 154  89  92 105 125 107]
 [ 36  92  56  90  81 143 122  88 107 105]
 [126 156  32 197 140  80  98 113 150 113]
 [148  89  88 145  80 141  46  66  96  34]]
[[-68   4  87   2 -55 -21 -33   8   5  11]
 [  0  23  -7 -58  34  -3 -39  -3  45 -91]
 [-31   5 -81  64 -49 -68   9 -42 -46 -23]
 [ 61 -18  -5 -14   3 -37 -10 -21  33  -7]
 [ 26   3  -6 -11   1 -68   1 -26  11   7]
 [  6  54  33 -33 -33   7  40 -70  42  37]
 [-28 -65 -73  31   6 -73  24  51 -37 -57]
 [ 20 -58  48 -44  51 -23  76 -40 -73 -53]
 [-20  14  28   1 -40 -34  12  -7 -14 -39]
 [ 34  21  -4 -25 -26  15 -26  36 -24 -28]]
[[ 869 7917  970  624  300 5002 4018 8633 4026 1692]
 [1764   50   18  120 1827   88 5170 2968 4050  582]
 [2516  500    0  740  212 1869 1540 1003  495 2948]
 [1326  280   50 4851  810 5664 7371 4446  234 1044]
 [4320 6160 2907 4392   56 1053 5700 2331  962  408]
 [1147  232 1984  148 1984  144 1449 2496  184 4488]
 [2304 2250  924 3300 5920  648 1972 2106 3564 2050]
 [ 224 1275  208 1541  990 4980 2277 1536 1530 2054]
 [3869 6035   60 9702 4500 1311 2365 3180 5576 2812]
 [5187 1870 1932 5100 1431 4914  360  765 2160   93]]
[[ 0.13924051  1.04597701  9.7         1.08333333  0.08333333  0.74390244
   0.59756098  1.08988764  1.08196721  1.30555556]
 [ 1.         12.5         0.22222222  0.03333333  2.17241379  0.72727273
   0.58510638  0.94642857  2.          0.06185567]
 [ 0.54411765  1.25        0.          7.4         0.0754717   0.23595506
   1.25714286  0.28813559  0.16363636  0.65671642]
 [ 4.58823529  0.35714286  0.5         0.81818182  1.11111111  0.61458333
   0.89010989  0.73076923  6.5         0.80555556]
 [ 1.48148148  1.03896104  0.89473684  0.84722222  1.14285714  0.16049383
   1.01333333  0.58730159  1.42307692  1.41176471]
 [ 1.19354839 14.5         2.06451613  0.10810811  0.484375    1.77777778
   2.73913043  0.27083333 11.5         1.7254902 ]
 [ 0.5625      0.27777778  0.13095238  1.70454545  1.08108108  0.09876543
   1.70588235  2.88888889  0.54320988  0.30487805]
 [ 3.5         0.22666667 13.          0.34328358  4.4         0.72289157
   4.30434783  0.375       0.18888889  0.32911392]
 [ 0.7260274   1.1971831  15.          1.01020408  0.55555556  0.40350877
   1.27906977  0.88333333  0.82926829  0.48684211]
 [ 1.59649123  1.61764706  0.91304348  0.70588235  0.50943396  1.23809524
   0.27777778  3.4         0.6         0.09677419]]

max values of columns

In [ ]:

a = np.random.randint(low=0, high=100, size=(10, 10))
print(a)

# Reducing over axis 0 collapses the rows and yields one maximum per column;
# axis=1 would instead yield one maximum per row.
column_max = a.max(axis=0)
print(column_max)

[[74 29 28 11 99 50 94 83 97 73]
 [ 2 19 62 27 91 57 15  0 98 69]
 [75 86 81 59 29 49  1  3 78 52]
 [67 90  8 68 38 77 47 95 66 20]
 [89 99  6 54 22 65 24 53 75 82]
 [70 48 98 19 13 45 75 13 69 88]
 [28  1 29 15  5 71 58 91 75 77]
 [95 81 37 34 66 88  2  9 12 33]
 [50 23 86 80 62 46 25 28 95 23]
 [ 7 82 75 81 51 22 83 52 15 64]]
[95 99 98 81 99 88 94 95 98 88]

check array equality

In [ ]:

a = np.random.randint(low=0, high=100, size=(10, 10))

# b is a copy of a perturbed by noise in [0, 1e-6).
noise = np.random.rand(10, 10) * 1e-6
b = a + noise
print(a - b)

# The perturbation stays below the absolute tolerance of 1e-5, so the two
# arrays compare as equal.
np.allclose(a, b, atol=1e-5)

[[-1.26445663e-08 -6.58008005e-07 -1.02242865e-07 -5.47877633e-07
  -7.93865063e-08 -1.70871949e-07 -7.26900028e-07 -1.06834364e-09
  -4.70375937e-07 -6.56350092e-07]
 [-1.76913538e-07 -8.45903898e-07 -9.57245732e-08 -2.97325187e-07
  -5.58811074e-07 -3.41782879e-07 -9.87414474e-07 -9.74521202e-07
  -5.29563671e-07 -7.29567972e-07]
 [-6.03957474e-07 -7.96866125e-07 -4.33304777e-07 -5.60491465e-07
  -8.11025522e-08 -7.07274239e-10 -1.97026198e-07 -7.55636620e-09
  -8.48557065e-07 -2.26685295e-07]
 [-4.21914063e-07 -6.65508981e-08 -9.08814965e-07 -2.56023512e-07
  -7.53916858e-07 -8.06983792e-07 -9.86670287e-07 -9.70603168e-07
  -6.36795392e-07 -9.20729697e-07]
 [-5.31153209e-07 -3.53329366e-07 -1.38553176e-07 -1.83723152e-07
  -3.94262116e-08 -8.55937275e-07 -3.43371607e-07 -5.04920592e-07
  -4.82102678e-08 -4.20591334e-07]
 [-1.39187165e-07 -4.37494649e-07 -3.64646837e-07 -4.27938105e-07
  -1.44174919e-07 -6.93649710e-07 -8.32878655e-07 -4.19684071e-07
  -3.93690179e-07 -1.79903410e-07]
 [-9.37215368e-07 -2.14787299e-07 -2.70435901e-07 -5.42733549e-07
  -8.66697434e-07 -1.07147351e-07 -8.38087857e-08 -7.98219961e-07
  -9.47668227e-07 -6.20323423e-07]
 [-4.61954528e-08 -3.58186796e-07 -6.20112161e-07 -5.90136437e-08
  -4.58581759e-07 -2.96463440e-08 -3.71494160e-08 -2.95964966e-07
  -5.37779485e-07 -9.12578557e-07]
 [-1.16606742e-07 -5.43867316e-07 -3.85912507e-07 -1.41604666e-07
  -2.89928295e-07 -2.97845212e-07 -8.43704534e-07 -1.22180950e-08
  -7.05420902e-07 -5.41231230e-07]
 [-8.23213540e-07 -1.71034330e-07 -4.02960382e-07 -6.52580624e-07
  -8.32744718e-09 -5.52129748e-07 -2.62375110e-07 -6.87711029e-07
  -2.66816656e-07 -3.61775086e-07]]

Out[ ]:

True

matrix multiplication

In [ ]:

# The inner dimensions (8 and 8) must match; the product has shape (6, 10).
a = np.random.randint(low=0, high=100, size=(6, 8))
b = np.random.randint(low=0, high=100, size=(8, 10))

# np.matmul is the function form of the @ operator.
c = np.matmul(a, b)
print(c.shape)
print(c)

(6, 10)
[[27198 23758 26631 29236 30024 25866 28988 32381 27548 32317]
 [21822 16812 22564 19234 19868 19804 21421 22619 22236 24522]
 [17532 16197 21448 17042 15852 17436 17364 20786 19006 24099]
 [20293 17073 21748 21726 22578 20436 22478 25206 22143 24774]
 [19366 17936 18270 23051 22347 20072 19989 25219 22209 27155]
 [15235 10493 10283 16152 16106 14536 17147 15988 11755 14822]]

compute rank of matrix using numpy.linalg

In [ ]:

# A 6x8 matrix can have rank at most 6.
a = np.random.randint(low=0, high=100, size=(6, 8))

# Rank as computed by numpy.linalg.
rank = np.linalg.matrix_rank(a)
print(rank)

compute matrix norm using numpy.linalg

In [ ]:

a = np.random.randint(low=0, high=100, size=(6, 8))

# With no extra arguments this is the square root of the sum of the squared
# entries of a.
norm = np.linalg.norm(a)
print(norm)

382.5637201826645

compute matrix inverse using numpy.linalg

In [ ]:

# Only square matrices can be inverted, hence the 8x8 shape.
a = np.random.randint(low=0, high=100, size=(8, 8))

# np.linalg.inv raises LinAlgError if a happens to be singular.
inverse = np.linalg.inv(a)
print(inverse)

[[ 1.17937680e-01  4.61561894e-02  3.94586052e-02 -6.70461534e-02
   1.11310578e-01  3.39641258e-03 -1.66527139e-01 -8.22527452e-02]
 [-5.19643711e-03 -8.01532694e-03 -3.35553931e-03  8.23834292e-04
   9.71575917e-03  5.32460250e-03  2.01551128e-03  4.06441426e-03]
 [-1.15504960e-01 -3.26333886e-02 -4.09521147e-02  5.70758726e-02
  -1.11930120e-01  3.09269453e-03  1.67429570e-01  7.60452759e-02]
 [-6.76965668e-02 -2.05573832e-02 -3.12952065e-02  3.96597215e-02
  -7.73702766e-02 -9.24532065e-03  1.07045983e-01  5.90432445e-02]
 [ 2.21267104e-03  4.33261913e-04  1.10425880e-02 -9.89101802e-03
  -3.67313797e-03  6.25130637e-03  9.50929210e-03 -1.48468858e-02]
 [-1.06658330e-02  2.03784586e-03  9.96814673e-04  9.26019771e-03
  -8.41063714e-03 -6.19393811e-03  1.68735677e-02 -2.97425966e-05]
 [-3.72946188e-02 -2.24650922e-02 -9.48245832e-03  2.44323063e-02
  -2.75577904e-02  2.30789345e-03  3.47559697e-02  3.82561038e-02]
 [ 6.89952844e-02  1.76330443e-02  1.93416236e-02 -2.43030104e-02
   5.75984243e-02  9.40999135e-04 -9.55941929e-02 -4.20581579e-02]]

The blank notebook

Main preprocessing techniques for Numpy package

Notebook Exercise

Basic and tricky numpy functions

Authors

Amin Rezaee

Author

Search Results:

Main preprocessing techniques for Numpy package

Amin Rezaee