Basic and tricky numpy and linear algebra functions
Main preprocessing techniques for Numpy package
import numpy as np
# Download the Iris CSV and parse it. dtype='object' stores each field as a
# generic Python object instead of forcing a numeric conversion (the last
# column of this dataset is a text label).
text_path = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
arr = np.genfromtxt(text_path, delimiter=',', dtype='object')

# Round-trip the array through a .npy file. The same path is used for saving
# and loading, so the example no longer depends on the working directory
# being '/content' (presumably a Colab default).
npy_path = 'file.npy'
np.save(npy_path, arr)
# NOTE: object arrays are serialized with pickle, hence allow_pickle=True.
# Only load pickled .npy files that come from a trusted source.
loaded = np.load(npy_path, allow_pickle=True)
print(loaded)
stack an array onto a matrix vertically and horizontally
# Stack a (1, 3) row onto a (3, 3) matrix along each axis.
a = np.random.randint(0,100,size=(3,3))
b = np.random.randint(0,100,size=(1,3))
# vstack concatenates along axis 0: b is appended as a 4th row -> shape (4, 3)
vertical = np.vstack([a, b])
print(vertical)  # vertical
# hstack concatenates along axis 1: b.T is appended as a 4th column -> shape (3, 4)
horizontal = np.hstack([a, b.T])
print(horizontal)  # horizontal
replace items that satisfy a condition with another value
# Build a random 10x10 integer grid, show it, then overwrite every entry
# greater than 20 with -1 (in place) and show the result.
a = np.random.randint(low=0, high=100, size=(10, 10))
print(a)
above_threshold = np.greater(a, 20)  # boolean mask, same shape as a
print(above_threshold)
a[above_threshold] = -1
print(a)
get the intersection between two python numpy arrays
# a holds 0..9, b holds the even numbers 0..8; intersect1d returns the
# sorted unique values present in both.
a = np.arange(0, 10)
b = 2 * np.arange(5)
intersection = np.intersect1d(a, b)
print(intersection)
remove the items of one array from another array
# a holds 0..9, b holds the even numbers 0..8; setdiff1d keeps the sorted
# unique values of a that do not occur in b.
a = np.arange(0, 10)
b = 2 * np.arange(5)
differences = np.setdiff1d(a, b)
print(differences)
extract all elements between a given range
# Keep only the entries of a that lie in the closed interval [5, 10].
a = np.arange(0, 10)
in_range = np.logical_and(a >= 5, a <= 10)  # element-wise AND of both bounds
b = a[in_range]
print(b)
change columns order in a 2d numpy array
# Reorder the columns of a 3x4 matrix: new_index[k] is the source column
# that ends up at position k.
a = np.arange(12).reshape(3, 4)
print(a)
new_index = [3, 0, 1, 2]
print(np.take(a, new_index, axis=1))
count elements
# Count how often each distinct value occurs in a random 100x100 array.
a = np.random.randint(0,100,size=(100,100))
# Call np.unique once and unpack both results instead of recomputing the
# (sorted) unique values for each print.
values, counts = np.unique(a, return_counts=True)
print(values)  # data: sorted distinct values
print(counts)  # count: occurrences of the value at the same position
sort by column
# Reorder the rows of a so that the values in column 3 are ascending.
a = np.random.randint(low=0, high=100, size=(10, 10))
row_order = np.argsort(a[:, 3])  # row indices that sort column 3
print(a[row_order])
generate one hot encoding
# One-hot encode every entry of a: row k of the identity matrix is the
# one-hot vector for class k, so indexing the identity with a appends a
# trailing axis of length (max value + 1).
a = np.random.randint(low=0, high=100, size=(10, 10))
num_classes = a.max() + 1
identity = np.eye(num_classes)
one_hot = identity[a]
print(one_hot)
numpy broadcasting: we have two 1-D arrays of lengths [a] and [b], and we want to compute the distance between every pair of elements (one from each array) using broadcasting
# Pairwise absolute differences between a (length 7) and b (length 5).
# Adding a new axis to each operand lets broadcasting build the full
# (5, 7) table: result[i, j] == |a[j] - b[i]|.
a = np.random.randint(low=0, high=10, size=7)
b = np.random.randint(low=0, high=10, size=5)
result = np.absolute(a[np.newaxis, :] - b[:, np.newaxis])
arithmetic operation
# Element-wise sum, difference, product and true division of two 10x10
# integer arrays. b is drawn from [1, 100), so it never contains a zero
# divisor.
a = np.random.randint(low=0, high=100, size=(10, 10))
b = np.random.randint(low=1, high=100, size=(10, 10))
for elementwise_op in (np.add, np.subtract, np.multiply, np.true_divide):
    print(elementwise_op(a, b))
max values of columns
# Show a random 10x10 matrix followed by the maximum of each column.
a = np.random.randint(low=0, high=100, size=(10, 10))
print(a)
# axis=0 reduces over the rows, leaving one maximum per column
# (axis=1 would give one maximum per row instead).
print(a.max(axis=0))
check array equality
# Compare an integer array with a copy perturbed by noise below 1e-6.
a = np.random.randint(0,100,size=(10,10))
b = a + np.random.rand(10,10) * 1e-6
print(a-b)
# The arrays are not exactly equal, but every difference is far below the
# absolute tolerance, so allclose reports True. The result is captured and
# printed so it is not silently discarded when run as a script.
is_close = np.allclose(a, b, atol=1e-5)
print(is_close)
matrix multiplication
# Multiply a (6, 8) matrix by an (8, 10) matrix; the shared inner
# dimension (8) is contracted, giving a (6, 10) product.
a = np.random.randint(low=0, high=100, size=(6, 8))
b = np.random.randint(low=0, high=100, size=(8, 10))
c = np.matmul(a, b)
print(c.shape)
print(c)
compute rank of matrix using numpy.linalg
# Rank of a random 6x8 integer matrix (it can be at most 6, the smaller
# of the two dimensions).
a = np.random.randint(low=0, high=100, size=(6, 8))
rank = np.linalg.matrix_rank(a)
print(rank)
compute matrix norm using numpy.linalg
# Norm of a random 6x8 integer matrix. With no `ord` argument,
# np.linalg.norm returns the Frobenius norm: the square root of the sum of
# the squared entries.
a = np.random.randint(low=0, high=100, size=(6, 8))
norm = np.linalg.norm(a)
print(norm)
compute matrix inverse using numpy.linalg
# Invert a random 8x8 integer matrix. np.linalg.inv raises LinAlgError if
# the matrix happens to be singular.
a = np.random.randint(low=0, high=100, size=(8, 8))
inverse = np.linalg.inv(a)
print(inverse)