Basic statistics functions

Basic knowledge for statistics

In [38]:
from sklearn.datasets import make_blobs
import numpy as np
import matplotlib.pyplot as plt

# Create the dataset: a single 2-feature blob, then rescale each feature.
# The second value returned by make_blobs stays bound to `_` because the
# plotting cells further down pass it to scatter as `c=_`.
X, _ = make_blobs(
   n_samples=450, n_features=2,
   centers=1, cluster_std=0.75,
   shuffle=True, random_state=0
)
# Draw the per-feature scale factors from a seeded generator so that
# Restart & Run All reproduces the same dataset. make_blobs is already
# seeded through random_state=0; the scale factors previously were not.
scale_rng = np.random.default_rng(0)
coefficients = scale_rng.random(2)  # two uniform [0, 1) factors, one per feature
X = X * coefficients  # broadcasts over rows: column j is multiplied by coefficients[j]

Plot the data using matplotlib scatter

In [40]:
# Scatter plot of the dataset, one marker per row of X.
# A title and axis labels are set so the figure can be told apart from the
# later plot of the generated samples when the notebook is skimmed.
fig, ax = plt.subplots()
ax.scatter(
   X[:, 0], X[:, 1],
   c=_,  # `_` is the second value returned by make_blobs in the dataset cell
   marker='o', edgecolor='black', s=50
)
ax.set_title('Original data')
ax.set_xlabel('feature 0')
ax.set_ylabel('feature 1')
plt.show()

Compute the mean, covariance matrix, and correlation matrix of the data

In [41]:
# Summary statistics of X, treating each column as one variable and each
# row as one observation (rowvar=False replaces the explicit transpose).
mean = np.mean(X, axis=0)                    # per-column mean
covariance = np.cov(X, rowvar=False)         # column-by-column covariance matrix
correlation = np.corrcoef(X, rowvar=False)   # column-by-column correlation coefficients

Generate samples from the mean and covariance matrix using numpy.random.multivariate_normal

In [42]:
# Draw new samples from a multivariate normal parameterised by the mean and
# covariance estimated from X. A seeded generator makes the samples
# reproducible on Restart & Run All; Generator.multivariate_normal is the
# counterpart of np.random.multivariate_normal on an explicit generator.
sample_rng = np.random.default_rng(0)
# Generate as many samples as X has rows instead of hard-coding the count.
X_new = sample_rng.multivariate_normal(mean, covariance, size=X.shape[0])

Plot the samples generated in the previous step using matplotlib scatter

In [43]:
# Scatter plot of the generated samples, one marker per row of X_new.
# A title and axis labels are set so the figure can be told apart from the
# earlier plot of the original data when the notebook is skimmed.
fig, ax = plt.subplots()
ax.scatter(
   X_new[:, 0], X_new[:, 1],
   # NOTE(review): `_` is the second value returned by make_blobs for the
   # original dataset, not something derived from X_new; it is kept so the
   # colouring is unchanged, and it must have one entry per row of X_new.
   c=_,
   marker='o', edgecolor='black', s=50
)
ax.set_title('Samples drawn from the fitted multivariate normal')
ax.set_xlabel('feature 0')
ax.set_ylabel('feature 1')
plt.show()