# Basic statistics functions
# Basic knowledge of statistics
from sklearn.datasets import make_blobs
import numpy as np
import matplotlib.pyplot as plt
# Create the dataset: 450 two-feature samples drawn around a single centre.
# The second return value of make_blobs (the per-sample labels) is kept in
# `_` because the scatter plots further down pass it as the colour argument.
X, _ = make_blobs(
    n_samples=450,
    n_features=2,
    centers=1,
    cluster_std=0.75,
    shuffle=True,
    random_state=0,
)

# Scale each feature by its own random factor in [0, 1). The factors come
# from a seeded generator so the scaled dataset is reproducible, in line with
# random_state=0 above; the unseeded global np.random state would give a
# different dataset on every run.
rng = np.random.default_rng(0)
coefficients = rng.random(2)
X = X * coefficients
# Plot the data using matplotlib scatter
# Scatter plot of the dataset: first feature on the x-axis, second feature on
# the y-axis, coloured by the label array `_` returned by make_blobs.
feature_x = X[:, 0]
feature_y = X[:, 1]
plt.scatter(feature_x, feature_y, c=_, marker='o', edgecolor='black', s=50)
plt.show()
# Compute the mean, covariance matrix, and correlation matrix of the data
# Summary statistics of X, treating each row as one sample and each column as
# one feature (rowvar=False is equivalent to passing the transpose).
mean = np.mean(X, axis=0)  # per-feature mean
covariance = np.cov(X, rowvar=False)  # feature-by-feature covariance matrix
correlation = np.corrcoef(X, rowvar=False)  # feature-by-feature correlation
# Generate samples from the mean and covariance matrix using numpy.random.multivariate_normal
# Draw new samples from a multivariate normal with the estimated mean and
# covariance. The sample count is taken from X rather than hard-coded, so the
# generated set always has as many rows as the original data; the plot below
# colours the points with the original label array, which must match in
# length.
n_generated = X.shape[0]
X_new = np.random.multivariate_normal(mean, covariance, n_generated)
# Plot the samples generated in the previous step using matplotlib scatter
# Scatter plot of the generated samples, drawn with the same marker styling
# as the plot of the original data.
# NOTE(review): `c=_` reuses the label array of the original dataset, so it
# must have one entry per generated sample -- confirm this is intended.
marker_style = dict(marker='o', edgecolor='black', s=50)
plt.scatter(X_new[:, 0], X_new[:, 1], c=_, **marker_style)
plt.show()