Basic and tricky Pandas functions

Main preprocessing techniques for pandas package

In [ ]:
import pandas as pd
import numpy as np

read csv file and print first five samples

In [ ]:
# Remote location of the iris CSV; kept in one name so later cells can reuse it.
dataset_path = "https://datahub.io/machine-learning/iris/r/iris.csv"

save the dataframe as a tsv (tab-separated) file

In [ ]:

sort based on petalwidth and petallength

In [ ]:
# sort by petalwidth and petallength

select sepallength of first five samples

In [ ]:

group by: get mean of each class using groupby function

In [ ]:

save first ten rows to a csv file

In [ ]:

get an overview of the dataset's main details using the describe function

In [ ]:

find memory usage of each column

In [ ]:

convert type of sepalwidth to int

In [ ]:

count values of petalwidth

In [ ]:

merge the two dataframes using an outer join

In [ ]:
import random

# Fixed seed so both demo frames are identical on every Restart & Run All.
MERGE_DEMO_SEED = 42
# Private generator: reproducible draws without touching the global `random` state.
_rng = random.Random(MERGE_DEMO_SEED)

# data1: rows of [id, code] with ids 0..99 and a random code in 0..20 (inclusive).
data1 = [[i, _rng.randint(0, 20)] for i in range(100)]
# data2: rows of [code, id] with ids 40..139, so ids 40..99 appear in both lists.
data2 = [[_rng.randint(0, 20), i] for i in range(40, 140)]

dataframe1 = pd.DataFrame(data1, columns=['id1', 'code1'])
dataframe2 = pd.DataFrame(data2, columns=['code2', 'id2'])
# your code here

select columns 2 and 3 of row 100 to row 150

In [ ]:

select petalwidth , sepallength where class is Iris-setosa

In [ ]:

select all rows with petalwidth > 0.1 and sepallength < 5.0

In [ ]: