Basic image processing functions

Basic preprocessing techniques

In [2]:
import numpy as np
from PIL import Image
import imageio
import skimage
import matplotlib.pyplot as plt
  1. Read an image from a path

  2. Convert it to a grayscale image

  3. Plot the image

  4. Write the image to a PNG file

In [ ]:
path = 'https://upload.wikimedia.org/wikipedia/en/thumb/7/7d/Lenna_%28test_image%29.png/440px-Lenna_%28test_image%29.png'
np_image = imageio.imread(path)        # read the image directly from the URL
pil_image = Image.fromarray(np_image)  # convert the NumPy array to a PIL image
gray_scaled = pil_image.convert('L')   # 'L' mode = 8-bit grayscale
gray_scaled.save('lena_gray.png')
display(gray_scaled)

Plot the histogram of the grayscale image

In [ ]:
histogram = np.array(gray_scaled.histogram())
histogram = histogram / histogram.sum()  # normalize to a probability distribution
x = np.arange(256)                       # one bin per gray level
plt.plot(x, histogram)
plt.title('Histogram of Lena')
plt.show()

Rotate the image by 60 degrees

In [ ]:
from skimage.transform import rotate
rotated = rotate(np.array(gray_scaled), 60, resize=True)  # resize=True enlarges the canvas so nothing is cropped
plt.imshow(rotated, cmap='gray')
plt.axis('off')
plt.show()

Resize the image to 600×600 and show both images

In [ ]:
display(gray_scaled)                      # original image
resized = gray_scaled.resize((600, 600))  # PIL's resize takes (width, height)
display(resized)
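
For reference, PIL's `resize` takes a `(width, height)` tuple, while `skimage.transform.resize` takes `(rows, cols)` and returns a float array in [0, 1]. A minimal sketch of the skimage equivalent:

In [ ]:
from skimage.transform import resize
resized_sk = resize(np.array(gray_scaled), (600, 600))  # (rows, cols); float64 output in [0, 1]
plt.imshow(resized_sk, cmap='gray')
plt.axis('off')
plt.show()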

How many coins are in the image? Use skimage's measure module :)

In [ ]:
from skimage import measure

coins_path = 'https://media.imna.ir/d/2018/09/29/3/1530690.jpg'
np_image = imageio.imread(coins_path)
# This is an open-ended problem: it is not easy, and thresholding, blurring,
# etc. can be combined to get good results.
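
One possible approach, as a minimal sketch: it assumes the coins are brighter than the background and reasonably well separated, then applies Otsu thresholding and counts connected components with `measure.label`. The blur sigma and the minimum-area filter are illustrative guesses, not tuned values.

In [ ]:
from skimage.color import rgb2gray
from skimage.filters import gaussian, threshold_otsu

gray = rgb2gray(np_image)                   # collapse RGB to a single channel
smoothed = gaussian(gray, sigma=2)          # blur to suppress texture and noise
mask = smoothed > threshold_otsu(smoothed)  # assumes coins are the bright class

labels = measure.label(mask)                # label connected components
regions = measure.regionprops(labels)
coins = [r for r in regions if r.area > 500]  # drop small specks; area cutoff is a guess
print(f"estimated coin count: {len(coins)}")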

Use the deeplake module to download the required files for the following tasks

In [ ]:
! pip install deeplake
In [ ]:
import deeplake
ds = deeplake.load("hub://activeloop/cifar100-test")
ds.visualize()
  1. Read 10000 images of this dataset randomly from the test images and save them as PNG files using a for loop. Measure the time needed to read and save them.
  2. Use Python multithreading to read and save these files, and measure the process time.
  3. Use Python multiprocessing to read and save these files, and measure the process time.
In [3]:
from typing import Callable
from datetime import datetime
from PIL import Image
import os

saving_address = os.path.expanduser("~/images_cifar100")  # makedirs does not expand '~' by itself
os.makedirs(saving_address, exist_ok=True)

def timeit(function: Callable):
    first_time = datetime.now()
    function()
    last_time = datetime.now()
    print(f"process took {(last_time - first_time).total_seconds()} seconds.")

def save_image(index):
    image = ds.images[index].numpy()  # fetch one image from the deeplake dataset
    Image.fromarray(image).save(f"{saving_address}/{index}.png")
In [9]:
import random
def save_loop():
    image_count = len(ds.images)
    for i in range(1000):
        index = random.randint(0, image_count - 1)  # pick a random test image
        save_image(index)
timeit(save_loop)
process took 0.949402 seconds.
In [10]:
import random
from multiprocessing import Pool

def save_multiprocessing():
    image_count = len(ds.images)
    indices = [(random.randint(0, image_count - 1),) for i in range(1000)]
    with Pool(10) as pool:  # 10 worker processes
        pool.starmap(save_image, indices)
timeit(save_multiprocessing)
process took 0.258296 seconds.
In [11]:
import random
from multiprocessing.pool import ThreadPool
def save_multithreading():
    image_count = len(ds.images)
    indices = [(random.randint(0, image_count - 1),) for i in range(1000)]
    with ThreadPool(10) as pool:  # 10 worker threads
        pool.starmap(save_image, indices)
timeit(save_multithreading)
process took 0.990368 seconds.
  1. Read 200 images of this dataset randomly from the test images and blur each of them twice, with kernel sizes (3,3) and (5,5), using a for loop. Measure the time needed to read and process them.
  2. Use Python multithreading to read and process these images, and measure the process time.
  3. Use Python multiprocessing to read and process these images, and measure the process time.
In [8]:
import cv2
In [6]:
def blur(index, kernel_size):
    image = ds.images[index].numpy()
    return cv2.blur(image, (kernel_size, kernel_size))  # box filter with a square kernel
In [9]:
import random
def blur_loop():
    image_count = len(ds.images)
    for i in range(200):
        index = random.randint(0, image_count - 1)
        blur(index, 3)
        blur(index, 5)
timeit(blur_loop)
process took 41.299255 seconds.
In [21]:
import random
from multiprocessing import Pool

def blur_multiprocessing():
    image_count = len(ds.images)
    with Pool(10) as pool:  # 10 worker processes
        indices = [(random.randint(0, image_count - 1), 3) for i in range(600)]
        pool.starmap(blur, indices)
        indices = [(random.randint(0, image_count - 1), 5) for i in range(600)]
        pool.starmap(blur, indices)
timeit(blur_multiprocessing)
process took 16.460644 seconds.
In [20]:
import random
from multiprocessing.pool import ThreadPool
def blur_multithreading():
    image_count = len(ds.images)
    with ThreadPool(10) as pool:  # 10 worker threads
        indices = [(random.randint(0, image_count - 1), 3) for i in range(600)]
        pool.starmap(blur, indices)
        indices = [(random.randint(0, image_count - 1), 5) for i in range(600)]
        pool.starmap(blur, indices)
timeit(blur_multithreading)
process took 17.336391 seconds.

Where should we use multithreading? What about multiprocessing?
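
A rule of thumb, consistent with the timings above: CPython threads share one GIL, so multithreading pays off for I/O-bound work (network reads, file writes) and for native code that releases the GIL, which OpenCV's filters generally do; multiprocessing sidesteps the GIL entirely, so it also speeds up CPU-bound pure-Python work, at the cost of process startup and argument pickling. A minimal sketch contrasting the two on a deliberately CPU-bound pure-Python function (`busy` and its counts are illustrative, not part of the dataset tasks):

In [ ]:
from multiprocessing import Pool
from multiprocessing.pool import ThreadPool
from datetime import datetime

def busy(n):
    # pure-Python arithmetic: holds the GIL the whole time
    return sum(i * i for i in range(n))

def timed(label, pool_cls):
    start = datetime.now()
    with pool_cls(4) as pool:
        pool.map(busy, [2_000_000] * 8)
    print(label, (datetime.now() - start).total_seconds(), "seconds")

timed("threads:  ", ThreadPool)  # expect little or no speedup: the GIL serializes busy()
timed("processes:", Pool)        # expect a real speedup on a multi-core machine

Note that worker functions defined inside a notebook only pickle cleanly for Pool on fork-based platforms (e.g. Linux); on spawn-based platforms they need to live in an importable module.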