图像处理入门100题（一）

图像处理入门100题（一）：本篇记录了GitHub上的ImageProcessing 100 Wen的问题1-10的自己写的答案，注释里包含了一些自己的感悟。为了方便，注释是用英文写的。问题序号与题目：1 通道交换；2 灰度化；3 二值化；4 大津二值化算法（最佳阈值）；5 HSV与RGB的变换；6 减色处理；7 平均池化；8 最大池化；9 高斯滤...

Treasure_XY

618人浏览 · 2019-12-04 11:10:34

Treasure_XY · 2019-12-04 11:10:34 发布

图像处理入门100题（一）

本篇记录了自己写的GitHub上的ImageProcessing 100 Wen的问题1-10的答案，注释里包含了一些自己的感悟。为了方便，注释是用英文写的。

问题序号	题目
1	通道交换
2	灰度化
3	二值化
4	大津二值化算法（最佳阈值）
5	HSV与RGB的变换
6	减色处理
7	平均池化
8	最大池化
9	高斯滤波
10	中值滤波

Python答案如下，结尾包含了问题的验证。

import cv2.cv2
import numpy as np


# Question One
# change channels
# from RGB to BGR
# please note that the images are stored by BGR
# Answer

def RGB2BGR(img):
    """Return a copy of ``img`` with channels 0 and 2 swapped.

    Images loaded through OpenCV keep their channels in B, G, R order, so
    swapping the outer two channels converts between BGR and RGB.

    Args:
        img: array indexed as (row, column, channel) with three channels.

    Returns:
        A new array with the same shape and dtype as ``img``; ``img`` itself
        is left untouched.
    """
    # Fancy indexing materialises the reordered channels as a fresh array,
    # so writing it into the copy below cannot alias its own source.
    reordered = img[:, :, [2, 1, 0]]

    result = img.copy()
    result[...] = reordered
    return result


# Question Two
# gray scale of images
# Y = 0.2126 * R + 0.7152 * G + 0.0722 * B
# Answer

def RGB2GRAY(img):
    """Convert a BGR image to a single-channel grayscale image.

    The luma is computed as Y = 0.2126 * R + 0.7152 * G + 0.0722 * B and then
    truncated (not rounded) to ``uint8``.

    Args:
        img: array indexed as (row, column, channel) with channels in
            B, G, R order.

    Returns:
        A 2-D ``uint8`` array holding the gray level of every pixel.
    """
    blue = img[:, :, 0]
    green = img[:, :, 1]
    red = img[:, :, 2]

    luma = 0.2126 * red + 0.7152 * green + 0.0722 * blue
    return luma.astype(np.uint8)


# Question Three
# Binary image
# threshold: if y < t then y := 0 else y := 255
# Answer

def BINARY(img, th=128):
    """Binarise a BGR image with a fixed threshold.

    The image is first converted to grayscale with ``RGB2GRAY``; gray levels
    below ``th`` become 0 and gray levels greater than or equal to ``th``
    become 255.

    Args:
        img: BGR image indexed as (row, column, channel).
        th: threshold applied to the gray level.

    Returns:
        A 2-D ``uint8`` array containing only the values 0 and 255.
    """
    gray = RGB2GRAY(img)

    # Comparing an array with a scalar yields a boolean mask of the same
    # shape; indexing with that mask selects exactly the matching pixels.
    dark = gray < th
    bright = gray >= th

    gray[dark] = 0
    gray[bright] = 255
    return gray


# Question Four
# find best threshold for binary image
# Otsu's method:
# there are two classes: class0 holds the pixels below the threshold, class1 holds the pixels greater than or equal to it
# w0: the rate of number of pixels of class0, the same as w1
# M0: the mean value of pixel value of class0, the same as M1
# objective function: make Sb^2 = w0 * w1 * (M0 - M1)^2 biggest
# Answer

def BESTTH(img):
    """Find a binarisation threshold with Otsu's method.

    Every candidate threshold t in 1..254 splits the gray image into the
    pixels below t and the pixels at or above t. The candidate maximising the
    between-class variance w0 * w1 * (M0 - M1)^2 is returned, where w is the
    fraction of pixels in a class and M is the mean gray level of a class.

    Args:
        img: BGR image indexed as (row, column, channel).

    Returns:
        The best threshold found, or 0 when no candidate produces a positive
        between-class variance.
    """
    gray = RGB2GRAY(img)
    pixel_count = img.shape[0] * img.shape[1]

    winner = 0
    winner_score = 0

    for candidate in range(1, 255):
        below = gray[gray < candidate]
        above = gray[gray >= candidate]

        weight_below = len(below) / pixel_count
        weight_above = len(above) / pixel_count

        # An empty class has no mean; treat it as 0 so the score stays finite.
        mean_below = np.mean(below) if len(below) > 0 else 0
        mean_above = np.mean(above) if len(above) > 0 else 0

        score = weight_below * weight_above * np.square(mean_below - mean_above)

        # Strict comparison keeps the first (smallest) threshold on ties.
        if score > winner_score:
            winner, winner_score = candidate, score

    return winner


# Question Five
# HSV to RGB, RGB to HSV
# Answer
def RGB2HSV(img):
    """Convert a BGR image into the HSV representation used by this tutorial.

    Args:
        img: array indexed as (row, column, channel) with channels in
            B, G, R order and values on a 0..255 scale.

    Returns:
        A ``float64`` array shaped like ``img`` where
        channel 0 is the hue in degrees (0 for gray pixels),
        channel 1 is ``max - min`` of the scaled channels, and
        channel 2 is ``max`` of the scaled channels.
    """
    scaled = img / 255.0  # channel values in [0, 1]
    blue = scaled[:, :, 0]
    green = scaled[:, :, 1]
    red = scaled[:, :, 2]

    max_v = np.max(scaled, axis=2)
    min_v = np.min(scaled, axis=2)
    min_index = np.argmin(scaled, axis=2)

    chroma = max_v - min_v

    # Gray pixels have max == min. Dividing by their zero chroma would give
    # NaN (argmin reports index 0 for them, so the "min == B" branch would
    # otherwise win), so divide by 1 there and force the hue to 0 afterwards.
    achromatic = chroma == 0
    safe_chroma = chroma.copy()
    safe_chroma[achromatic] = 1.0

    hue = np.select(
        [min_index == 0, min_index == 1, min_index == 2],
        [
            60 * (green - red) / safe_chroma + 60,    # min is B
            60 * (red - blue) / safe_chroma + 300,    # min is G
            60 * (blue - green) / safe_chroma + 180,  # min is R
        ],
        default=0.0,
    )
    hue[achromatic] = 0

    hsv = np.zeros(img.shape, dtype=np.float64)
    hsv[:, :, 0] = hue
    hsv[:, :, 1] = chroma
    hsv[:, :, 2] = max_v
    return hsv


def HSV2RGB(img):
    """Convert an HSV array (as produced by ``RGB2HSV``) back to a BGR image.

    Args:
        img: array indexed as (row, column, channel) where channel 0 is the
            hue in degrees, channel 1 is the chroma (max - min) and channel 2
            is the value (max), the latter two on a 0..1 scale. Hues outside
            0..360 are wrapped around the colour circle.

    Returns:
        A ``uint8`` array shaped like ``img`` with channels in B, G, R order.
        Scaled values are clipped to 0..255 and truncated, not rounded.
    """
    hue = img[:, :, 0]
    chroma = img[:, :, 1]
    value = img[:, :, 2]

    # Position on the colour circle measured in 60-degree sectors; the modulo
    # wraps hues such as 360 or -60 so they do not silently turn black.
    sector = (hue / 60.0) % 6
    # floor(...) % 6 also maps a sector that rounds up to exactly 6.0 to 0.
    sector_index = np.floor(sector) % 6

    second = chroma * (1 - np.abs(sector % 2 - 1))
    zero = np.zeros_like(hue)
    base = value - chroma

    # (R, G, B) offsets added to ``base`` for each of the six sectors.
    offsets = (
        (chroma, second, zero),
        (second, chroma, zero),
        (zero, chroma, second),
        (zero, second, chroma),
        (second, zero, chroma),
        (chroma, zero, second),
    )

    bgr = np.zeros(img.shape, dtype=np.float64)  # note the B, G, R order
    for index, (red, green, blue) in enumerate(offsets):
        mask = sector_index == index
        bgr[:, :, 2][mask] = base[mask] + red[mask]
        bgr[:, :, 1][mask] = base[mask] + green[mask]
        bgr[:, :, 0][mask] = base[mask] + blue[mask]

    bgr = np.clip(bgr, 0, 1)
    return (bgr * 255).astype(np.uint8)


# Question Six
# color reduced
# make rgb only represented by 32 96 160 224
# Answer

def COLORREDUCED(img):
    """Quantise every channel value to the centre of its 64-wide bucket.

    For values in 0..255 this leaves only four levels per channel:
    0-63 -> 32, 64-127 -> 96, 128-191 -> 160 and 192-255 -> 224.

    Args:
        img: array of channel values.

    Returns:
        A new array of the same shape holding the quantised values.
    """
    step = 64  # width of one bucket; half of it is the offset to its centre

    bucket = img // step
    return bucket * step + step // 2


# Question Seven
# average pooling
# this is an important operation in convolution neural network
# use the average pixel value of a block(kernel) to represent a block(kernel)
# Answer

def AVERAGEPOOLING(img, k=8):
    """Replace every k x k block of the image by its per-channel mean.

    The image is divided into non-overlapping k x k blocks starting at the
    top-left corner. Rows and columns that do not fill a whole block are left
    unchanged. The mean is truncated to ``uint8`` before it is written back.

    Args:
        img: array of shape (H, W, C).
        k: side length of the pooling block.

    Returns:
        A new array with the same shape and dtype as ``img``. The input is
        not modified.
    """
    H, W, _ = img.shape
    # Work on a copy so the caller's image is not overwritten.
    out = img.copy()

    for top in range(0, (H // k) * k, k):
        for left in range(0, (W // k) * k, k):
            block = img[top:top + k, left:left + k]
            # One mean per channel, broadcast over the whole block.
            out[top:top + k, left:left + k] = block.mean(axis=(0, 1)).astype(np.uint8)

    return out


# Question Eight
# max pooling
# as the name
# Answer

def MAXPOOLING(img, k=8):
    """Replace every k x k block of the image by its per-channel maximum.

    The image is divided into non-overlapping k x k blocks starting at the
    top-left corner. Rows and columns that do not fill a whole block are left
    unchanged.

    Args:
        img: array of shape (H, W, C).
        k: side length of the pooling block.

    Returns:
        A new array with the same shape and dtype as ``img``. The input is
        not modified.
    """
    H, W, _ = img.shape
    # Work on a copy so the caller's image is not overwritten.
    out = img.copy()

    for top in range(0, (H // k) * k, k):
        for left in range(0, (W // k) * k, k):
            block = img[top:top + k, left:left + k]
            # One maximum per channel, broadcast over the whole block.
            out[top:top + k, left:left + k] = block.max(axis=(0, 1))

    return out


# Question Nine
# Gaussian Filter
# to smooth the image or denoising using gaussian filter
# Answer

def GAUSSIANFILTER(img, k=3, sigma=1.3):
    """Smooth an image with a k x k Gaussian kernel and zero padding.

    The kernel weights follow g(x, y) = exp(-(x^2 + y^2) / (2 * sigma^2)),
    with (x, y) measured from the kernel centre, and are normalised so that
    they sum to 1.

    Args:
        img: colour image of shape (H, W, C) or grayscale image of shape
            (H, W).
        k: side length of the kernel; must be a positive odd number so the
            kernel has a centre pixel.
        sigma: standard deviation of the Gaussian.

    Returns:
        A ``uint8`` array with the same shape as ``img``. Results are clipped
        to 0..255 and truncated, not rounded.

    Raises:
        ValueError: if ``k`` is not a positive odd integer.
    """
    if k < 1 or k % 2 == 0:
        raise ValueError("k must be a positive odd integer")

    # A grayscale image is given a channel axis of length 1 so both kinds of
    # input share one code path; the axis is removed again before returning.
    is_gray = img.ndim == 2
    if is_gray:
        img = np.expand_dims(img, axis=-1)
    H, W, C = img.shape

    # Zero padding keeps the output the same size as the input.
    pad = (k - 1) // 2
    padded = np.zeros((H + 2 * pad, W + 2 * pad, C), dtype=np.float64)
    padded[pad:pad + H, pad:pad + W] = img.astype(np.float64)

    # Kernel coordinates are centred on (0, 0). The 1 / (2 * pi * sigma^2)
    # factor of the Gaussian is omitted because normalisation cancels it.
    offsets = np.arange(-pad, k - pad)
    squared_distance = offsets[:, None] ** 2 + offsets[None, :] ** 2
    kernel = np.exp(-squared_distance / (2 * sigma ** 2))
    kernel = kernel / kernel.sum()

    # Accumulate one shifted copy of the padded image per kernel element;
    # this is the same weighted sum as sliding the kernel over every pixel.
    out = np.zeros((H, W, C), dtype=np.float64)
    for ky in range(k):
        for kx in range(k):
            out += kernel[ky, kx] * padded[ky:ky + H, kx:kx + W]

    out = np.clip(out, 0, 255).astype(np.uint8)
    return out[:, :, 0] if is_gray else out


# Question Ten
# Median filter
# use the median value to filter
# Answer

def MEDIANFILTER(img, k=3):
    """Filter an image by taking the median of each k x k window.

    The image is zero padded so the output keeps the input size; windows that
    overlap the border therefore include zeros.

    Args:
        img: colour image of shape (H, W, C) or grayscale image of shape
            (H, W).
        k: side length of the window (generally odd).

    Returns:
        A ``uint8`` array with the same shape as ``img``. Results are clipped
        to 0..255 and truncated, not rounded.
    """
    # A grayscale image is given a channel axis of length 1 so both kinds of
    # input share one code path; the axis is removed again before returning.
    is_gray = img.ndim == 2
    if is_gray:
        img = np.expand_dims(img, axis=-1)
    H, W, C = img.shape

    # Zero padding
    pad = (k - 1) // 2
    padded = np.zeros((H + 2 * pad, W + 2 * pad, C), dtype=np.float64)
    padded[pad:pad + H, pad:pad + W] = img.astype(np.float64)

    # Windows are always read from ``padded``, which is never written to, so
    # already-filtered pixels cannot leak into later windows.
    out = np.zeros((H, W, C), dtype=np.float64)
    for y in range(H):
        for x in range(W):
            # One median per channel over the window's rows and columns.
            out[y, x] = np.median(padded[y:y + k, x:x + k], axis=(0, 1))

    out = np.clip(out, 0, 255).astype(np.uint8)
    return out[:, :, 0] if is_gray else out


# Test


# Answer One
# img = cv2.imread("imori.jpg")
# img1 = RGB2BGR(img)
#
# cv2.imshow("RGB2BGR", np.hstack((img, img1)))
# cv2.waitKey(0)
# cv2.destroyAllWindows()

# Answer Two
# img = cv2.imread("imori.jpg")
# img2 = RGB2GRAY(img)
#
# cv2.imshow("RGB2GRAY", img2)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

# Answer Three
# img = cv2.imread("imori.jpg")
# img3 = BINARY(img)
#
# cv2.imshow("BINARY", img3)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

# Answer Four
# img = cv2.imread("imori.jpg")
# best_t = BESTTH(img)
# print(best_t)
# img4 = BINARY(img, th=best_t)
#
# cv2.imshow("BINARY", img4)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

# Answer Five
# img = cv2.imread("imori.jpg")
#
# hsv = RGB2HSV(img)
# hsv[:, :, 0] = (hsv[:, :, 0] + 180) % 360
# img5 = HSV2RGB(hsv)
#
# cv2.imshow("RGB2HSV -> H+180 -> HSV2BGR", img5)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

# Answer Six
# img = cv2.imread("imori.jpg")
#
# img6 = COLORREDUCED(img)
#
# cv2.imshow("COLORREDUCED", img6)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

# Answer Seven
# img = cv2.imread("imori.jpg")
#
# img7 = AVERAGEPOOLING(img)
#
# cv2.imshow("AVERAGEPOOLING", img7)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

# Answer Eight
# img = cv2.imread("imori.jpg")
#
# img8 = MAXPOOLING(img)
#
# cv2.imshow("MAXPOOLING", img8)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

# Answer Nine
# img = cv2.imread("imori_noise.jpg")
#
# img9 = GAUSSIANFILTER(img)
#
# cv2.imshow("GAUSSIANFILTER", img9)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

# Answer 10
# img = cv2.imread("imori_noise.jpg")
#
# img10 = MEDIANFILTER(img)
# 
# cv2.imshow("MEDIANFILTER", img10)
# cv2.waitKey(0)
# cv2.destroyAllWindows()