K-means algorithm -- Application

Keywords: Python network

1. Image compression using K-means algorithm

Read a picture

Inspect the image's file size, memory footprint, and data structure, then flatten it into a linear array of pixels

Clustering image pixel colors with kmeans

Get the color category of each pixel, and the color of each category

Compressed image generation: replace each pixel's original color with the color of its cluster center, then reshape the result back into the 2D image

Observe the file size and the memory footprint of the compressed picture

from sklearn.datasets import load_sample_image
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.image as img
import sys
# Read a sample photo bundled with scikit-learn
china = load_sample_image('china.jpg')

# Show original picture
plt.imshow(china)
plt.show()

# Downsample: keep every 3rd pixel along each of the first two axes
image = china[::3, ::3]
# Flatten to one row per pixel with 3 color values per row
x = image.reshape(-1, 3)
plt.imshow(image)
plt.show()

# Use the K-Means algorithm to reduce the number of distinct colors
# Number of clusters, i.e. number of distinct colors kept in the result
n_colors = 64
# A fixed seed makes repeated runs produce the same palette
model = KMeans(n_clusters=n_colors, random_state=0)
# Fit the model and get the cluster index assigned to every pixel
label = model.fit_predict(x)
# The cluster centers are the palette colors (floating point values)
colors = model.cluster_centers_
# Replace each pixel by the color of its cluster center and restore the image shape
new_image = colors[label].reshape(image.shape)
# Round to the nearest integer before casting to 8-bit unsigned integer:
# astype alone truncates toward zero, which would bias every channel darker
new_image = np.rint(new_image).astype(np.uint8)
plt.imshow(new_image)
plt.show()

First: the original image

Second: the downsampled image

Third: the image compressed using the K-means algorithm

Save pictures

View the memory size of original and compressed images

 

2. Observe the problems that can be solved with K-means in learning and life.

Complete an application case covering data, model, training, testing, and prediction.

import matplotlib.pyplot as plt
import sklearn.datasets as ds
import matplotlib.colors
# Create data
N=800
centers=4
# Generate N (800) sample points scattered around `centers` (4) blob centers;
# random_state=0 makes the generated data repeatable.
data,y=ds.make_blobs(N,centers=centers,random_state=0)
# Raw data distribution
# matplotlib keeps its default drawing properties (figure size, points per inch, line width, color, style, axes, grid, text, font, etc.) in rcParams.
# Select the SimHei font and turn off the Unicode minus sign
# (presumably so CJK text and negative tick labels render with that font; confirm).
matplotlib.rcParams['font.sans-serif'] = [u'SimHei']
matplotlib.rcParams['axes.unicode_minus'] = False
# Four-color map (red, green, blue, magenta) used to color points by label
cm = matplotlib.colors.ListedColormap(list('rgbm'))
plt.scatter(data[:,0],data[:,1],c=y,cmap=cm)
plt.title(u'Raw data distribution')
plt.grid()
plt.show()

# Using K-Means algorithm
from sklearn.cluster import KMeans
# n_clusters is k, the number of clusters to find.
# NOTE(review): the data above was generated with 4 centers but k is 3 here;
# confirm this mismatch is intentional.
model=KMeans(n_clusters=3,init='k-means++')
# Fit the model and get the predicted cluster label of each sample
y_pre=model.fit_predict(data)
plt.scatter(data[:,0],data[:,1],c=y_pre,cmap=cm)
plt.title(u'K-Means clustering')
plt.grid()
plt.show()

 

# Print the first and second coordinate of every raw sample
print(data[:,0],data[:,1])

# Print the cluster label predicted for each sample
print(y_pre)

 

 

Posted by troinfo on Sat, 18 Apr 2020 08:44:21 -0700