K-means with R (Image compression)

Created by Ramses Alexander Coraspe Valdez

Created on July 12, 2020

  1. Define the value of K
  2. Choose K random centroids
  3. Assign data to centroids based on Euclidean distance
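  4. Move each of the K centroids to the geometric center (mean) of the points assigned to it
  5. Repeat from step 3 until the assignments stop changing (or a maximum number of iterations is reached)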
In [ ]:
install.packages("tidyverse")
install.packages("cluster")
install.packages("factoextra")
install.packages("NbClust") 
install.packages("jpeg") 
library(tidyverse)  # data manipulation
library(cluster)    # clustering algorithms
library(factoextra) # clustering algorithms & visualization
library("jpeg")
library(ggplot2)
library(NbClust)
In [27]:
# Load the source image from the working directory; the result is indexed
# below as justin[, , channel] with channels 1-3 read as Red, Green, Blue.
justin <- jpeg::readJPEG("justin.jpg")
In [28]:
# Dimensions of the image array. Later cells use element 1 as the number of
# pixel rows (y direction) and element 2 as the number of pixel columns (x).
dimension <- dim(justin)

Getting the RGB values of the image

In [29]:
# Flatten the image array into a data frame with one row per pixel.
# as.vector() walks each channel matrix column by column, so the row index
# changes fastest: x is repeated per column, and y counts down so that the
# first image row ends up at the top of the plot.
RGB <- local({
  n_rows <- dimension[1]
  n_cols <- dimension[2]
  data.frame(
    x_axis = rep(seq_len(n_cols), each = n_rows),
    y_axis = rep(rev(seq_len(n_rows)), times = n_cols),
    Red    = as.vector(justin[, , 1]),
    Green  = as.vector(justin[, , 2]),
    Blue   = as.vector(justin[, , 3])
  )
})

Plotting the RGB values

In [30]:
# Draw every pixel as a point coloured with its own RGB value, which
# reproduces the original image.
ggplot(RGB, aes(x = x_axis, y = y_axis)) +
  geom_point(colour = with(RGB, rgb(Red, Green, Blue))) +
  labs(title = "Original justin", x = "x-axis", y = "y-axis")
In [31]:
set.seed(123)

# Only the colour channels (columns 3-5: Red, Green, Blue) are clustered.
rgb_values <- RGB[c(3, 4, 5)]

# Fit k-means on the colour channels with `k` centres.
# NOTE(review): the recorded run of this cell emitted
# "Quick-TRANSfer stage steps exceeded maximum"; a different `algorithm`
# for kmeans() may avoid it -- confirm before changing, as results would differ.
fit_k <- function(k) {
  kmeans(rgb_values, centers = k, iter.max = 25, nstart = 1)
}

# Cluster plot for one fit, titled with its number of clusters.
plot_k <- function(fit, k) {
  fviz_cluster(fit, geom = "point", data = rgb_values) +
    ggtitle(paste("k =", k))
}

# Fits are run in increasing k so the random number stream is consumed in a
# fixed order after set.seed().
k2  <- fit_k(2)
k3  <- fit_k(3)
k4  <- fit_k(4)
k5  <- fit_k(5)
k6  <- fit_k(6)
k7  <- fit_k(7)
k8  <- fit_k(8)
k9  <- fit_k(9)
k10 <- fit_k(10)

# plots to compare
p1 <- plot_k(k2, 2)
p2 <- plot_k(k3, 3)
p3 <- plot_k(k4, 4)
p4 <- plot_k(k5, 5)
p5 <- plot_k(k6, 6)
p6 <- plot_k(k7, 7)
p7 <- plot_k(k8, 8)
p8 <- plot_k(k9, 9)
p9 <- plot_k(k10, 10)
  
Warning message:
“Quick-TRANSfer stage steps exceeded maximum (= 61440000)”
In [32]:
library(gridExtra)

# Show the nine cluster plots (k = 2..10) side by side in a 3 x 3 grid.
cluster_plots <- list(p1, p2, p3, p4, p5, p6, p7, p8, p9)
grid.arrange(grobs = cluster_plots, nrow = 3, ncol = 3)
In [33]:
set.seed(123)

# Total within-cluster sum of squares of a k-means fit on the colour
# channels (columns 3-5 of RGB) using `k` clusters.
wss <- function(k) {
  fit <- kmeans(RGB[c(3, 4, 5)], centers = k, iter.max = 25, nstart = 1)
  fit$tot.withinss
}

# Candidate numbers of clusters: k = 1 to k = 15.
k.values <- 1:15

# One wss value per candidate k.
wss_values <- vapply(k.values, wss, numeric(1))

# Elbow plot: wss against the number of clusters.
plot(
  x = k.values,
  y = wss_values,
  type = "b",
  pch = 19,
  frame = FALSE,
  xlab = "clusters K",
  ylab = "wss-clusters sum of squares"
)
Warning message:
“Quick-TRANSfer stage steps exceeded maximum (= 61440000)”
Warning message:
“Quick-TRANSfer stage steps exceeded maximum (= 61440000)”
In [34]:
# Reduce the image to k_cluster colours by clustering the pixel colours.
k_cluster <- 3
k_clstr <- kmeans(RGB[c("Red", "Green", "Blue")], centers = k_cluster)

# Every pixel takes the colour of the centre of the cluster it belongs to.
kcolors <- rgb(with(k_clstr, centers[cluster, ]))

ggplot(RGB, aes(x = x_axis, y = y_axis)) +
  geom_point(colour = kcolors) +
  labs(
    title = paste("k-Means Clustering of", k_cluster, "Colours"),
    x = "x",
    y = "y"
  )
In [35]:
# Reduce the image to k_cluster colours by clustering the pixel colours.
k_cluster <- 4
k_clstr <- kmeans(RGB[c("Red", "Green", "Blue")], centers = k_cluster)

# Every pixel takes the colour of the centre of the cluster it belongs to.
kcolors <- rgb(with(k_clstr, centers[cluster, ]))

ggplot(RGB, aes(x = x_axis, y = y_axis)) +
  geom_point(colour = kcolors) +
  labs(
    title = paste("k-Means Clustering of", k_cluster, "Colours"),
    x = "x",
    y = "y"
  )
In [36]:
# Reduce the image to k_cluster colours by clustering the pixel colours.
k_cluster <- 5
k_clstr <- kmeans(RGB[c("Red", "Green", "Blue")], centers = k_cluster)

# Every pixel takes the colour of the centre of the cluster it belongs to.
kcolors <- rgb(with(k_clstr, centers[cluster, ]))

ggplot(RGB, aes(x = x_axis, y = y_axis)) +
  geom_point(colour = kcolors) +
  labs(
    title = paste("k-Means Clustering of", k_cluster, "Colours"),
    x = "x",
    y = "y"
  )
In [37]:
# Reduce the image to k_cluster colours by clustering the pixel colours.
k_cluster <- 6
k_clstr <- kmeans(RGB[c("Red", "Green", "Blue")], centers = k_cluster)

# Every pixel takes the colour of the centre of the cluster it belongs to.
kcolors <- rgb(with(k_clstr, centers[cluster, ]))

ggplot(RGB, aes(x = x_axis, y = y_axis)) +
  geom_point(colour = kcolors) +
  labs(
    title = paste("k-Means Clustering of", k_cluster, "Colours"),
    x = "x",
    y = "y"
  )
In [38]:
# Reduce the image to k_cluster colours by clustering the pixel colours.
k_cluster <- 7
k_clstr <- kmeans(RGB[c("Red", "Green", "Blue")], centers = k_cluster)

# Every pixel takes the colour of the centre of the cluster it belongs to.
kcolors <- rgb(with(k_clstr, centers[cluster, ]))

ggplot(RGB, aes(x = x_axis, y = y_axis)) +
  geom_point(colour = kcolors) +
  labs(
    title = paste("k-Means Clustering of", k_cluster, "Colours"),
    x = "x",
    y = "y"
  )
In [39]:
# Reduce the image to k_cluster colours by clustering the pixel colours.
k_cluster <- 8
k_clstr <- kmeans(RGB[c("Red", "Green", "Blue")], centers = k_cluster)

# Every pixel takes the colour of the centre of the cluster it belongs to.
kcolors <- rgb(with(k_clstr, centers[cluster, ]))

ggplot(RGB, aes(x = x_axis, y = y_axis)) +
  geom_point(colour = kcolors) +
  labs(
    title = paste("k-Means Clustering of", k_cluster, "Colours"),
    x = "x",
    y = "y"
  )