# Global knitr chunk option: echo = TRUE includes each chunk's source code in
# the rendered document alongside its output.
knitr::opts_chunk$set(echo = TRUE)
# load libraries (you may need to install plotly)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
# Raw colour readings, one vector per channel. Each vector alternates
# index, value, index, value, ... -- paste your own values between the
# parentheses in that same layout.
red <- c(
  0, 207, 1, 125, 2, 101, 3, 175,
  4, 118, 5, 97, 6, 200, 7, 210,
  8, 192, 9, 141, 10, 144, 11, 189,
  12, 149, 13, 212, 14, 127, 15, 240
)
green <- c(
  0, 147, 1, 100, 2, 73, 3, 133,
  4, 64, 5, 79, 6, 149, 7, 163,
  8, 133, 9, 79, 10, 104, 11, 135,
  12, 95, 13, 171, 14, 95, 15, 188
)
blue <- c(
  0, 133, 1, 98, 2, 78, 3, 105,
  4, 68, 5, 97, 6, 116, 7, 147,
  8, 113, 9, 43, 10, 71, 11, 106,
  12, 63, 13, 149, 14, 73, 15, 143
)

# Keep only the even positions (the values), discarding the interleaved
# indices that sit at the odd positions.
R <- red[seq_along(red) %% 2 == 0]
G <- green[seq_along(green) %% 2 == 0]
B <- blue[seq_along(blue) %% 2 == 0]
# Collect the three channels into one tibble, one row per colour reading.
df <- tibble(R = R, G = G, B = B)
# Copy of the data with an extra hex colour string per row. rgb() expects
# intensities in [0, 1] by default, hence the division by 255.
df_hex <- df
df_hex$hex_code <- rgb(df$R / 255, df$G / 255, df$B / 255)
# 3-D scatterplot of the readings in RGB space; each marker is drawn in the
# colour it represents (its own hex code).
df_hex %>%
  plot_ly(x = ~R, y = ~G, z = ~B, marker = list(color = ~hex_code)) %>%
  add_markers() %>%
  layout(
    # All three axes share the same settings and differ only in their title,
    # so build the x/y/z axis specs from a named vector of titles.
    scene = lapply(
      c(xaxis = "Red", yaxis = "Green", zaxis = "Blue"),
      function(axis_title) {
        list(title = axis_title, nticks = 4, range = list(0, 255))
      }
    )
  )
# cluster data with kmeans (two clusters)
# kmeans() starts from randomly chosen centres, so fix the seed to get the
# same cluster assignment (and the same cluster numbering) on every run.
set.seed(42)
# Cluster on the R, G and B columns only: selecting them explicitly keeps the
# `cluster` column out of the input if this line is ever re-run after that
# column has been added. nstart = 25 tries several random starts and keeps the
# best one, which guards against a poor local optimum.
df$cluster <- factor(
  kmeans(df[c("R", "G", "B")], centers = 2, nstart = 25)$cluster
)
# view a small random sample of rows to see how the clustering worked
# (slice_sample() is the current dplyr replacement for the superseded
# sample_n())
slice_sample(df, n = 10)
## # A tibble: 10 × 4
## R G B cluster
## <dbl> <dbl> <dbl> <fct>
## 1 144 104 71 2
## 2 212 171 149 1
## 3 240 188 143 1
## 4 141 79 43 2
## 5 97 79 97 2
## 6 118 64 68 2
## 7 127 95 73 2
## 8 101 73 78 2
## 9 125 100 98 2
## 10 200 149 116 1
# create 3-D scatterplot color-coded by cluster
# An explicit two-colour palette is supplied because there are only two
# clusters: without `colors`, plotly looks the palette up with
# RColorBrewer::brewer.pal(N, "Set2"), which warns ("minimal value for n is 3")
# whenever fewer than three colours are requested.
plot_ly(
  df,
  x = ~R, y = ~G, z = ~B,
  color = ~cluster,
  colors = c("#66C2A5", "#FC8D62")
) %>%
  add_markers(size = 1.5)
# create new data frame with one row per cluster holding the average colour
# of that cluster as a hex code (mean of each channel, rescaled to [0, 1] for
# rgb()).
# group_by() is used (rather than `.by`) so the rows come back sorted by
# cluster level; .groups = "drop" returns an ungrouped tibble and avoids the
# "summarise() has grouped output" message.
df_clust <- df %>%
  group_by(cluster) %>%
  summarise(
    hex_avg = rgb(mean(R) / 255, mean(G) / 255, mean(B) / 255),
    .groups = "drop"
  )
# create color palette: the average hex colour of each cluster, named by
# cluster level so that scale_fill_manual() pairs every colour with its own
# cluster by name instead of relying on row order.
hex_pal <- setNames(df_clust$hex_avg, as.character(df_clust$cluster))
# create a bar graph of the clusters: geom_bar() counts the rows in each
# cluster, and each bar is filled with the colour hex_pal holds for it.
# (No group_by() is needed here -- geom_bar() does the counting itself and
# ggplot() does not use dplyr grouping.)
ggplot(df, aes(x = cluster, fill = cluster)) +
  geom_bar() +
  scale_fill_manual(values = hex_pal) +
  labs(
    title = "Frequency of Skin Tones",
    x = "Skin Tone Cluster",
    y = "Frequency"
  ) +
  theme_minimal()
