# Set the knitr default chunk option echo = TRUE for the whole document
knitr::opts_chunk$set(echo = TRUE)

# load libraries (you may need to install plotly)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly) 
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
# Paste the exported values for each colour channel between the parentheses.
# Each vector alternates a position index with a channel value:
# index, value, index, value, ...
red <- c(
  0, 207, 1, 125, 2, 101, 3, 175, 4, 118, 5, 97, 6, 200, 7, 210,
  8, 192, 9, 141, 10, 144, 11, 189, 12, 149, 13, 212, 14, 127, 15, 240
)

green <- c(
  0, 147, 1, 100, 2, 73, 3, 133, 4, 64, 5, 79, 6, 149, 7, 163,
  8, 133, 9, 79, 10, 104, 11, 135, 12, 95, 13, 171, 14, 95, 15, 188
)

blue <- c(
  0, 133, 1, 98, 2, 78, 3, 105, 4, 68, 5, 97, 6, 116, 7, 147,
  8, 113, 9, 43, 10, 71, 11, 106, 12, 63, 13, 149, 14, 73, 15, 143
)

# Keep only the even positions (2nd, 4th, ...) of each vector: these are the
# channel values, with the interleaved indices dropped
R <- red[seq_along(red) %% 2 == 0]
G <- green[seq_along(green) %% 2 == 0]
B <- blue[seq_along(blue) %% 2 == 0]

# Combine the three channels into one tibble, one row per colour sample
df <- tibble(R = R, G = G, B = B)

# Same data plus a hex_code column: each row's colour as a hex string.
# rgb() expects intensities in [0, 1] by default, hence the division by 255.
df_hex <- mutate(
  df,
  hex_code = rgb(R / 255, G / 255, B / 255)
)


# 3-D scatterplot of the samples in RGB space: each marker takes its colour
# from hex_code, and every axis is fixed to the range 0-255
df_hex %>%
  plot_ly(x = ~R, y = ~G, z = ~B, marker = list(color = ~hex_code)) %>%
  add_markers() %>%
  layout(
    scene = list(
      xaxis = list(title = "Red", nticks = 4, range = list(0, 255)),
      yaxis = list(title = "Green", nticks = 4, range = list(0, 255)),
      zaxis = list(title = "Blue", nticks = 4, range = list(0, 255))
    )
  )
# cluster data with kmeans
# Fix the RNG seed first: kmeans() starts from randomly chosen centres and the
# sample below is random, so without a seed the cluster labels and the rows
# shown would change on every run.
set.seed(42)

# Cluster on the three colour channels only, so that re-running this step
# after `cluster` has been added does not feed the labels back into kmeans().
# nstart = 25 tries 25 random starts and keeps the best solution.
df$cluster <- factor(
  kmeans(df[c("R", "G", "B")], centers = 2, nstart = 25)$cluster
)

# view a small sample of data to see how it worked
# (slice_sample() supersedes sample_n())
slice_sample(df, n = 10)
## # A tibble: 10 × 4
##        R     G     B cluster
##    <dbl> <dbl> <dbl> <fct>  
##  1   144   104    71 2      
##  2   212   171   149 1      
##  3   240   188   143 1      
##  4   141    79    43 2      
##  5    97    79    97 2      
##  6   118    64    68 2      
##  7   127    95    73 2      
##  8   101    73    78 2      
##  9   125   100    98 2      
## 10   200   149   116 1
# create 3-D scatterplot color-coded by cluster
# `colors` is supplied explicitly (one colour per cluster). Without it plotly
# asks RColorBrewer::brewer.pal() for a 2-colour "Set2" palette, which warns
# because the minimum palette size is 3.
plot_ly(
  df,
  x = ~R, y = ~G, z = ~B,
  color = ~cluster,
  colors = c("#66C2A5", "#FC8D62")
) %>%
  add_markers(size = 1.5)
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels

## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
# create new data frame with one row per cluster and the average color of
# that skin tone cluster as a hex code.
# summarise() collapses each group directly; rows come back ordered by cluster
# level, and .groups = "drop" returns an ungrouped tibble without the
# "grouped output" message.
df_clust <- df %>%
  group_by(cluster) %>%
  summarise(
    hex_avg = rgb(mean(R) / 255, mean(G) / 255, mean(B) / 255),
    .groups = "drop"
  )
## `summarise()` has grouped output by 'cluster'. You can override using the
## `.groups` argument.
# create color palette, named by cluster level so that scale_fill_manual()
# matches each colour to its cluster by name rather than by position
hex_pal <- setNames(df_clust$hex_avg, as.character(df_clust$cluster))

# create a bar graph of the clusters, with bar fills taken from hex_pal
# (geom_bar() counts the rows per cluster itself, so no group_by() is needed)
ggplot(df, aes(x = cluster, fill = cluster)) +
  geom_bar() +
  scale_fill_manual(values = hex_pal) +
  labs(
    title = "Frequency of Skin Tones",
    x = "Skin Tone Cluster",
    y = "Frequency"
  ) +
  theme_minimal()