Code Example for Community Service

Authors

Ailu Yu & Eva Zhang

Import Dataset

library(readr)
library(psych)
# Load the Starbucks drink menu CSV (path is relative to the working directory).
data <- readr::read_csv(file = "starbucks_drinkMenu_expanded.csv")
Rows: 242 Columns: 18
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (9): Beverage_category, Beverage, Beverage_prep, Total Fat (g), Vitamin ...
dbl (9): Calories, Trans Fat (g), Saturated Fat (g), Sodium (mg), Total Carb...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print per-column summary statistics (n, mean, sd, median, range, skew, ...)
# for the raw data. Columns flagged with `*` in the output are non-numeric.
psych::describe(x = data)
                        vars   n   mean     sd median trimmed    mad min   max
Beverage_category*         1 242   5.02   3.03    5.0    5.02   4.45   1   9.0
Beverage*                  2 242  17.70   9.92   17.0   17.66  13.34   1  33.0
Beverage_prep*             3 242   6.80   3.93    8.0    6.79   5.93   1  13.0
Calories                   4 242 193.87 102.86  185.0  190.10 111.19   0 510.0
Total Fat (g)*             5 242  11.33   7.70   13.0   11.20  10.38   1  24.0
Trans Fat (g)              6 242   1.31   1.64    0.5    1.00   0.74   0   9.0
Saturated Fat (g)          7 242   0.04   0.07    0.0    0.02   0.00   0   0.3
Sodium (mg)                8 242   6.36   8.63    5.0    4.59   7.41   0  40.0
Total Carbohydrates (g)    9 242 128.88  82.30  125.0  124.05  81.54   0 340.0
Cholesterol (mg)          10 242  35.99  20.80   34.0   35.07  22.24   0  90.0
Dietary Fibre (g)         11 242   0.81   1.45    0.0    0.48   0.00   0   8.0
Sugars (g)                12 242  32.96  19.73   32.0   31.89  17.79   0  84.0
Protein (g)               13 242   6.98   4.87    6.0    6.59   4.45   0  20.0
Vitamin A (% DV)*         14 242   5.61   3.55    5.0    5.52   4.45   1  11.0
Vitamin C (% DV)*         15 242   2.03   2.16    1.0    1.49   0.00   1  10.0
Calcium (% DV)*           16 242   5.43   3.57    5.0    5.08   4.45   1  14.0
Iron (% DV)*              17 242   6.39   5.26    4.0    5.63   2.97   1  18.0
Caffeine (mg)*            18 241  18.39  12.31   16.0   18.44  19.27   1  36.0
                        range  skew kurtosis   se
Beverage_category*        8.0 -0.05    -1.50 0.19
Beverage*                32.0  0.02    -1.41 0.64
Beverage_prep*           12.0 -0.17    -1.19 0.25
Calories                510.0  0.37    -0.13 6.61
Total Fat (g)*           23.0  0.02    -1.51 0.49
Trans Fat (g)             9.0  1.67     2.79 0.11
Saturated Fat (g)         0.3  1.84     2.49 0.00
Sodium (mg)              40.0  1.66     2.39 0.55
Total Carbohydrates (g) 340.0  0.47    -0.30 5.29
Cholesterol (mg)         90.0  0.38    -0.42 1.34
Dietary Fibre (g)         8.0  2.86     9.04 0.09
Sugars (g)               84.0  0.46    -0.25 1.27
Protein (g)              20.0  0.70    -0.27 0.31
Vitamin A (% DV)*        10.0  0.19    -1.54 0.23
Vitamin C (% DV)*         9.0  2.05     3.18 0.14
Calcium (% DV)*          13.0  0.69    -0.39 0.23
Iron (% DV)*             17.0  0.92    -0.40 0.34
Caffeine (mg)*           35.0 -0.03    -1.54 0.79

Data Cleaning

library(ggplot2)

Attaching package: 'ggplot2'
The following objects are masked from 'package:psych':

    %+%, alpha
library(tidyverse)
── Attaching packages
───────────────────────────────────────
tidyverse 1.3.2 ──
✔ tibble  3.2.1     ✔ dplyr   1.1.3
✔ tidyr   1.3.0     ✔ stringr 1.5.0
✔ purrr   1.0.2     ✔ forcats 0.5.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ ggplot2::%+%()   masks psych::%+%()
✖ ggplot2::alpha() masks psych::alpha()
✖ dplyr::filter()  masks stats::filter()
✖ dplyr::lag()     masks stats::lag()
library(ggside)
Registered S3 method overwritten by 'ggside':
  method from   
  +.gg   ggplot2
# Prepare the plotting data in one pipeline:
#   1. add short aliases for the cholesterol and caffeine columns,
#   2. drop rows whose caffeine entry is missing,
#   3. label each drink from its caffeine entry, which is compared as text:
#      "0" -> "Caffeine-Free", "varies"/"Varies" -> "Tea", anything else ->
#      "Coffee".
data <- data %>%
  mutate(
    cholesterol = `Cholesterol (mg)`,
    Caffeine = `Caffeine (mg)`
  ) %>%
  filter(!is.na(Caffeine)) %>%
  mutate(
    drink_type = case_when(
      Caffeine == "0" ~ "Caffeine-Free",
      Caffeine %in% c("varies", "Varies") ~ "Tea",
      .default = "Coffee"
    )
  )

Joint Plots Example

Jointplot: Scatter Plot with Side Histograms

# Joint plot: scatter of calories vs. cholesterol with a smoothed trend line in
# the main panel, plus a count histogram of each variable in the side panels.
ggplot(data = data, mapping = aes(x = Calories, y = cholesterol)) +
  geom_point(alpha = 0.7, size = 1.5) +
  # No smoothing method is specified, so ggplot2 chooses one itself.
  geom_smooth(mapping = aes(color = NULL), se = TRUE) +
  # Side panel along the x axis: histogram of Calories.
  geom_xsidehistogram(
    mapping = aes(y = after_stat(count)),
    alpha = 0.2,
    color = "black",
    bins = 20
  ) +
  # Side panel along the y axis: histogram of cholesterol.
  geom_ysidehistogram(
    mapping = aes(x = after_stat(count)),
    alpha = 0.2,
    color = "black",
    bins = 20
  ) +
  theme_bw() +
  # Size the side panels relative to the main panel.
  theme(
    ggside.panel.scale.x = 0.3,
    ggside.panel.scale.y = 0.3
  ) +
  labs(
    title = "Joinplot: Scatter Plot with Side Histograms",
    x = "Calories",
    y = "Cholesterol(mg)"
  )
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Jointplot: Scatter Plot with Side Density Plot

# Joint plot: scatter of calories vs. cholesterol colored by drink type, with a
# single overall trend line and per-drink-type density curves in the side
# panels.
ggplot(data, aes(Calories, cholesterol, color = drink_type)) +
  geom_point(size = 1.5, alpha = 0.7) +
  # `color = NULL` removes the inherited drink_type grouping, so one smoother is
  # fitted to all points rather than one per drink type.
  geom_smooth(aes(color = NULL), se = TRUE) +
  # Line thickness is set with `linewidth`; using `size` for lines is
  # deprecated since ggplot2 3.4.0 and triggers a warning.
  geom_xsidedensity(
    aes(y = after_stat(density), fill = drink_type),
    linewidth = 0.7,
    alpha = 0.2
  ) +
  geom_ysidedensity(
    aes(x = after_stat(density), fill = drink_type),
    linewidth = 0.7,
    alpha = 0.2
  ) +
  labs(
    title = "Joinplot: Scatter Plot with Side Density Plot",
    x = "Calories",
    y = "Cholesterol"
  ) +
  theme_bw() +
  # Size the side panels relative to the main panel.
  theme(
    ggside.panel.scale.x = 0.3,
    ggside.panel.scale.y = 0.3
  ) +
  scale_color_brewer(palette = "Set1")
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Joint Plot with Hexagonal Heatmap

# Joint plot: hexagonally binned heatmap of calories vs. cholesterol in the
# main panel, with boxplots split by drink type in the side panels.
ggplot(data = data, mapping = aes(x = Calories, y = cholesterol)) +
  geom_hex(bins = 15) +
  scale_fill_viridis_c() +
  # Side panel along the x axis: boxplots of Calories, one per drink type.
  geom_xsideboxplot(
    mapping = aes(color = drink_type),
    varwidth = TRUE,
    orientation = "y"
  ) +
  # Side panel along the y axis: boxplots of cholesterol, one per drink type.
  geom_ysideboxplot(
    mapping = aes(color = drink_type),
    varwidth = TRUE,
    orientation = "x"
  ) +
  theme_bw() +
  # Square main panel; side panels sized relative to it.
  theme(
    aspect.ratio = 1,
    ggside.panel.scale.x = 0.3,
    ggside.panel.scale.y = 0.3
  ) +
  labs(
    title = "Joint Plot with Hexagonal Heatmap",
    x = "Calories",
    y = "Cholesterol (mg)"
  )
Warning in FUN(X[[i]], ...): no non-missing arguments to max; returning -Inf

Warning in FUN(X[[i]], ...): no non-missing arguments to max; returning -Inf

Warning in FUN(X[[i]], ...): no non-missing arguments to max; returning -Inf

Warning in FUN(X[[i]], ...): no non-missing arguments to max; returning -Inf

Jointplot: Hexagonal Heatmap with Side Boxplot

# Joint plot: hexagonally binned heatmap of calories vs. cholesterol overlaid
# with red 2D density contours, with boxplots split by drink type in the side
# panels.
ggplot(data = data, mapping = aes(x = Calories, y = cholesterol)) +
  geom_hex(bins = 15) +
  scale_fill_viridis_c() +
  # Contour lines of the 2D density estimate, drawn on top of the hexagons.
  geom_density_2d(linewidth = 0.75, color = "red", bins = 6) +
  # Side panel along the x axis: boxplots of Calories, one per drink type.
  geom_xsideboxplot(
    mapping = aes(color = drink_type),
    varwidth = TRUE,
    orientation = "y"
  ) +
  # Side panel along the y axis: boxplots of cholesterol, one per drink type.
  geom_ysideboxplot(
    mapping = aes(color = drink_type),
    varwidth = TRUE,
    orientation = "x"
  ) +
  theme_bw() +
  # Square main panel; side panels sized relative to it.
  theme(
    aspect.ratio = 1,
    ggside.panel.scale.x = 0.2,
    ggside.panel.scale.y = 0.2
  ) +
  labs(
    title = "Jointplot: Hexagonal Heatmap with Side Boxplot",
    x = "Calories",
    y = "Cholesterol (mg)"
  )
Warning in FUN(X[[i]], ...): no non-missing arguments to max; returning -Inf

Warning in FUN(X[[i]], ...): no non-missing arguments to max; returning -Inf

Warning in FUN(X[[i]], ...): no non-missing arguments to max; returning -Inf

Warning in FUN(X[[i]], ...): no non-missing arguments to max; returning -Inf

Jointplot and Facet Grid

# Joint plot faceted by drink type: one scatter panel per drink type (stacked
# in rows), with density curves for each variable in the side panels.
ggplot(data, aes(Calories, cholesterol)) +
  geom_point(aes(color = drink_type)) +
  # `after_stat()` replaces `stat()`, which is deprecated since ggplot2 3.4.0
  # and triggers a warning.
  geom_xsidedensity(aes(y = after_stat(density), color = drink_type)) +
  geom_ysidedensity(aes(x = after_stat(density), color = drink_type)) +
  facet_grid(drink_type ~ .) +
  labs(title = "Jointplot and Facet Grid") +
  theme_bw() +
  # Size the side panels relative to the main panels.
  theme(
    ggside.panel.scale.x = 0.3,
    ggside.panel.scale.y = 0.15
  ) +
  ylab("Cholesterol (mg)") +
  xlab("Calories")
Warning: `stat(density)` was deprecated in ggplot2 3.4.0.
ℹ Please use `after_stat(density)` instead.