library(readxl)
getwd()
## [1] "C:/Users/ykim2/Downloads/MC/DATA101"
# Read the workbook through an explicit path instead of calling setwd(), so
# loading the data does not change the session's working directory.
data_dir <- "C:/Users/ykim2/Downloads/MC/DATA101"
df_mnm <- read_excel(file.path(data_dir, "mnm_data.xlsx"))
head(df_mnm)
## # A tibble: 6 x 6
##   student_id    id color defect total weight_grams
##   <chr>      <dbl> <chr> <chr>  <dbl>        <dbl>
## 1 AP_LV          1 r     c         27           40
## 2 AP_LV          2 r     l         27           40
## 3 AP_LV          3 r     z         27           40
## 4 AP_LV          4 r     z         27           40
## 5 AP_LV          5 r     z         27           40
## 6 AP_LV          6 o     l         27           40

– Generate simple charts and graphs:

1. Contingency Tables

# One-way frequency tables: number of rows per student_id, per defect code,
# and per recorded weight_grams value.
table(df_mnm[["student_id"]])
## 
##  AP_LV   CFKK   chkl  cr+sd dprtnm elegrz   jbas   MHJH phranw   TSNR 
##     27     24     54     53     54     18     19     52     55     26
table(df_mnm[["defect"]])
## 
##   c   l   m   z 
##  33  42   9 298
table(df_mnm[["weight_grams"]])
## 
##  25  40  42  48  50 
##  24 105  55  54 144
# Two-way table: color in the rows, defect code in the columns.
table(df_mnm[["color"]], df_mnm[["defect"]])
##     
##       c  l  m  z
##   bl  7 13  4 58
##   br  2  3  0 51
##   g   5 10  1 35
##   o   6  8  2 71
##   r   6  4  1 36
##   y   7  4  1 47
library("gmodels")
# Cross-tabulate color (rows) by defect (columns). Each cell of the printout
# reports the count, its chi-square contribution, and the row, column, and
# table proportions. The row/column headers in the printout are the argument
# expressions exactly as written here.
CrossTable(df_mnm$color, df_mnm$defect)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  382 
## 
##  
##              | df_mnm$defect 
## df_mnm$color |         c |         l |         m |         z | Row Total | 
## -------------|-----------|-----------|-----------|-----------|-----------|
##           bl |         7 |        13 |         4 |        58 |        82 | 
##              |     0.001 |     1.761 |     2.214 |     0.557 |           | 
##              |     0.085 |     0.159 |     0.049 |     0.707 |     0.215 | 
##              |     0.212 |     0.310 |     0.444 |     0.195 |           | 
##              |     0.018 |     0.034 |     0.010 |     0.152 |           | 
## -------------|-----------|-----------|-----------|-----------|-----------|
##           br |         2 |         3 |         0 |        51 |        56 | 
##              |     1.665 |     1.619 |     1.319 |     1.225 |           | 
##              |     0.036 |     0.054 |     0.000 |     0.911 |     0.147 | 
##              |     0.061 |     0.071 |     0.000 |     0.171 |           | 
##              |     0.005 |     0.008 |     0.000 |     0.134 |           | 
## -------------|-----------|-----------|-----------|-----------|-----------|
##            g |         5 |        10 |         1 |        35 |        51 | 
##              |     0.080 |     3.441 |     0.034 |     0.576 |           | 
##              |     0.098 |     0.196 |     0.020 |     0.686 |     0.134 | 
##              |     0.152 |     0.238 |     0.111 |     0.117 |           | 
##              |     0.013 |     0.026 |     0.003 |     0.092 |           | 
## -------------|-----------|-----------|-----------|-----------|-----------|
##            o |         6 |         8 |         2 |        71 |        87 | 
##              |     0.306 |     0.256 |     0.001 |     0.144 |           | 
##              |     0.069 |     0.092 |     0.023 |     0.816 |     0.228 | 
##              |     0.182 |     0.190 |     0.222 |     0.238 |           | 
##              |     0.016 |     0.021 |     0.005 |     0.186 |           | 
## -------------|-----------|-----------|-----------|-----------|-----------|
##            r |         6 |         4 |         1 |        36 |        47 | 
##              |     0.927 |     0.264 |     0.010 |     0.012 |           | 
##              |     0.128 |     0.085 |     0.021 |     0.766 |     0.123 | 
##              |     0.182 |     0.095 |     0.111 |     0.121 |           | 
##              |     0.016 |     0.010 |     0.003 |     0.094 |           | 
## -------------|-----------|-----------|-----------|-----------|-----------|
##            y |         7 |         4 |         1 |        47 |        59 | 
##              |     0.711 |     0.953 |     0.109 |     0.021 |           | 
##              |     0.119 |     0.068 |     0.017 |     0.797 |     0.154 | 
##              |     0.212 |     0.095 |     0.111 |     0.158 |           | 
##              |     0.018 |     0.010 |     0.003 |     0.123 |           | 
## -------------|-----------|-----------|-----------|-----------|-----------|
## Column Total |        33 |        42 |         9 |       298 |       382 | 
##              |     0.086 |     0.110 |     0.024 |     0.780 |           | 
## -------------|-----------|-----------|-----------|-----------|-----------|
## 
## 

2. Bar Plots

# Bar plot of the defect-code counts.
barplot(table(df_mnm$defect))

# `col` is the barplot() argument that sets the fill color of each bar.
# The color table has six levels (bl, br, g, o, r, y), so six fill colors are
# required; with fewer, R recycles the vector and bars get the wrong color.
# Colors are looked up by level name so each bar always gets its own color.
mnm_palette <- c(
  bl = "blue", br = "brown", g = "green",
  o = "orange", r = "red", y = "yellow"
)
color_counts <- table(df_mnm$color)
barplot(
  color_counts,
  col = mnm_palette[names(color_counts)],
  main = "My M&M Color Distribution",
  xlab = "COLOR",
  ylab = "Count",
  border = "gray",
  density = 7.5
)

library(ggplot2)
# Bar chart of row counts per color; the fill is mapped to the same variable.
ggplot(df_mnm, mapping = aes(x = color, fill = color)) +
  geom_bar()

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Same chart, with the data piped into ggplot(); alpha = 0.5 makes the bars
# semi-transparent.
color_aes <- aes(x = color, fill = color)
df_mnm %>%
  ggplot(color_aes) +
  geom_bar(alpha = 0.5)

# Build the defect-by-color table once, print it, then plot it. Given a matrix
# of counts, barplot() draws one stacked bar per column (here, per color).
bar_data <- table(df_mnm$defect, df_mnm$color)
bar_data
##    
##     bl br  g  o  r  y
##   c  7  2  5  6  6  7
##   l 13  3 10  8  4  4
##   m  4  0  1  2  1  1
##   z 58 51 35 71 36 47
barplot(bar_data)

# Colors from: https://r-charts.com/color-palettes/
# One fill color per defect code (the rows of bar_data: c, l, m, z).
# The legend position `x` in args.legend must be one of "bottomright",
# "bottom", "bottomleft", "left", "topleft", "top", "topright", "right",
# or "center"; any other keyword raises a match.arg error.
barplot(
  bar_data,
  col = c("#ECE0D6", "#C4D8F3", "#FFCC9E", "#A0A6A7"),
  legend.text = TRUE,
  args.legend = list(x = "top", horiz = TRUE)
)

3. Histograms

# Histogram of the recorded weight_grams values.
hist(
  df_mnm[["weight_grams"]],
  col = "#93BA82",
  main = "MnM bag Weight in Grams",
  xlab = "Weight in Grams",
  ylab = "Frequency"
)

4. Box Plots & Comparative Box Plots

# Tukey five-number summary: minimum, lower hinge, median, upper hinge, maximum.
fivenum(df_mnm[["weight_grams"]])
## [1] 25 40 48 50 50
# summary() reports the quartiles and additionally the mean.
summary(df_mnm[["weight_grams"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   25.00   40.00   48.00   44.25   50.00   50.00
# Summary for the rows with color "bl" only: row count, mean weight, and the
# five-number summary of weight_grams.
# fivenum() returns five values, so the result has five rows, and the
# single-valued `number` and mean columns are repeated on every row. The
# unnamed expressions become the column names shown in the printed tibble.
# NOTE(review): newer dplyr releases deprecate multi-row summarise() results
# in favour of reframe() -- confirm against the installed dplyr version.
df_mnm %>%
  filter(color == "bl") %>%
  summarise(number = n() , mean(weight_grams), fivenum(weight_grams))
## # A tibble: 5 x 3
##   number `mean(weight_grams)` `fivenum(weight_grams)`
##    <int>                <dbl>                   <dbl>
## 1     82                 45.1                      25
## 2     82                 45.1                      40
## 3     82                 45.1                      48
## 4     82                 45.1                      50
## 5     82                 45.1                      50
# Single box plot of all recorded weights.
boxplot(df_mnm[["weight_grams"]])

# Side-by-side box plots of weight by color, with one fill color per level.
box_fill <- c("blue", "brown", "green", "orange", "red", "yellow")
boxplot(
  weight_grams ~ color,
  data = df_mnm,
  xlab = "Color",
  ylab = "Grams",
  main = "Box and Whisker",
  col = box_fill
)

5. Scatterplots (& Linear Models)

# Scatter plots need continuous quantitative variables, so this section
# switches to the built-in airquality dataset.
data("airquality")
with(airquality, plot(Ozone, Temp, xlab = "Ozone", ylab = "Temp"))

with(
  airquality,
  plot(Ozone, Wind, xlab = "Ozone", ylab = "Wind", col = "#3F8BBA")
)

# Scatter-plot matrix of the four measurement columns.
scatter_vars <- c("Ozone", "Solar.R", "Wind", "Temp")
pairs(airquality[, scatter_vars])

Thank you!