– Generate simple charts and graphs:
1. Contingency Tables
# Frequency of each student_id value.
table(df_mnm[["student_id"]])
##
## AP_LV CFKK chkl cr+sd dprtnm elegrz jbas MHJH phranw TSNR
## 27 24 54 53 54 18 19 52 55 26
# Frequency of each defect code.
table(df_mnm[["defect"]])
##
## c l m z
## 33 42 9 298
# Frequency of each distinct weight_grams value.
table(df_mnm[["weight_grams"]])
##
## 25 40 42 48 50
## 24 105 55 54 144
# Two-way contingency table: color in rows, defect in columns.
table(
  df_mnm[["color"]],
  df_mnm[["defect"]]
)
##
## c l m z
## bl 7 13 4 58
## br 2 3 0 51
## g 5 10 1 35
## o 6 8 2 71
## r 6 4 1 36
## y 7 4 1 47
# Load gmodels, which is attached right before CrossTable() is used.
library("gmodels")
# Cross-tabulate color (rows) against defect (columns). Each cell reports the
# count, its chi-square contribution, and the row, column and table
# proportions. The printed axis labels are the argument expressions as
# written here.
CrossTable(df_mnm$color, df_mnm$defect)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 382
##
##
## | df_mnm$defect
## df_mnm$color | c | l | m | z | Row Total |
## -------------|-----------|-----------|-----------|-----------|-----------|
## bl | 7 | 13 | 4 | 58 | 82 |
## | 0.001 | 1.761 | 2.214 | 0.557 | |
## | 0.085 | 0.159 | 0.049 | 0.707 | 0.215 |
## | 0.212 | 0.310 | 0.444 | 0.195 | |
## | 0.018 | 0.034 | 0.010 | 0.152 | |
## -------------|-----------|-----------|-----------|-----------|-----------|
## br | 2 | 3 | 0 | 51 | 56 |
## | 1.665 | 1.619 | 1.319 | 1.225 | |
## | 0.036 | 0.054 | 0.000 | 0.911 | 0.147 |
## | 0.061 | 0.071 | 0.000 | 0.171 | |
## | 0.005 | 0.008 | 0.000 | 0.134 | |
## -------------|-----------|-----------|-----------|-----------|-----------|
## g | 5 | 10 | 1 | 35 | 51 |
## | 0.080 | 3.441 | 0.034 | 0.576 | |
## | 0.098 | 0.196 | 0.020 | 0.686 | 0.134 |
## | 0.152 | 0.238 | 0.111 | 0.117 | |
## | 0.013 | 0.026 | 0.003 | 0.092 | |
## -------------|-----------|-----------|-----------|-----------|-----------|
## o | 6 | 8 | 2 | 71 | 87 |
## | 0.306 | 0.256 | 0.001 | 0.144 | |
## | 0.069 | 0.092 | 0.023 | 0.816 | 0.228 |
## | 0.182 | 0.190 | 0.222 | 0.238 | |
## | 0.016 | 0.021 | 0.005 | 0.186 | |
## -------------|-----------|-----------|-----------|-----------|-----------|
## r | 6 | 4 | 1 | 36 | 47 |
## | 0.927 | 0.264 | 0.010 | 0.012 | |
## | 0.128 | 0.085 | 0.021 | 0.766 | 0.123 |
## | 0.182 | 0.095 | 0.111 | 0.121 | |
## | 0.016 | 0.010 | 0.003 | 0.094 | |
## -------------|-----------|-----------|-----------|-----------|-----------|
## y | 7 | 4 | 1 | 47 | 59 |
## | 0.711 | 0.953 | 0.109 | 0.021 | |
## | 0.119 | 0.068 | 0.017 | 0.797 | 0.154 |
## | 0.212 | 0.095 | 0.111 | 0.158 | |
## | 0.018 | 0.010 | 0.003 | 0.123 | |
## -------------|-----------|-----------|-----------|-----------|-----------|
## Column Total | 33 | 42 | 9 | 298 | 382 |
## | 0.086 | 0.110 | 0.024 | 0.780 | |
## -------------|-----------|-----------|-----------|-----------|-----------|
##
##
2. Bar Plots
# Bar plot of the defect frequencies.
barplot(
  table(df_mnm[["defect"]])
)

# `col` is the barplot() argument that sets the bar fill colors, one per bar.
# The color table has six levels (bl, br, g, o, r, y), so six fill colors are
# supplied in that order; with only five, the vector would be recycled and
# the "r" and "y" bars would be drawn in the wrong colors.
barplot(
  table(df_mnm$color),
  col = c("blue", "brown", "green", "orange", "red", "yellow"),
  main = "My M&M Color Distribution",
  xlab = "COLOR",
  ylab = "Count",
  border = "gray",
  density = 7.5
)

library(ggplot2)
# ggplot2 version of the color bar chart; bars are filled by color code.
ggplot(df_mnm, aes(color, fill = color)) +
  geom_bar()

library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Same bar chart, with the data piped into ggplot() and half-transparent bars.
df_mnm %>%
  ggplot(aes(color, fill = color)) +
  geom_bar(alpha = 0.5)

# Two-way contingency table: defect in rows, color in columns.
table(
  df_mnm[["defect"]],
  df_mnm[["color"]]
)
##
## bl br g o r y
## c 7 2 5 6 6 7
## l 13 3 10 8 4 4
## m 4 0 1 2 1 1
## z 58 51 35 71 36 47
# Keep the defect-by-color counts so they can be plotted more than once.
bar_data <- table(
  df_mnm[["defect"]],
  df_mnm[["color"]]
)
# One bar per color (column), split by defect (row).
barplot(height = bar_data)

# Stacked bars with one fill color per defect row of bar_data.
# Palette taken from: https://r-charts.com/color-palettes/
# The legend position passed as `x` in args.legend must be one of
# "bottomright", "bottom", "bottomleft", "left", "topleft", "top",
# "topright", "right", or "center"; any other keyword is rejected.
barplot(
  bar_data,
  col = c("#ECE0D6", "#C4D8F3", "#FFCC9E", "#A0A6A7"),
  legend.text = TRUE,
  args.legend = list(x = "top", horiz = TRUE)
)

3. Histograms
# Histogram of the recorded weights.
hist(
  x = df_mnm[["weight_grams"]],
  col = "#93BA82",
  main = "MnM bag Weight in Grams",
  xlab = "Weight in Grams",
  ylab = "Frequency"
)

4. Box Plots & Comparative Box Plots
# Five-number summary of the weights (the values a box plot draws).
fivenum(df_mnm[["weight_grams"]])
## [1] 25 40 48 50 50
# Same quantiles as fivenum(), plus the mean.
summary(df_mnm[["weight_grams"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 25.00 40.00 48.00 44.25 50.00 50.00
# Row count, mean weight and five-number summary for the "bl" rows only.
# fivenum() returns five values, so the result has five rows and the scalar
# summaries are repeated on each of them.
# NOTE(review): dplyr >= 1.1.0 reportedly deprecates multi-row summarise()
# results in favor of reframe() - confirm against the installed version.
df_mnm %>%
  filter(color == "bl") %>%
  summarise(
    number = n(),
    mean(weight_grams),
    fivenum(weight_grams)
  )
## # A tibble: 5 x 3
## number `mean(weight_grams)` `fivenum(weight_grams)`
## <int> <dbl> <dbl>
## 1 82 45.1 25
## 2 82 45.1 40
## 3 82 45.1 48
## 4 82 45.1 50
## 5 82 45.1 50
# Single box plot of all weights.
boxplot(df_mnm[["weight_grams"]])

# Comparative box plots: one box of weights per color, filled to match.
boxplot(
  weight_grams ~ color,
  data = df_mnm,
  xlab = "Color",
  ylab = "Grams",
  main = "Box and Whisker",
  col = c("blue", "brown", "green", "orange", "red", "yellow")
)

5. Scatterplots (& Linear Models)
# A scatter plot needs two continuous quantitative variables,
# so load the built-in airquality dataset for these examples.
data("airquality")
with(
  airquality,
  plot(Ozone, Temp, xlab = "Ozone", ylab = "Temp")
)

# Ozone against wind, with colored points.
with(
  airquality,
  plot(Ozone, Wind, xlab = "Ozone", ylab = "Wind", col = "#3F8BBA")
)

# Scatter-plot matrix of the four measurement columns.
pairs(
  airquality[c("Ozone", "Solar.R", "Wind", "Temp")]
)
