library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## 2.1
# Directory the analysis was originally run from. It only exists on the
# original author's machine, so the working directory is changed only when the
# directory is present; otherwise the script keeps the current working
# directory and expects 'cereal.csv' to be found there.
data_dir <- "C:/Users/gabeg/Documents/Uni/Stat 5003/Week 1"
if (dir.exists(data_dir)) {
  setwd(data_dir)
}
# Fail early with a readable message instead of a low-level connection error.
if (!file.exists("cereal.csv")) {
  stop("cereal.csv not found in working directory: ", getwd(), call. = FALSE)
}
# Keep text columns as character regardless of R version (the read.csv()
# default for stringsAsFactors changed in R 4.0.0).
cereal <- read.csv("cereal.csv", stringsAsFactors = FALSE)
## 2.2a
# Preview the first six rows (six is also the default for head()).
head(cereal, n = 6L)
## name mfr type calories protein fat sodium fiber carbo
## 1 100%_Bran N C 70 4 1 130 10.0 5.0
## 2 100%_Natural_Bran Q C 120 3 5 15 2.0 8.0
## 3 All-Bran K C 70 4 1 260 9.0 7.0
## 4 All-Bran_with_Extra_Fiber K C 50 4 0 140 14.0 8.0
## 5 Almond_Delight R C 110 2 2 200 1.0 14.0
## 6 Apple_Cinnamon_Cheerios G C 110 2 2 180 1.5 10.5
## sugars potass vitamins shelf weight cups rating
## 1 6 280 25 3 1 0.33 68.40297
## 2 8 135 0 3 1 1.00 33.98368
## 3 5 320 25 3 1 0.33 59.42551
## 4 0 330 25 3 1 0.50 93.70491
## 5 8 -1 25 3 1 0.75 34.38484
## 6 10 70 25 1 1 0.75 29.50954
# Report the class of the object that read.csv() returned.
class(cereal)
## [1] "data.frame"
## 2.2b
# Row count on its own.
NROW(cereal)
## [1] 77
# Rows and columns together.
dim(cereal)
## [1] 77 16
## 2.2c
# Two equivalent ways of pulling a single column out as a vector:
# `$` with a literal name, then `[[` with the name as a string.
cereal$calories
## [1] 70 120 70 50 110 110 110 130 90 90 120 110 120 110 110 110 100 110 110
## [20] 110 100 110 100 100 110 110 100 120 120 110 100 110 100 110 120 120 110 110
## [39] 110 140 110 100 110 100 150 150 160 100 120 140 90 130 120 100 50 50 100
## [58] 100 120 100 90 110 110 80 90 90 110 110 90 110 140 100 110 110 100 100
## [77] 110
cereal[["calories"]]
## [1] 70 120 70 50 110 110 110 130 90 90 120 110 120 110 110 110 100 110 110
## [20] 110 100 110 100 100 110 110 100 120 120 110 100 110 100 110 120 120 110 110
## [39] 110 140 110 100 110 100 150 150 160 100 120 140 90 130 120 100 50 50 100
## [58] 100 120 100 90 110 110 80 90 90 110 110 90 110 140 100 110 110 100 100
## [77] 110
## 2.2d
# Rows 1 to 10, all columns.
cereal[seq_len(10), ]
## name mfr type calories protein fat sodium fiber carbo
## 1 100%_Bran N C 70 4 1 130 10.0 5.0
## 2 100%_Natural_Bran Q C 120 3 5 15 2.0 8.0
## 3 All-Bran K C 70 4 1 260 9.0 7.0
## 4 All-Bran_with_Extra_Fiber K C 50 4 0 140 14.0 8.0
## 5 Almond_Delight R C 110 2 2 200 1.0 14.0
## 6 Apple_Cinnamon_Cheerios G C 110 2 2 180 1.5 10.5
## 7 Apple_Jacks K C 110 2 0 125 1.0 11.0
## 8 Basic_4 G C 130 3 2 210 2.0 18.0
## 9 Bran_Chex R C 90 2 1 200 4.0 15.0
## 10 Bran_Flakes P C 90 3 0 210 5.0 13.0
## sugars potass vitamins shelf weight cups rating
## 1 6 280 25 3 1.00 0.33 68.40297
## 2 8 135 0 3 1.00 1.00 33.98368
## 3 5 320 25 3 1.00 0.33 59.42551
## 4 0 330 25 3 1.00 0.50 93.70491
## 5 8 -1 25 3 1.00 0.75 34.38484
## 6 10 70 25 1 1.00 0.75 29.50954
## 7 14 30 25 2 1.00 1.00 33.17409
## 8 8 100 25 3 1.33 0.75 37.03856
## 9 6 125 25 1 1.00 0.67 49.12025
## 10 5 190 25 3 1.00 0.67 53.31381
## 2.2e
# Keep only the rows whose manufacturer code is "K".
# (filter() here is dplyr's, which masks stats::filter once dplyr is attached.)
kelloggs <- cereal %>%
  filter(mfr == "K")
## 2.3a
# Read the same file again, this time converting text columns to factors.
cereal2 <- read.csv("cereal.csv", stringsAsFactors = TRUE)
## 2.3b
# Check the class of the two categorical columns.
class(cereal2[["mfr"]])
## [1] "factor"
class(cereal2[["type"]])
## [1] "factor"
## 2.3c
# Number of distinct levels in each factor.
length(levels(cereal2[["mfr"]]))
## [1] 7
length(levels(cereal2[["type"]]))
## [1] 2
## 2.4a
# Copy the calories column into a standalone vector.
cereal.calories <- cereal[["calories"]]
## 2.4b
length(cereal.calories)
## [1] 77
## 2.4c
# Elements 5 through 10.
cereal.calories[seq(from = 5, to = 10)]
## [1] 110 110 110 130 90 90
## 2.4d
# Append a string to the end of the vector. A vector holds a single type, so
# this also converts every existing calorie value to character.
cereal.calories <- append(cereal.calories, "gabe")
## Length increases to 78
length(cereal.calories)
## [1] 78
## 2.5a
# Convert the whole data frame, including its factor columns, to a matrix.
cereal_mat <- as.matrix(cereal2)
# Drop the three non-numeric columns, then convert what is left.
cereal_int <- subset(cereal2, select = -c(name, mfr, type))
cereal_mat2 <- as.matrix(cereal_int)
## 3.1
# Five-number summary plus mean of the sodium column.
summary(cereal[["sodium"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 130.0 180.0 159.7 210.0 320.0
## 3.2
# Individual statistics for sodium.
max(cereal[["sodium"]])
## [1] 320
min(cereal[["sodium"]])
## [1] 0
sd(cereal[["sodium"]])
## [1] 83.8323
mean(cereal[["sodium"]])
## [1] 159.6753
# Mean sodium per manufacturer.
cereal %>%
  group_by(mfr) %>%
  summarise(mfr_mean = mean(sodium))
## # A tibble: 7 x 2
## mfr mfr_mean
## <chr> <dbl>
## 1 A 0
## 2 G 200.
## 3 K 175.
## 4 N 37.5
## 5 P 146.
## 6 Q 92.5
## 7 R 198.
## 4.1a
# One box of sodium values per manufacturer, using the factor-coded data.
boxplot(sodium ~ mfr, data = cereal2)

## 4.2
# Scatter plot with sodium on the x axis and calories on the y axis.
plot(
  x = cereal$sodium,
  y = cereal$calories,
  xlab = "Sodium",
  ylab = "Calories",
  main = "Calories vs. Sodium in Cereals",
  pch = 19 # solid filled circles
)

# Write the Kellogg's subset to a CSV file in the working directory.
kelloggs %>%
  readr::write_csv("Kelloggs.csv")