library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 2.1
# Directory that holds 'cereal.csv' on the original author's machine.
# Only switch to it when it actually exists, so the script does not abort
# with "cannot change working directory" on other machines; in that case
# every relative path below resolves against the current working directory.
data_dir <- "C:/Users/gabeg/Documents/Uni/Stat 5003/Week 1"
if (dir.exists(data_dir)) {
  setwd(data_dir)
}

# Load the cereal data set with read.csv()'s default settings.
cereal <- read.csv("cereal.csv")

## 2.2a
# Preview the first six rows of the data set.
head(cereal, n = 6L)
##                        name mfr type calories protein fat sodium fiber carbo
## 1                 100%_Bran   N    C       70       4   1    130  10.0   5.0
## 2         100%_Natural_Bran   Q    C      120       3   5     15   2.0   8.0
## 3                  All-Bran   K    C       70       4   1    260   9.0   7.0
## 4 All-Bran_with_Extra_Fiber   K    C       50       4   0    140  14.0   8.0
## 5            Almond_Delight   R    C      110       2   2    200   1.0  14.0
## 6   Apple_Cinnamon_Cheerios   G    C      110       2   2    180   1.5  10.5
##   sugars potass vitamins shelf weight cups   rating
## 1      6    280       25     3      1 0.33 68.40297
## 2      8    135        0     3      1 1.00 33.98368
## 3      5    320       25     3      1 0.33 59.42551
## 4      0    330       25     3      1 0.50 93.70491
## 5      8     -1       25     3      1 0.75 34.38484
## 6     10     70       25     1      1 0.75 29.50954
# Check which kind of object read.csv() produced.
class(cereal)
## [1] "data.frame"
## 2.2b
# Number of rows in the data set.
NROW(cereal)
## [1] 77
# Row and column counts together.
c(nrow(cereal), ncol(cereal))
## [1] 77 16
## 2.2c
# First way to pull out the calories column as a plain vector: matrix-style
# indexing by column name.
cereal[, "calories"]
##  [1]  70 120  70  50 110 110 110 130  90  90 120 110 120 110 110 110 100 110 110
## [20] 110 100 110 100 100 110 110 100 120 120 110 100 110 100 110 120 120 110 110
## [39] 110 140 110 100 110 100 150 150 160 100 120 140  90 130 120 100  50  50 100
## [58] 100 120 100  90 110 110  80  90  90 110 110  90 110 140 100 110 110 100 100
## [77] 110
# Second way to get the same vector: evaluate the column name inside the
# data frame.
with(cereal, calories)
##  [1]  70 120  70  50 110 110 110 130  90  90 120 110 120 110 110 110 100 110 110
## [20] 110 100 110 100 100 110 110 100 120 120 110 100 110 100 110 120 120 110 110
## [39] 110 140 110 100 110 100 150 150 160 100 120 140  90 130 120 100  50  50 100
## [58] 100 120 100  90 110 110  80  90  90 110 110  90 110 140 100 110 110 100 100
## [77] 110
## 2.2d
# Show the first ten rows with all columns.
head(cereal, n = 10L)
##                         name mfr type calories protein fat sodium fiber carbo
## 1                  100%_Bran   N    C       70       4   1    130  10.0   5.0
## 2          100%_Natural_Bran   Q    C      120       3   5     15   2.0   8.0
## 3                   All-Bran   K    C       70       4   1    260   9.0   7.0
## 4  All-Bran_with_Extra_Fiber   K    C       50       4   0    140  14.0   8.0
## 5             Almond_Delight   R    C      110       2   2    200   1.0  14.0
## 6    Apple_Cinnamon_Cheerios   G    C      110       2   2    180   1.5  10.5
## 7                Apple_Jacks   K    C      110       2   0    125   1.0  11.0
## 8                    Basic_4   G    C      130       3   2    210   2.0  18.0
## 9                  Bran_Chex   R    C       90       2   1    200   4.0  15.0
## 10               Bran_Flakes   P    C       90       3   0    210   5.0  13.0
##    sugars potass vitamins shelf weight cups   rating
## 1       6    280       25     3   1.00 0.33 68.40297
## 2       8    135        0     3   1.00 1.00 33.98368
## 3       5    320       25     3   1.00 0.33 59.42551
## 4       0    330       25     3   1.00 0.50 93.70491
## 5       8     -1       25     3   1.00 0.75 34.38484
## 6      10     70       25     1   1.00 0.75 29.50954
## 7      14     30       25     2   1.00 1.00 33.17409
## 8       8    100       25     3   1.33 0.75 37.03856
## 9       6    125       25     1   1.00 0.67 49.12025
## 10      5    190       25     3   1.00 0.67 53.31381
## 2.2e

# Keep only the rows whose manufacturer code (mfr) is "K".
kelloggs <- cereal %>%
  filter(mfr == "K")
## 2.3a
# Read the file a second time, converting string columns to factors.
cereal2 <- read.csv("cereal.csv", stringsAsFactors = TRUE)

## 2.3b

# Check the class of the two categorical columns.
class(cereal2[["mfr"]])
## [1] "factor"
class(cereal2[["type"]])
## [1] "factor"
## 2.3c
# Count the distinct levels of each factor.
length(levels(cereal2[["mfr"]]))
## [1] 7
length(levels(cereal2[["type"]]))
## [1] 2
## 2.4a

# Copy the calories column into a stand-alone vector.
cereal.calories <- cereal[["calories"]]

## 2.4b

# Number of elements in the vector.
length(cereal.calories)
## [1] 77
## 2.4c
# Elements five through ten.
cereal.calories[seq(5, 10)]
## [1] 110 110 110 130  90  90
## 2.4d

# Append one more element to the end of the vector. Because the new element
# is a string, the whole vector is coerced to character.
cereal.calories <- append(cereal.calories, "gabe")

## Length increases to 78

length(cereal.calories)
## [1] 78
## 2.5a

# Convert the full data frame (factor columns included) to a matrix.
cereal_mat <- as.matrix(cereal2)

# Drop the name, mfr and type columns, then convert what is left.
cereal_int <- subset(cereal2, select = -c(mfr, type, name))

cereal_mat2 <- as.matrix(cereal_int)
## 3.1

# Quartiles, extremes and mean of the sodium column in one call.
summary(cereal[["sodium"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0   130.0   180.0   159.7   210.0   320.0
## 3.2
# The same sodium statistics computed one at a time.
max(cereal[["sodium"]])
## [1] 320
min(cereal[["sodium"]])
## [1] 0
sd(cereal[["sodium"]])
## [1] 83.8323
mean(cereal[["sodium"]])
## [1] 159.6753
# Mean sodium per manufacturer code, one row per mfr value.
summarise(group_by(cereal, mfr), mfr_mean = mean(sodium))
## # A tibble: 7 x 2
##   mfr   mfr_mean
##   <chr>    <dbl>
## 1 A          0  
## 2 G        200. 
## 3 K        175. 
## 4 N         37.5
## 5 P        146. 
## 6 Q         92.5
## 7 R        198.
## 4.1a

# Side-by-side boxplots of sodium, one box per manufacturer level.
boxplot(sodium ~ mfr, data = cereal2)

## 4.2

# Scatter plot with sodium on the x-axis and calories on the y-axis,
# drawn with filled circles (pch = 19).
with(
  cereal,
  plot(
    x = sodium,
    y = calories,
    main = "Calories vs. Sodium in Cereals",
    xlab = "Sodium",
    ylab = "Calories",
    pch = 19
  )
)

# Save the mfr == "K" subset to a CSV file in the working directory.
kelloggs %>%
  write_csv("Kelloggs.csv")