Activity #3 Group 2

#Student participation: 
# Keydy Sanchez 
# Vanessa Wasveiler

#Load your libraries before getting started
library(tidyverse)

## ── Attaching packages ────────────────────────────────────────────── tidyverse 1.3.0 ──

## ✓ ggplot2 3.3.2     ✓ purrr   0.3.3
## ✓ tibble  3.0.1     ✓ dplyr   1.0.0
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0

## ── Conflicts ───────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(scales)

## 
## Attaching package: 'scales'

## The following object is masked from 'package:purrr':
## 
##     discard

## The following object is masked from 'package:readr':
## 
##     col_factor

library(lemon)

## 
## Attaching package: 'lemon'

## The following object is masked from 'package:purrr':
## 
##     %||%

## The following objects are masked from 'package:ggplot2':
## 
##     CoordCartesian, element_render

library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

library(dplyr)

# Read the cereal data from cereal.csv (path is relative to the working
# directory) and let readr guess the column types.
cereal <- readr::read_csv(file = "cereal.csv")

## Parsed with column specification:
## cols(
##   name = col_character(),
##   mfr = col_character(),
##   type = col_character(),
##   calories = col_double(),
##   protein = col_double(),
##   fat = col_double(),
##   sodium = col_double(),
##   fiber = col_double(),
##   carbo = col_double(),
##   sugars = col_double(),
##   potass = col_double(),
##   vitamins = col_double(),
##   shelf = col_double(),
##   weight = col_double(),
##   cups = col_double(),
##   rating = col_double()
## )

# Inspect the loaded data in the data viewer.
tibble::view(cereal)

Use the following dplyr verbs to produce an output from the dataset:

For this part, you need to make at least 4 graphs. For each graph, briefly describe in 1-2 sentences what is seen. You do not need to use all of the verbs for your actual graphs, but you need to produce outputs that include all of the verbs below (this could just be a print() statement after a pipe where you use the dplyr verb). You can have statements that combine the verbs together (which will likely happen).

select() starts_with() filter() arrange() distinct() slice() mutate() summarise()

# Bar chart of fat against sodium, restricted to cereals with fat >= 1.
# The rows are also sorted by sodium to exercise arrange().
cereal %>%
  filter(fat >= 1) %>%
  arrange(sodium) %>%
  ggplot(mapping = aes(x = sodium, y = fat)) +
  geom_col() +
  scale_y_continuous(labels = scales::number_format()) +
  xlab("sodium") +
  ylab("fat")

#The graph above shows the relationship between sodium and fat. It is very noticeable that the cereal with the most fat also has more sodium.

# Line chart of vitamins against sugars.
# Only the two plotted columns are kept; after sorting by descending vitamins,
# distinct() keeps the first row found for each vitamins value, so the line
# connects one point per distinct vitamins level.
cereal %>%
  select(vitamins, sugars) %>%
  arrange(desc(vitamins)) %>%
  distinct(vitamins, .keep_all = TRUE) %>%
  ggplot(mapping = aes(x = sugars, y = vitamins)) +
  geom_line() +
  scale_y_continuous(labels = scales::number_format()) +
  xlab("sugars") +
  ylab("vitamins")

#The graph above shows the relationship between cereal vitamins and sugars. It is very noticeable that the cereals with higher amounts of vitamins have less sugar.

# Scatter plot of rating against protein for the 20 cereals with the most
# protein, with points colored by protein.
# slice(1:20) only takes the first 20 rows in their current order, so the data
# is sorted by descending protein first; without the sort the plot would show
# the first 20 rows of the file rather than the top 20 by protein.
cereal %>%
  arrange(desc(protein)) %>%
  slice(1:20) %>%
  ggplot(aes(x = protein, y = rating, color = protein)) +
  geom_point() +
  scale_x_continuous(labels = number_format()) +
  scale_y_continuous(labels = number_format()) +
  labs(x = "Protein", y = "Rating")

#The graph above shows the relationship between Protein and Ratings, and we can see that the cereal with higher ratings are also the cereal with higher protein amounts.

# Horizontal bar chart of how many cereals share each protein value.
# protein is turned into a factor so every value gets its own bar and fill
# color; the bars are ordered by their count.
cereal %>%
  mutate(protein = as.factor(protein)) %>%
  group_by(protein) %>%
  summarise(freq = n()) %>%
  ggplot(mapping = aes(x = reorder(protein, freq), y = freq, fill = protein)) +
  geom_col(show.legend = FALSE) +
  labs(x = "Protein", y = "Frequency") +
  coord_flip()

## `summarise()` ungrouping output (override with `.groups` argument)

#The graph above shows each protein value found in the dataset and how many cereals have that value.

# Dot plot of calories for the first 20 cereals in the dataset.
# starts_with("C") is a column-selection helper: it keeps the columns whose
# names begin with "c" (matching ignores case by default), not the cereals
# whose names begin with "C". `name` is selected explicitly so it is available
# for the y axis, instead of grouping the data just to carry it through
# select().
cereal %>%
  head(20) %>%
  select(name, starts_with("C")) %>%
  ggplot(aes(x = calories, y = name)) +
  geom_point()

## Adding missing grouping variables: `name`

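#The graph above shows the calories of the first 20 cereals in the dataset. Note that select(starts_with("C")) keeps the columns whose names start with "c" (calories, carbo, cups); it does not filter for cereals whose names start with C.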

Convert the following objects into a date class using lubridate and print their corrected outputs

Refer to the notes that show how to use the mdy functions and ymd functions. I provide the code on how to do this.

wrong_format_date1 <- "01-25-1999"
wrong_format_date2 <- 25012005
wrong_format_date3 <- "2005-05-31"

# Raw date strings, each written in a different layout.
random_string_date_1 <- "01-25-1999" # month-day-year
random_string_date_2 <- "25012005" # day-month-year, no separators
random_string_date_3 <- "2005-05-31" # year-month-day

# Before parsing, every value is a plain character string.
class(random_string_date_1)

## [1] "character"

class(random_string_date_2)

## [1] "character"

class(random_string_date_3)

## [1] "character"

# Parse each string with the lubridate helper that matches its layout.
random_date_1 <- lubridate::mdy(random_string_date_1)
random_date_2 <- lubridate::dmy(random_string_date_2)
random_date_3 <- lubridate::ymd(random_string_date_3)

# After parsing, every value has class Date.
class(random_date_1)

## [1] "Date"

class(random_date_2)

## [1] "Date"

class(random_date_3)

## [1] "Date"

# Print the three corrected dates together.
print(
  c(random_date_1, random_date_2, random_date_3)
)

## [1] "1999-01-25" "2005-01-25" "2005-05-31"

Use the str_replace function to replace all the manufacturer letters with the actual manufacturer names

# Manufacturer of Cereal
# Replace the one-letter manufacturer (mfr) and type codes with full names.
#
# Every pattern is anchored with ^...$ so it only matches a value that is
# exactly the code letter. Unanchored, each step also matches capital letters
# inside names inserted by an earlier step: "R" becomes "Ralston Purina", and
# the later "P" -> "Post" step then rewrites it to "Ralston Posturina".
# NOTE(review): assumes each mfr/type value is exactly one code letter with no
# surrounding whitespace -- confirm against cereal.csv.
cereal_new <- cereal %>%
  mutate(
    mfr = str_replace(mfr, "^G$", "General Mills"),
    mfr = str_replace(mfr, "^N$", "Nabisco"),
    mfr = str_replace(mfr, "^Q$", "Quaker Oats"),
    mfr = str_replace(mfr, "^K$", "Kelloggs"),
    mfr = str_replace(mfr, "^R$", "Ralston Purina"),
    mfr = str_replace(mfr, "^P$", "Post"),
    mfr = str_replace(mfr, "^A$", "American Home Food Products"),
    type = str_replace(type, "^C$", "Cold"),
    type = str_replace(type, "^H$", "Hot")
  )

# View the result as a separate step so the assignment above stores the
# transformed data frame directly rather than the return value of view().
view(cereal_new)