library(tidyverse)
## Warning in system("timedatectl", intern = TRUE): running command 'timedatectl'
## had status 1
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.1.0 ✓ dplyr 1.0.5
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(reprex)
library(readr)
Finishing from last week:
Movie Ratings.csv
Download link: https://www.dropbox.com/s/ebr2gzy95pb9lsx/Movie%20Ratings.csv?dl=1
Variables: Film, Genre, Rotten Tomatoes Ratings %, Audience Ratings %, Budget (million $), Year of release
imdb_1000.csv
Download link: https://www.dropbox.com/s/ov5cntaof9lj9v6/imdb_1000.csv?dl=1
Variables: star_rating, title, content_rating, genre, duration, actors_list
library(here)
## here() starts at /data/biostat/a089861/A089861/R Trainings
library(readr)
# Load the two movie data sets from the current working directory.
# Column types are declared explicitly (they are the types readr previously
# guessed for these files), so the import no longer depends on type guessing
# and no column-specification message is printed.
rt <- read_csv(
  "Movie Ratings.csv",
  col_types = cols(
    Film = col_character(),
    Genre = col_character(),
    `Rotten Tomatoes Ratings %` = col_double(),
    `Audience Ratings %` = col_double(),
    `Budget (million $)` = col_double(),
    `Year of release` = col_double()
  )
)
imdb <- read_csv(
  "imdb_1000.csv",
  col_types = cols(
    star_rating = col_double(),
    title = col_character(),
    content_rating = col_character(),
    genre = col_character(),
    duration = col_double(),
    actors_list = col_character()
  )
)
.
####Stringr
How many Twilight films are there? Problem: search through the data by applying stringr functions to the Film column.
Searching the titles for matches of "Twilight" returns 4 titles.
# List the film titles that contain "Twilight" (returns a character vector).
str_subset(rt$Film, "Twilight")
## [1] "The Twilight Saga: Eclipse" "The Twilight Saga: New Moon"
## [3] "Twilight" "Twilight: Breaking Dawn"
# Same search on the data frame, keeping whole rows so the other columns
# of the matching films stay visible.
rt %>%
  filter(str_detect(Film, "Twilight"))
## # A tibble: 4 x 6
## Film Genre `Rotten Tomatoes Ra… `Audience Rating… `Budget (million…
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 The Twilight … Drama 50 74 68
## 2 The Twilight … Drama 27 78 50
## 3 Twilight Roman… 49 82 37
## 4 Twilight: Bre… Roman… 26 68 110
## # … with 1 more variable: Year of release <dbl>
# Answer "how many?" directly by counting the matching rows.
rt %>%
  filter(str_detect(Film, "Twilight")) %>%
  nrow()
## [1] 4
Change the film titles by replacing "Twilight" with "Vampire" (e.g. "The Vampire Saga").
# Show the titles that are about to be renamed.
str_subset(string = rt$Film, pattern = "Twilight")
## [1] "The Twilight Saga: Eclipse" "The Twilight Saga: New Moon"
## [3] "Twilight" "Twilight: Breaking Dawn"
# Keep only the Twilight rows and swap the first "Twilight" in each title
# for "Vampire". The result is printed, not assigned, so `rt` is unchanged.
rt %>%
  filter(str_detect(string = Film, pattern = "Twilight")) %>%
  mutate(
    Film = str_replace(
      string = Film,
      pattern = "Twilight",
      replacement = "Vampire"
    )
  )
## # A tibble: 4 x 6
## Film Genre `Rotten Tomatoes Rat… `Audience Rating… `Budget (million…
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 The Vampire … Drama 50 74 68
## 2 The Vampire … Drama 27 78 50
## 3 Vampire Roman… 49 82 37
## 4 Vampire: Bre… Roman… 26 68 110
## # … with 1 more variable: Year of release <dbl>
Next, use mutate() to add a new column that holds the length of each title.
# Add a Title_Length column with the number of characters in each film title.
# The result is only printed; `rt` itself is not modified.
rt %>%
  mutate(Title_Length = str_length(Film))
## # A tibble: 562 x 7
## Film Genre `Rotten Tomatoes Rat… `Audience Rating… `Budget (million…
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 (500) Days… Comedy 87 81 8
## 2 10,000 B.C. Advent… 9 44 105
## 3 12 Rounds Action 30 52 20
## 4 127 Hours Advent… 93 84 18
## 5 17 Again Comedy 55 70 20
## 6 2012 Action 39 63 200
## 7 27 Dresses Comedy 40 71 30
## 8 30 Days of… Horror 50 57 32
## 9 30 Minutes… Comedy 43 48 28
## 10 50/50 Comedy 93 93 8
## # … with 552 more rows, and 2 more variables: Year of release <dbl>,
## # Title_Length <int>
# Find the film with the longest title: recompute the length and keep the
# row(s) with the largest value. slice_max() is the current replacement for
# the superseded top_n(); like top_n(), it keeps ties by default.
rt %>%
  mutate(Title_Length = str_length(Film)) %>%
  slice_max(Title_Length, n = 1)
## # A tibble: 1 x 7
## Film Genre `Rotten Tomatoes R… `Audience Ratin… `Budget (millio…
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 The Chronicles o… Adven… 49 63 155
## # … with 2 more variables: Year of release <dbl>, Title_Length <int>
Stringr_Lubridate
library(tidyverse)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
Dates
Save your birthday as a datetime object in R.
On what day of the week does your birthday fall in 2020? On what day of the week should you plan your 100th birthday? Find the date that is exactly 9 months before the day you were born. If it is on a holiday, think about the implications.
# Birth date, parsed once and never overwritten, so every later answer is
# measured from the actual day of birth.
my_bday <- mdy("April 2, 1989")

# Birthday in 2020: work on a copy so the original birth date is preserved.
my_bday_2020 <- my_bday
year(my_bday_2020) <- 2020
my_bday_2020
## [1] "2020-04-02"
wday(my_bday_2020, label = TRUE)
## [1] Thu
## Levels: Sun < Mon < Tue < Wed < Thu < Fri < Sat

# 100th birthday: 100 years after the birth date (1989 + 100 = 2089), not
# 100 years after the 2020 birthday. %m+% adds the period without rolling
# over into the next month.
my_bday_100 <- my_bday %m+% years(100)
my_bday_100
## [1] "2089-04-02"
wday(my_bday_100, label = TRUE)
## [1] Sat
## Levels: Sun < Mon < Tue < Wed < Thu < Fri < Sat

# Exactly 9 months before the day of birth, again measured from the real
# birth date rather than from the 2020 birthday.
my_bday %m-% months(9)
## [1] "1988-07-02"