bike_data <- read.csv("bike_sharing_data (1).csv")
library(stringr)
library(readr)
str(bike_data)
## 'data.frame':    17379 obs. of  13 variables:
##  $ datetime  : chr  "1/1/2011 0:00" "1/1/2011 1:00" "1/1/2011 2:00" "1/1/2011 3:00" ...
##  $ season    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ holiday   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ workingday: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ weather   : int  1 1 1 1 1 2 1 1 1 1 ...
##  $ temp      : num  9.84 9.02 9.02 9.84 9.84 ...
##  $ atemp     : num  14.4 13.6 13.6 14.4 14.4 ...
##  $ humidity  : chr  "81" "80" "80" "75" ...
##  $ windspeed : num  0 0 0 0 0 ...
##  $ casual    : int  3 8 5 3 0 0 2 1 1 8 ...
##  $ registered: int  13 32 27 10 1 1 0 2 7 6 ...
##  $ count     : int  16 40 32 13 1 1 2 3 8 14 ...
##  $ sources   : chr  "ad campaign" "www.yahoo.com" "www.google.fi" "AD campaign" ...
summary(bike_data)
##    datetime             season         holiday          workingday    
##  Length:17379       Min.   :1.000   Min.   :0.00000   Min.   :0.0000  
##  Class :character   1st Qu.:2.000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Mode  :character   Median :3.000   Median :0.00000   Median :1.0000  
##                     Mean   :2.502   Mean   :0.02877   Mean   :0.6827  
##                     3rd Qu.:3.000   3rd Qu.:0.00000   3rd Qu.:1.0000  
##                     Max.   :4.000   Max.   :1.00000   Max.   :1.0000  
##     weather           temp           atemp         humidity        
##  Min.   :1.000   Min.   : 0.82   Min.   : 0.00   Length:17379      
##  1st Qu.:1.000   1st Qu.:13.94   1st Qu.:16.66   Class :character  
##  Median :1.000   Median :20.50   Median :24.24   Mode  :character  
##  Mean   :1.425   Mean   :20.38   Mean   :23.79                     
##  3rd Qu.:2.000   3rd Qu.:27.06   3rd Qu.:31.06                     
##  Max.   :4.000   Max.   :41.00   Max.   :50.00                     
##    windspeed          casual         registered        count    
##  Min.   : 0.000   Min.   :  0.00   Min.   :  0.0   Min.   :  1  
##  1st Qu.: 7.002   1st Qu.:  4.00   1st Qu.: 36.0   1st Qu.: 42  
##  Median :12.998   Median : 16.00   Median :116.0   Median :141  
##  Mean   :12.737   Mean   : 34.48   Mean   :152.5   Mean   :187  
##  3rd Qu.:16.998   3rd Qu.: 46.00   3rd Qu.:217.0   3rd Qu.:277  
##  Max.   :56.997   Max.   :367.00   Max.   :886.0   Max.   :977  
##    sources         
##  Length:17379      
##  Class :character  
##  Mode  :character  
##                    
##                    
## 

Question 3

bad_data <- str_subset(bike_data$humidity, "[a-z A-Z]")
bad_data
## [1] "x61"

Question 5

cast <- read_csv("raw_cast.csv")
## New names:
## Rows: 1298 Columns: 3
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (2): Name 1, Name 2 dbl (1): ...1
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
str(cast)
## spc_tbl_ [1,298 × 3] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ ...1  : num [1:1298] 1 2 3 4 5 6 7 8 9 10 ...
##  $ Name 1: chr [1:1298] "Angela Bassett" "Peter Krause" "Oliver Stark" "Aisha Hinds" ...
##  $ Name 2: chr [1:1298] "Athena Grant\n                  87 episodes, 2018-2022" "Bobby Nash\n                  87 episodes, 2018-2022" "Evan 'Buck' Buckley\n                  87 episodes, 2018-2022" "Henrietta 'Hen' Wilson\n                  87 episodes, 2018-2022" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   ...1 = col_double(),
##   ..   `Name 1` = col_character(),
##   ..   `Name 2` = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>

Question 8, 9, 10

cast$`Name 2` <- str_split_fixed(cast$"Name 2", "\n", 2)[,1]
cast$Episodes <- str_split_fixed(cast$`Name 2`, "\n", 2)[,2]
cast$Episodes <- str_trim(cast$Episodes, side = "both")
cast