library (stringr)
library(readr)
# Read the bike-sharing CSV into a data frame, then print its structure
# (column names, types and the first few values of each column).
bike_data <- read.csv(file = "bike_sharing_data.csv")
str(object = bike_data)
## 'data.frame': 17379 obs. of 13 variables:
## $ datetime : chr "1/1/2011 0:00" "1/1/2011 1:00" "1/1/2011 2:00" "1/1/2011 3:00" ...
## $ season : int 1 1 1 1 1 1 1 1 1 1 ...
## $ holiday : int 0 0 0 0 0 0 0 0 0 0 ...
## $ workingday: int 0 0 0 0 0 0 0 0 0 0 ...
## $ weather : int 1 1 1 1 1 2 1 1 1 1 ...
## $ temp : num 9.84 9.02 9.02 9.84 9.84 ...
## $ atemp : num 14.4 13.6 13.6 14.4 14.4 ...
## $ humidity : chr "81" "80" "80" "75" ...
## $ windspeed : num 0 0 0 0 0 ...
## $ casual : int 3 8 5 3 0 0 2 1 1 8 ...
## $ registered: int 13 32 27 10 1 1 0 2 7 6 ...
## $ count : int 16 40 32 13 1 1 2 3 8 14 ...
## $ sources : chr "ad campaign" "www.yahoo.com" "www.google.fi" "AD campaign" ...
# Column-by-column summary; character columns (datetime, humidity, sources)
# only report length/class/mode rather than numeric quantiles.
summary(object = bike_data)
## datetime season holiday workingday
## Length:17379 Min. :1.000 Min. :0.00000 Min. :0.0000
## Class :character 1st Qu.:2.000 1st Qu.:0.00000 1st Qu.:0.0000
## Mode :character Median :3.000 Median :0.00000 Median :1.0000
## Mean :2.502 Mean :0.02877 Mean :0.6827
## 3rd Qu.:3.000 3rd Qu.:0.00000 3rd Qu.:1.0000
## Max. :4.000 Max. :1.00000 Max. :1.0000
## weather temp atemp humidity
## Min. :1.000 Min. : 0.82 Min. : 0.00 Length:17379
## 1st Qu.:1.000 1st Qu.:13.94 1st Qu.:16.66 Class :character
## Median :1.000 Median :20.50 Median :24.24 Mode :character
## Mean :1.425 Mean :20.38 Mean :23.79
## 3rd Qu.:2.000 3rd Qu.:27.06 3rd Qu.:31.06
## Max. :4.000 Max. :41.00 Max. :50.00
## windspeed casual registered count
## Min. : 0.000 Min. : 0.00 Min. : 0.0 Min. : 1
## 1st Qu.: 7.002 1st Qu.: 4.00 1st Qu.: 36.0 1st Qu.: 42
## Median :12.998 Median : 16.00 Median :116.0 Median :141
## Mean :12.737 Mean : 34.48 Mean :152.5 Mean :187
## 3rd Qu.:16.998 3rd Qu.: 46.00 3rd Qu.:217.0 3rd Qu.:277
## Max. :56.997 Max. :367.00 Max. :886.0 Max. :977
## sources
## Length:17379
## Class :character
## Mode :character
##
##
##
# humidity was read as character because at least one reading contains
# stray non-numeric characters. Use one pattern for both detection and
# repair: anything that is not a digit or a decimal point.
non_numeric <- "[^0-9.]"

# Show the malformed readings before repairing them.
bad_data <- str_subset(bike_data$humidity, non_numeric)
bad_data
## [1] "x61"

# Strip the offending characters in place. Unlike substituting one
# hard-coded value, this works for any number of malformed readings
# (including none) and never treats data values as regex patterns.
# The column is still character after this step.
bike_data$humidity <- str_remove_all(bike_data$humidity, non_numeric)

# Re-run the check: an empty result confirms every reading is now clean.
bad_data_2 <- str_subset(bike_data$humidity, non_numeric)
bad_data_2
## character(0)
# Read the cast listing, print the whole data frame, then show its structure.
raw_cast <- read.csv(file = "raw_cast.csv")
raw_cast
str(object = raw_cast)
## 'data.frame': 1298 obs. of 3 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Name.1: chr "Angela Bassett" "Peter Krause" "Oliver Stark" "Aisha Hinds" ...
## $ Name.2: chr "Athena Grant\n 87 episodes, 2018-2022" "Bobby Nash\n 87 episodes, 2018-2022" "Evan 'Buck' Buckley\n 87 episodes, 2018-2022" "Henrietta 'Hen' Wilson\n 87 episodes, 2018-2022" ...
# Each Name.2 entry holds two pieces separated by a newline (see the str()
# output). Split at the first newline only, giving a two-column character
# matrix: text before the newline, and everything after it.
split_values <- str_split_fixed(
  string = raw_cast$Name.2,
  pattern = "\n",
  n = 2
)

# NOTE(review): raw_cast has no existing column called "Name 2" (str() shows
# X, Name.1, Name.2), so this line adds a new column and leaves the raw
# Name.2 untouched. Confirm whether overwriting Name.2 was intended instead.
raw_cast[["Name 2"]] <- split_values[, 1]

# The second piece carries leading whitespace after the newline; str_trim()
# strips whitespace from both ends by default.
raw_cast$episodes <- str_trim(split_values[, 2])

# Print the data frame with the new columns.
raw_cast