library(robotstxt)
library(rvest)
library(stringr)
# Load the bike-sharing records from disk and inspect each column's type.
bike <- read.csv(file = "bike_sharing_data.csv")
str(object = bike)
'data.frame': 17379 obs. of 13 variables:
$ datetime : chr "1/1/2011 0:00" "1/1/2011 1:00" "1/1/2011 2:00" "1/1/2011 3:00" ...
$ season : int 1 1 1 1 1 1 1 1 1 1 ...
$ holiday : int 0 0 0 0 0 0 0 0 0 0 ...
$ workingday: int 0 0 0 0 0 0 0 0 0 0 ...
$ weather : int 1 1 1 1 1 2 1 1 1 1 ...
$ temp : num 9.84 9.02 9.02 9.84 9.84 ...
$ atemp : num 14.4 13.6 13.6 14.4 14.4 ...
$ humidity : chr "81" "80" "80" "75" ...
$ windspeed : num 0 0 0 0 0 ...
$ casual : int 3 8 5 3 0 0 2 1 1 8 ...
$ registered: int 13 32 27 10 1 1 0 2 7 6 ...
$ count : int 16 40 32 13 1 1 2 3 8 14 ...
$ sources : chr "ad campaign" "www.yahoo.com" "www.google.fi" "AD campaign" ...
# Per-column summary of the loaded bike-sharing data.
summary(object = bike)
datetime season holiday workingday weather temp
Length:17379 Min. :1.000 Min. :0.00000 Min. :0.0000 Min. :1.000 Min. : 0.82
Class :character 1st Qu.:2.000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:1.000 1st Qu.:13.94
Mode :character Median :3.000 Median :0.00000 Median :1.0000 Median :1.000 Median :20.50
Mean :2.502 Mean :0.02877 Mean :0.6827 Mean :1.425 Mean :20.38
3rd Qu.:3.000 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:2.000 3rd Qu.:27.06
Max. :4.000 Max. :1.00000 Max. :1.0000 Max. :4.000 Max. :41.00
atemp humidity windspeed casual registered count
Min. : 0.00 Length:17379 Min. : 0.000 Min. : 0.00 Min. : 0.0 Min. : 1
1st Qu.:16.66 Class :character 1st Qu.: 7.002 1st Qu.: 4.00 1st Qu.: 36.0 1st Qu.: 42
Median :24.24 Mode :character Median :12.998 Median : 16.00 Median :116.0 Median :141
Mean :23.79 Mean :12.737 Mean : 34.48 Mean :152.5 Mean :187
3rd Qu.:31.06 3rd Qu.:16.998 3rd Qu.: 46.00 3rd Qu.:217.0 3rd Qu.:277
Max. :50.00 Max. :56.997 Max. :367.00 Max. :886.0 Max. :977
sources
Length:17379
Class :character
Mode :character
Question 2.
# Tabulate missing vs. non-missing cells of the data set. The data frame is
# `bike`; a bare `df` resolves to the stats::df() function, not a data frame.
table(is.na(bike))
Warning: is.na() applied to non-(list or vector) of type 'closure'
FALSE
1
# Locate every missing cell of `bike` as a (row, col) index pair. `df$var`
# pointed at the stats::df() function and a placeholder column name, so the
# whole data frame is scanned instead of a single guessed column.
which(is.na(bike), arr.ind = TRUE)
Error in df$var : object of type 'closure' is not subsettable
# Drop every row of `bike` that contains at least one NA. A bare `df` is the
# stats::df() function, which na.omit() simply hands back unchanged.
na.omit(bike)
function (x, df1, df2, ncp, log = FALSE)
{
if (missing(ncp))
.Call(C_df, x, df1, df2, log)
else .Call(C_dnf, x, df1, df2, ncp, log)
}
<bytecode: 0x131423050>
<environment: namespace:stats>
# Flag, row by row, whether a record of `bike` has no missing values. A bare
# `df` is the stats::df() function, which complete.cases() rejects.
complete.cases(bike)
Error in complete.cases(df) : invalid 'type' (closure) of argument
# Report, per column of `bike`, whether any value contains the literal text
# "NA". str_detect() needs a character vector, so each column is scanned
# separately (a bare `df` is the stats::df() function, not data).
# True missing values yield NA from str_detect() and are skipped here via
# na.rm = TRUE; use is.na() to find those.
vapply(
  bike,
  function(column) any(str_detect(as.character(column), "NA"), na.rm = TRUE),
  logical(1)
)
Error in `str_detect()`:
! `string` must be a vector, not a function.
Backtrace:
1. stringr::str_detect(df, "NA")
Question 3.
# Collect the humidity entries that contain a letter or a space, i.e. the
# values matching the character class below, and show them.
bad_data <- str_subset(string = bike$humidity, pattern = "[a-z A-Z]")
bad_data
[1] "x61"
# Strip stray letters/spaces from humidity in place (e.g. "x61" -> "61").
# The pattern is written out here rather than taken from `bad_data`, so the
# step still works when there are zero or several offending values and does
# not depend on a hard-coded replacement of "61".
bike$humidity <- str_replace_all(bike$humidity, "[a-z A-Z]", "")
# Re-run the check; an empty result means no offending entries remain.
bad_data_2 <- str_subset(bike$humidity, "[a-z A-Z]")
bad_data_2
character(0)
Question 4.
# NOTE(review): `bike_complete` is not created anywhere in this file. When it
# is absent it is built here from `bike` with incomplete rows dropped --
# confirm that this matches the intended definition.
if (!exists("bike_complete")) {
  bike_complete <- na.omit(bike)
}
# Recode the integer weather codes 1-4 as a labelled factor.
bike_complete$weather <- factor(
  bike_complete$weather,
  levels = c(1, 2, 3, 4),
  labels = c("Clear", "Mist", "Light Snow/Rain", "Heavy Snow/Rain")
)
# Load the raw cast listing from disk and inspect each column's type.
raw_cast <- read.csv(file = "raw_cast.csv")
str(object = raw_cast)
'data.frame': 1298 obs. of 3 variables:
$ X : int 1 2 3 4 5 6 7 8 9 10 ...
$ Name.1: chr "Angela Bassett" "Peter Krause" "Oliver Stark" "Aisha Hinds" ...
$ Name.2: chr "Athena Grant\n 87 episodes, 2018-2022" "Bobby Nash\n 87 episodes, 2018-2022" "Evan 'Buck' Buckley\n 87 episodes, 2018-2022" "Henrietta 'Hen' Wilson\n 87 episodes, 2018-2022" ...
# Display the raw cast table, then show its column structure again.
raw_cast
str(object = raw_cast)
'data.frame': 1298 obs. of 3 variables:
$ X : int 1 2 3 4 5 6 7 8 9 10 ...
$ Name.1: chr "Angela Bassett" "Peter Krause" "Oliver Stark" "Aisha Hinds" ...
$ Name.2: chr "Athena Grant\n 87 episodes, 2018-2022" "Bobby Nash\n 87 episodes, 2018-2022" "Evan 'Buck' Buckley\n 87 episodes, 2018-2022" "Henrietta 'Hen' Wilson\n 87 episodes, 2018-2022" ...
# Split `Name.2` at its first newline into two pieces: the text before the
# newline goes into a new `Name 2` column, the remainder into `episodes`.
raw_cast[["Name 2"]] <- str_split_fixed(
  raw_cast[["Name.2"]],
  pattern = "\n",
  n = 2
)[, 1]
# The remainder is stored with leading and trailing whitespace stripped.
raw_cast[["episodes"]] <- str_trim(
  str_split_fixed(raw_cast[["Name.2"]], pattern = "\n", n = 2)[, 2],
  side = "both"
)
raw_cast
Question 10.
# The episode text lives in the `episodes` column; `raw_cast$Episode` does not
# exist (column names are case-sensitive), so it evaluated to NULL and the
# calls below had nothing to work on.
# Episode text with all punctuation removed.
trimCast <- str_replace_all(raw_cast$episodes, "[[:punct:]]", "")
# Episode text with every non-alphanumeric character replaced by a space.
trim <- str_replace_all(raw_cast$episodes, "[^[:alnum:]]", " ")
# Episode text with leading and trailing whitespace stripped.
str_trim(raw_cast$episodes, side = "both")
character(0)
LS0tCnRpdGxlOiAiQXNzaWdubWVudCA0IgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpgYGB7cn0KbGlicmFyeShyb2JvdHN0eHQpCgpsaWJyYXJ5KHJ2ZXN0KQoKbGlicmFyeShzdHJpbmdyKQpgYGAKCmBgYHtyfQpiaWtlIDwtIHJlYWQuY3N2KCJiaWtlX3NoYXJpbmdfZGF0YS5jc3YiKQpzdHIoYmlrZSkKYGBgCmBgYHtyfQpzdW1tYXJ5KGJpa2UpCmBgYAoKIyMjIFF1ZXN0aW9uIDIuIApgYGB7cn0KCnRhYmxlKGlzLm5hKGRmKSkKYGBgCgoKYGBge3J9CndoaWNoKGlzLm5hKGRmJHZhcik9PVRSVUUpCmBgYAoKCmBgYHtyfQpuYS5vbWl0KGRmKQpgYGAKCmBgYHtyfQpjb21wbGV0ZS5jYXNlcyhkZikKYGBgCgpgYGB7cn0Kc3RyX2RldGVjdChkZiwiTkEiKQpgYGAKCiMjIyBRdWVzdGlvbnMgMy4gCmBgYHtyfQpiYWRfZGF0YSA8LSBzdHJfc3Vic2V0KGJpa2UkaHVtaWRpdHksICJbYS16IEEtWl0iKQpiYWRfZGF0YQpgYGAKCmBgYHtyfQpiaWtlJGh1bWlkaXR5IDwtIHN0cl9yZXBsYWNlX2FsbChiaWtlJGh1bWlkaXR5LGJhZF9kYXRhLCI2MSIpCmJhZF9kYXRhXzIgPC0gc3RyX3N1YnNldChiaWtlJGh1bWlkaXR5LCAiW2EteiBBLVpdIikKYmFkX2RhdGFfMgpgYGAKCgojIyMgUXVlc3Rpb24gNC4gCmBgYHtyfQpiaWtlX2NvbXBsZXRlJHdlYXRoZXIgPC0gZmFjdG9yKGJpa2VfY29tcGxldGUkd2VhdGhlciwgbGV2ZWxzID0gYygxLDIsMyw0KSwgbGFiZWxzID0gYygiQ2xlYXIiLCJNaXN0IiwiTGlnaHQgU25vdy9SYWluIiwgIkhlYXZ5IFNub3cvUmFpbiIpKQpgYGAKCmBgYHtyfQpyYXdfY2FzdCA8LSByZWFkLmNzdigicmF3X2Nhc3QuY3N2IikKc3RyKHJhd19jYXN0KQpgYGAKCmBgYHtyfQpyYXdfY2FzdApgYGAKCmBgYHtyfQpzdHIocmF3X2Nhc3QpCmBgYAoKYGBge3J9CnJhd19jYXN0JGBOYW1lIDJgIDwtIHN0cl9zcGxpdF9maXhlZChyYXdfY2FzdCROYW1lLjIsICJcbiIsIDIpWywxXQpyYXdfY2FzdCRlcGlzb2RlcyA8LSBzdHJfc3BsaXRfZml4ZWQocmF3X2Nhc3QkYE5hbWUuMmAsICJcbiIsIDIpWywyXQpyYXdfY2FzdCRlcGlzb2RlcyA8LSBzdHJfdHJpbShyYXdfY2FzdCRlcGlzb2Rlcywgc2lkZT0iYm90aCIpCnJhd19jYXN0CmBgYAojIyMgUXVlc3Rpb24gMTAuIApgYGB7cn0KdHJpbUNhc3QgPC0gc3RyX3JlcGxhY2VfYWxsKHJhd19jYXN0JEVwaXNvZGUsICJbWzpwdW5jdDpdXSIsICIiKQpgYGAKCmBgYHtyfQp0cmltIDwtIHN0cl9yZXBsYWNlX2FsbChyYXdfY2FzdCRFcGlzb2RlLCAiW15bOmFsbnVtOl1dIiwgIiAiKQpgYGAKCmBgYHtyfQpzdHJfdHJpbShyYXdfY2FzdCRFcGlzb2RlLCBzaWRlPSJib3RoIikKYGBgCgoKCgoKCg==