library(robotstxt)

library(rvest)

library(stringr)
# Load the bike-sharing data from the working directory and show its structure.
bike <- read.csv(file = "bike_sharing_data.csv")
str(object = bike)
'data.frame':   17379 obs. of  13 variables:
 $ datetime  : chr  "1/1/2011 0:00" "1/1/2011 1:00" "1/1/2011 2:00" "1/1/2011 3:00" ...
 $ season    : int  1 1 1 1 1 1 1 1 1 1 ...
 $ holiday   : int  0 0 0 0 0 0 0 0 0 0 ...
 $ workingday: int  0 0 0 0 0 0 0 0 0 0 ...
 $ weather   : int  1 1 1 1 1 2 1 1 1 1 ...
 $ temp      : num  9.84 9.02 9.02 9.84 9.84 ...
 $ atemp     : num  14.4 13.6 13.6 14.4 14.4 ...
 $ humidity  : chr  "81" "80" "80" "75" ...
 $ windspeed : num  0 0 0 0 0 ...
 $ casual    : int  3 8 5 3 0 0 2 1 1 8 ...
 $ registered: int  13 32 27 10 1 1 0 2 7 6 ...
 $ count     : int  16 40 32 13 1 1 2 3 8 14 ...
 $ sources   : chr  "ad campaign" "www.yahoo.com" "www.google.fi" "AD campaign" ...
# Per-column summary of the bike data.
summary(object = bike)
   datetime             season         holiday          workingday        weather           temp      
 Length:17379       Min.   :1.000   Min.   :0.00000   Min.   :0.0000   Min.   :1.000   Min.   : 0.82  
 Class :character   1st Qu.:2.000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:1.000   1st Qu.:13.94  
 Mode  :character   Median :3.000   Median :0.00000   Median :1.0000   Median :1.000   Median :20.50  
                    Mean   :2.502   Mean   :0.02877   Mean   :0.6827   Mean   :1.425   Mean   :20.38  
                    3rd Qu.:3.000   3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:2.000   3rd Qu.:27.06  
                    Max.   :4.000   Max.   :1.00000   Max.   :1.0000   Max.   :4.000   Max.   :41.00  
     atemp         humidity           windspeed          casual         registered        count    
 Min.   : 0.00   Length:17379       Min.   : 0.000   Min.   :  0.00   Min.   :  0.0   Min.   :  1  
 1st Qu.:16.66   Class :character   1st Qu.: 7.002   1st Qu.:  4.00   1st Qu.: 36.0   1st Qu.: 42  
 Median :24.24   Mode  :character   Median :12.998   Median : 16.00   Median :116.0   Median :141  
 Mean   :23.79                      Mean   :12.737   Mean   : 34.48   Mean   :152.5   Mean   :187  
 3rd Qu.:31.06                      3rd Qu.:16.998   3rd Qu.: 46.00   3rd Qu.:217.0   3rd Qu.:277  
 Max.   :50.00                      Max.   :56.997   Max.   :367.00   Max.   :886.0   Max.   :977  
   sources         
 Length:17379      
 Class :character  
 Mode  :character  
                   
                   
                   

Question 2.


# Missing-value checks for the bike data.
#
# These calls must target the `bike` data frame. `df` is not a data frame
# here: it resolves to the function stats::df(), so the earlier versions of
# these calls produced a warning, errors, or the printed function source
# instead of information about the data.

# Count missing (TRUE) versus present (FALSE) cells across the whole table.
table(is.na(bike))

# Row and column position of every missing cell.
which(is.na(bike), arr.ind = TRUE)

# The data with every incomplete row dropped.
na.omit(bike)

# One flag per row: TRUE when the row has no missing values.
complete.cases(bike)

# One flag per column: TRUE when any value contains the literal text "NA".
vapply(
  bike,
  function(column) any(str_detect(as.character(column), "NA"), na.rm = TRUE),
  logical(1)
)
Error in `str_detect()`:
! `string` must be a vector, not a function.
Backtrace:
 1. stringr::str_detect(df, "NA")

Question 3.

# Collect the humidity entries that contain a letter or a space.
bad_data <- str_subset(string = bike$humidity, pattern = "[a-z A-Z]")
print(bad_data)
[1] "x61"
# Strip every character that cannot be part of a number (anything other than
# a digit or a decimal point), so an entry such as "x61" becomes "61". This
# does not depend on how many malformed entries exist or what their values
# are, unlike replacing one known bad string with a hard-coded number.
bike$humidity <- str_remove_all(bike$humidity, "[^0-9.]")

# Re-run the letter/space check; an empty result means the column is clean.
bad_data_2 <- str_subset(bike$humidity, "[a-z A-Z]")
bad_data_2
character(0)

Question 4.

# `bike_complete` must exist before its weather column can be recoded. When it
# has not been created yet, build it from `bike` with incomplete rows removed.
# NOTE(review): assumes `bike_complete` is meant to be the NA-free bike data;
# confirm against the assignment.
if (!exists("bike_complete")) {
  bike_complete <- na.omit(bike)
}

# Recode the integer weather codes 1-4 as a labelled factor.
bike_complete$weather <- factor(
  bike_complete$weather,
  levels = c(1, 2, 3, 4),
  labels = c("Clear", "Mist", "Light Snow/Rain", "Heavy Snow/Rain")
)
# Load the scraped cast table from the working directory and show its structure.
raw_cast <- read.csv(file = "raw_cast.csv")
str(object = raw_cast)
'data.frame':   1298 obs. of  3 variables:
 $ X     : int  1 2 3 4 5 6 7 8 9 10 ...
 $ Name.1: chr  "Angela Bassett" "Peter Krause" "Oliver Stark" "Aisha Hinds" ...
 $ Name.2: chr  "Athena Grant\n                  87 episodes, 2018-2022" "Bobby Nash\n                  87 episodes, 2018-2022" "Evan 'Buck' Buckley\n                  87 episodes, 2018-2022" "Henrietta 'Hen' Wilson\n                  87 episodes, 2018-2022" ...
# Display the cast table, then its column types.
print(raw_cast)
str(object = raw_cast)
'data.frame':   1298 obs. of  3 variables:
 $ X     : int  1 2 3 4 5 6 7 8 9 10 ...
 $ Name.1: chr  "Angela Bassett" "Peter Krause" "Oliver Stark" "Aisha Hinds" ...
 $ Name.2: chr  "Athena Grant\n                  87 episodes, 2018-2022" "Bobby Nash\n                  87 episodes, 2018-2022" "Evan 'Buck' Buckley\n                  87 episodes, 2018-2022" "Henrietta 'Hen' Wilson\n                  87 episodes, 2018-2022" ...
# Each `Name.2` value is split at its first newline into two pieces: the part
# before the newline goes into a new `Name 2` column, and the part after it,
# with surrounding whitespace trimmed, goes into a new `episodes` column.
raw_cast[["Name 2"]] <- str_split_fixed(raw_cast[["Name.2"]], "\n", 2)[, 1]
raw_cast[["episodes"]] <- str_trim(
  str_split_fixed(raw_cast[["Name.2"]], "\n", 2)[, 2],
  side = "both"
)
print(raw_cast)

Question 10.

# The episode text is stored in the lowercase `episodes` column. There is no
# `Episode` column, so `raw_cast$Episode` is NULL and each of these calls
# previously returned character(0).

# Episode text with all punctuation removed.
trimCast <- str_replace_all(raw_cast$episodes, "[[:punct:]]", "")

# Episode text with every non-alphanumeric character replaced by a space.
trim <- str_replace_all(raw_cast$episodes, "[^[:alnum:]]", " ")

# Episode text with leading and trailing whitespace removed.
str_trim(raw_cast$episodes, side = "both")
LS0tCnRpdGxlOiAiQXNzaWdubWVudCA0IgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpgYGB7cn0KbGlicmFyeShyb2JvdHN0eHQpCgpsaWJyYXJ5KHJ2ZXN0KQoKbGlicmFyeShzdHJpbmdyKQpgYGAKCmBgYHtyfQpiaWtlIDwtIHJlYWQuY3N2KCJiaWtlX3NoYXJpbmdfZGF0YS5jc3YiKQpzdHIoYmlrZSkKYGBgCmBgYHtyfQpzdW1tYXJ5KGJpa2UpCmBgYAoKIyMjIFF1ZXN0aW9uIDIuIApgYGB7cn0KCnRhYmxlKGlzLm5hKGRmKSkKYGBgCgoKYGBge3J9CndoaWNoKGlzLm5hKGRmJHZhcik9PVRSVUUpCmBgYAoKCmBgYHtyfQpuYS5vbWl0KGRmKQpgYGAKCmBgYHtyfQpjb21wbGV0ZS5jYXNlcyhkZikKYGBgCgpgYGB7cn0Kc3RyX2RldGVjdChkZiwiTkEiKQpgYGAKCiMjIyBRdWVzdGlvbnMgMy4gCmBgYHtyfQpiYWRfZGF0YSA8LSBzdHJfc3Vic2V0KGJpa2UkaHVtaWRpdHksICJbYS16IEEtWl0iKQpiYWRfZGF0YQpgYGAKCmBgYHtyfQpiaWtlJGh1bWlkaXR5IDwtIHN0cl9yZXBsYWNlX2FsbChiaWtlJGh1bWlkaXR5LGJhZF9kYXRhLCI2MSIpCmJhZF9kYXRhXzIgPC0gc3RyX3N1YnNldChiaWtlJGh1bWlkaXR5LCAiW2EteiBBLVpdIikKYmFkX2RhdGFfMgpgYGAKCgojIyMgUXVlc3Rpb24gNC4gCmBgYHtyfQpiaWtlX2NvbXBsZXRlJHdlYXRoZXIgPC0gZmFjdG9yKGJpa2VfY29tcGxldGUkd2VhdGhlciwgbGV2ZWxzID0gYygxLDIsMyw0KSwgbGFiZWxzID0gYygiQ2xlYXIiLCJNaXN0IiwiTGlnaHQgU25vdy9SYWluIiwgIkhlYXZ5IFNub3cvUmFpbiIpKQpgYGAKCmBgYHtyfQpyYXdfY2FzdCA8LSByZWFkLmNzdigicmF3X2Nhc3QuY3N2IikKc3RyKHJhd19jYXN0KQpgYGAKCmBgYHtyfQpyYXdfY2FzdApgYGAKCmBgYHtyfQpzdHIocmF3X2Nhc3QpCmBgYAoKYGBge3J9CnJhd19jYXN0JGBOYW1lIDJgIDwtIHN0cl9zcGxpdF9maXhlZChyYXdfY2FzdCROYW1lLjIsICJcbiIsIDIpWywxXQpyYXdfY2FzdCRlcGlzb2RlcyA8LSBzdHJfc3BsaXRfZml4ZWQocmF3X2Nhc3QkYE5hbWUuMmAsICJcbiIsIDIpWywyXQpyYXdfY2FzdCRlcGlzb2RlcyA8LSBzdHJfdHJpbShyYXdfY2FzdCRlcGlzb2Rlcywgc2lkZT0iYm90aCIpCnJhd19jYXN0CmBgYAojIyMgUXVlc3Rpb24gMTAuIApgYGB7cn0KdHJpbUNhc3QgPC0gc3RyX3JlcGxhY2VfYWxsKHJhd19jYXN0JEVwaXNvZGUsICJbWzpwdW5jdDpdXSIsICIiKQpgYGAKCmBgYHtyfQp0cmltIDwtIHN0cl9yZXBsYWNlX2FsbChyYXdfY2FzdCRFcGlzb2RlLCAiW15bOmFsbnVtOl1dIiwgIiAiKQpgYGAKCmBgYHtyfQpzdHJfdHJpbShyYXdfY2FzdCRFcGlzb2RlLCBzaWRlPSJib3RoIikKYGBgCgoKCgoKCg==