c(-1,0,1)/0
[1] -Inf  NaN  Inf

#Describe the Data

bike <- read.csv("bike_sharing_data.csv")
str(bike)
'data.frame':   17379 obs. of  13 variables:
 $ datetime  : chr  "1/1/2011 0:00" "1/1/2011 1:00" "1/1/2011 2:00" "1/1/2011 3:00" ...
 $ season    : int  1 1 1 1 1 1 1 1 1 1 ...
 $ holiday   : int  0 0 0 0 0 0 0 0 0 0 ...
 $ workingday: int  0 0 0 0 0 0 0 0 0 0 ...
 $ weather   : int  1 1 1 1 1 2 1 1 1 1 ...
 $ temp      : num  9.84 9.02 9.02 9.84 9.84 ...
 $ atemp     : num  14.4 13.6 13.6 14.4 14.4 ...
 $ humidity  : chr  "81" "80" "80" "75" ...
 $ windspeed : num  0 0 0 0 0 ...
 $ casual    : int  3 8 5 3 0 0 2 1 1 8 ...
 $ registered: int  13 32 27 10 1 1 0 2 7 6 ...
 $ count     : int  16 40 32 13 1 1 2 3 8 14 ...
 $ sources   : chr  "ad campaign" "www.yahoo.com" "www.google.fi" "AD campaign" ...
# 1. humidity: chr -> int
# 2. seaon, holiday, workingday, weather: int -> factor 
# 3. datetime: chr -> POSIXct

Recode data type

# Approach one
table(bike$humidity)

  0  10 100  12  13  14  15  16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  31 
 22   1 270   1   1   2   4  10  10  10  16  17  26  27  46  56  59  78  71  97 106 113 118 
 32  33  34  35  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54 
 99 162 133 163 187 224 186 209 224 290 235 270 244 248 316 247 240 327 266 262 312 267 287 
 55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77 
352 310 231 258 272 267 335 325 163 219 387 388 161 172 359 430 193 191 317 341 222 219 336 
 78  79   8  80  81  82  83  84  85  86  87  88  89  90  91  92  93  94  96  97 x61 
327 238   1 107 275 299 630 124   5  76 488 657 239   7   1   2 331 560   3   1   1 
which(bike$humidity == "x61")
[1] 14177
bike[14177, "humidity"] <- 61

#Alternatively,
bike[which(bike$humidity == "x61"), "humidity"] <- 61

#Approach two
pacman::p_load(stringr)
str_subset(bike$humidity, "[a-z, A-Z]")
character(0)
str_replace_all(bike$humidity, "x61", "61")
   [1] "81"  "80"  "80"  "75"  "75"  "75"  "80"  "86"  "75"  "76"  "76"  "81"  "77"  "72" 
  [15] "72"  "77"  "82"  "82"  "88"  "88"  "87"  "87"  "94"  "88"  "88"  "94"  "100" "94" 
  [29] "94"  "77"  "76"  "71"  "76"  "81"  "71"  "66"  "66"  "76"  "81"  "71"  "57"  "46" 
  [43] "42"  "39"  "44"  "44"  "47"  "44"  "44"  "47"  "47"  "50"  "50"  "50"  "43"  "43" 
  [57] "40"  "35"  "35"  "30"  "30"  "30"  "30"  "32"  "47"  "47"  "64"  "69"  "55"  "55" 
  [71] "59"  "63"  "63"  "68"  "74"  "74"  "69"  "64"  "69"  "51"  "51"  "56"  "52"  "52" 
  [85] "49"  "48"  "48"  "48"  "48"  "64"  "64"  "69"  "64"  "74"  "74"  "48"  "47"  "47" 
  [99] "43"  "40"  "37"  "37"  "33"  "33"  "30"  "28"  "28"  "28"  "38"  "38"  "38"  "47" 
 [113] "51"  "55"  "47"  "55"  "64"  "64"  "64"  "69"  "63"  "59"  "59"  "51"  "47"  "44" 
 [127] "35"  "35"  "36"  "36"  "38"  "51"  "51"  "55"  "51"  "55"  "51"  "59"  "64"  "69" 
 [141] "69"  "69"  "55"  "69"  "69"  "51"  "47"  "37"  "40"  "37"  "37"  "40"  "37"  "37" 
 [155] "37"  "40"  "55"  "47"  "47"  "43"  "51"  "51"  "55"  "55"  "55"  "55"  "74"  "74" 
 [169] "74"  "93"  "93"  "80"  "69"  "59"  "44"  "32"  "32"  "29"  "37"  "39"  "36"  "36" 
 [183] "39"  "36"  "39"  "42"  "42"  "46"  "46"  "53"  "53"  "49"  "53"  "49"  "46"  "43" 
 [197] "40"  "37"  "34"  "32"  "35"  "34"  "37"  "40"  "43"  "46"  "46"  "46"  "50"  "50" 
 [211] "50"  "50"  "50"  "54"  "54"  "50"  "50"  "50"  "50"  "50"  "47"  "40"  "40"  "40" 
 [225] "40"  "40"  "40"  "40"  "47"  "50"  "59"  "59"  "59"  "59"  "59"  "55"  "55"  "55" 
 [239] "55"  "51"  "51"  "51"  "47"  "51"  "59"  "59"  "80"  "86"  "86"  "93"  "93"  "93" 
 [253] "86"  "93"  "93"  "86"  "86"  "86"  "86"  "93"  "69"  "59"  "59"  "55"  "51"  "47" 
 [267] "44"  "47"  "47"  "44"  "47"  "47"  "47"  "50"  "55"  "55"  "59"  "59"  "50"  "50" 
 [281] "50"  "50"  "50"  "54"  "54"  "50"  "50"  "50"  "44"  "44"  "41"  "41"  "38"  "38" 
 [295] "40"  "40"  "47"  "47"  "46"  "46"  "50"  "50"  "54"  "54"  "54"  "54"  "54"  "74" 
 [309] "68"  "69"  "55"  "51"  "44"  "37"  "41"  "38"  "41"  "41"  "47"  "59"  "59"  "69" 
 [323] "69"  "55"  "55"  "59"  "59"  "59"  "59"  "59"  "63"  "63"  "63"  "64"  "59"  "55" 
 [337] "48"  "38"  "39"  "36"  "34"  "36"  "45"  "39"  "39"  "39"  "42"  "45"  "56"  "56" 
 [351] "56"  "69"  "56"  "56"  "56"  "56"  "55"  "51"  "51"  "44"  "41"  "35"  "36"  "38" 
 [365] "38"  "37"  "40"  "47"  "47"  "51"  "49"  "40"  "47"  "44"  "43"  "43"  "43"  "43" 
 [379] "43"  "50"  "47"  "47"  "50"  "55"  "47"  "47"  "43"  "47"  "47"  "51"  "55"  "59" 
 [393] "80"  "80"  "93"  "86"  "86"  "86"  "80"  "87"  "87"  "82"  "80"  "80"  "87"  "93" 
 [407] "93"  "93"  "93"  "93"  "93"  "93"  "93"  "93"  "92"  "93"  "93"  "93"  "87"  "81" 
 [421] "62"  "58"  "54"  "58"  "57"  "61"  "57"  "49"  "49"  "52"  "52"  "56"  "56"  "56" 
 [435] "56"  "56"  "60"  "60"  "55"  "55"  "52"  "48"  "45"  "42"  "45"  "45"  "45"  "49" 
 [449] "49"  "56"  "56"  "60"  "60"  "65"  "65"  "70"  "70"  "75"  "80"  "87"  "60"  "55" 
 [463] "51"  "47"  "51"  "47"  "41"  "27"  "21"  "25"  "26"  "26"  "28"  "30"  "30"  "33" 
 [477] "38"  "41"  "38"  "45"  "41"  "41"  "41"  "48"  "44"  "44"  "44"  "41"  "41"  "38" 
 [491] "38"  "35"  "33"  "28"  "28"  "28"  "35"  "35"  "45"  "41"  "49"  "57"  "57"  "57" 
 [505] "62"  "62"  "57"  "57"  "58"  "62"  "54"  "46"  "43"  "37"  "33"  "28"  "28"  "26" 
 [519] "26"  "30"  "30"  "36"  "36"  "38"  "41"  "41"  "45"  "45"  "48"  "48"  "48"  "48" 
 [533] "49"  "41"  "42"  "46"  "42"  "43"  "46"  "40"  "47"  "50"  "59"  "54"  "59"  "63" 
 [547] "63"  "64"  "69"  "69"  "69"  "74"  "74"  "74"  "74"  "74"  "64"  "60"  "60"  "56" 
 [561] "56"  "45"  "42"  "42"  "45"  "49"  "65"  "65"  "65"  "64"  "64"  "69"  "65"  "69" 
 [575] "86"  "86"  "87"  "87"  "87"  "93"  "93"  "93"  "93"  "93"  "93"  "93"  "93"  "55" 
 [589] "55"  "69"  "69"  "74"  "74"  "74"  "80"  "75"  "75"  "75"  "75"  "80"  "80"  "86" 
 [603] "86"  "86"  "86"  "93"  "93"  "93"  "80"  "86"  "80"  "75"  "75"  "75"  "70"  "75" 
 [617] "65"  "60"  "64"  "64"  "64"  "64"  "69"  "64"  "59"  "55"  "59"  "64"  "64"  "59" 
 [631] "55"  "60"  "69"  "60"  "60"  "69"  "69"  "74"  "74"  "80"  "80"  "80"  "80"  "80" 
 [645] "93"  "93"  "86"  "86"  "86"  "80"  "80"  "75"  "52"  "61"  "61"  "56"  "56"  "56" 
 [659] "65"  "65"  "65"  "70"  "70"  "65"  "65"  "64"  "64"  "64"  "59"  "64"  "69"  "64" 
 [673] "59"  "59"  "59"  "59"  "55"  "59"  "55"  "59"  "56"  "59"  "55"  "61"  "59"  "59" 
 [687] "59"  "64"  "64"  "69"  "69"  "69"  "93"  "93"  "93"  "93"  "93"  "93"  "86"  "86" 
 [701] "86"  "80"  "75"  "75"  "75"  "81"  "81"  "87"  "87"  "87"  "93"  "93"  "93"  "93" 
 [715] "93"  "93"  "93"  "93"  "93"  "93"  "93"  "100" "93"  "93"  "93"  "82"  "76"  "71" 
 [729] "53"  "42"  "45"  "48"  "47"  "44"  "44"  "40"  "44"  "43"  "43"  "50"  "43"  "50" 
 [743] "50"  "47"  "43"  "43"  "40"  "40"  "37"  "37"  "37"  "40"  "40"  "40"  "47"  "55" 
 [757] "51"  "47"  "44"  "59"  "63"  "63"  "63"  "55"  "59"  "74"  "80"  "51"  "51"  "48" 
 [771] "50"  "45"  "48"  "42"  "56"  "60"  "65"  "65"  "70"  "65"  "70"  "70"  "65"  "75" 
 [785] "75"  "93"  "100" "100" "93"  "100" "100" "100" "100" "100" "100" "100" "100" "100"
 [799] "100" "93"  "93"  "87"  "100" "93"  "93"  "70"  "65"  "60"  "60"  "60"  "60"  "60" 
 [813] "65"  "65"  "56"  "52"  "49"  "46"  "46"  "46"  "46"  "49"  "46"  "56"  "61"  "61" 
 [827] "60"  "60"  "65"  "65"  "75"  "80"  "86"  "86"  "86"  "93"  "86"  "100" "80"  "75" 
 [841] "65"  "62"  "57"  "57"  "54"  "57"  "70"  "66"  "70"  "70"  "70"  "81"  "81"  "87" 
 [855] "87"  "93"  "93"  "93"  "81"  "70"  "65"  "56"  "52"  "47"  "47"  "38"  "32"  "37" 
 [869] "35"  "35"  "32"  "32"  "40"  "40"  "33"  "33"  "33"  "36"  "36"  "38"  "45"  "45" 
 [883] "45"  "49"  "42"  "39"  "36"  "43"  "40"  "40"  "34"  "34"  "37"  "34"  "47"  "86" 
 [897] "86"  "86"  "80"  "80"  "86"  "80"  "80"  "59"  "50"  "54"  "50"  "49"  "42"  "42" 
 [911] "39"  "34"  "29"  "27"  "25"  "27"  "26"  "28"  "28"  "31"  "39"  "39"  "42"  "49" 
 [925] "54"  "54"  "63"  "68"  "73"  "74"  "74"  "69"  "47"  "47"  "35"  "26"  "21"  "28" 
 [939] "24"  "28"  "33"  "55"  "59"  "64"  "69"  "69"  "86"  "80"  "80"  "74"  "74"  "93" 
 [953] "80"  "86"  "64"  "41"  "28"  "39"  "39"  "31"  "29"  "29"  "31"  "38"  "38"  "41" 
 [967] "41"  "44"  "51"  "64"  "59"  "75"  "69"  "69"  "69"  "55"  "64"  "60"  "45"  "39" 
 [981] "32"  "29"  "30"  "30"  "28"  "28"  "32"  "35"  "35"  "35"  "35"  "35"  "37"  "37" 
 [995] "40"  "46"  "53"  "53"  "46"  "46" 
 [ reached getOption("max.print") -- omitted 16379 entries ]
bike$humidity <- as.integer(bike$humidity)
class(bike$humidity)
[1] "integer"
hist(bike$humidity)

NA
NA
str(bike$season)
 int [1:17379] 1 1 1 1 1 1 1 1 1 1 ...
bike$season <- as.factor(bike$season)

bike$season <- factor(bike$season, levels = c(1,2,3,4),
       labels= c("spring" , "summer" , "fall", "winter"))

summary(bike)
   datetime            season        holiday          workingday        weather     
 Length:17379       spring:4242   Min.   :0.00000   Min.   :0.0000   Min.   :1.000  
 Class :character   summer:4409   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:1.000  
 Mode  :character   fall  :4496   Median :0.00000   Median :1.0000   Median :1.000  
                    winter:4232   Mean   :0.02877   Mean   :0.6827   Mean   :1.425  
                                  3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:2.000  
                                  Max.   :1.00000   Max.   :1.0000   Max.   :4.000  
      temp           atemp          humidity        windspeed          casual      
 Min.   : 0.82   Min.   : 0.00   Min.   :  0.00   Min.   : 0.000   Min.   :  0.00  
 1st Qu.:13.94   1st Qu.:16.66   1st Qu.: 48.00   1st Qu.: 7.002   1st Qu.:  4.00  
 Median :20.50   Median :24.24   Median : 63.00   Median :12.998   Median : 16.00  
 Mean   :20.38   Mean   :23.79   Mean   : 62.72   Mean   :12.737   Mean   : 34.48  
 3rd Qu.:27.06   3rd Qu.:31.06   3rd Qu.: 78.00   3rd Qu.:16.998   3rd Qu.: 46.00  
 Max.   :41.00   Max.   :50.00   Max.   :100.00   Max.   :56.997   Max.   :367.00  
   registered        count       sources         
 Min.   :  0.0   Min.   :  1   Length:17379      
 1st Qu.: 36.0   1st Qu.: 42   Class :character  
 Median :116.0   Median :141   Mode  :character  
 Mean   :152.5   Mean   :187                     
 3rd Qu.:217.0   3rd Qu.:277                     
 Max.   :886.0   Max.   :977                     
pacman::p_load(lubridate)

str(bike$datetime)
 chr [1:17379] "1/1/2011 0:00" "1/1/2011 1:00" "1/1/2011 2:00" "1/1/2011 3:00" ...
bike$datetime <- mdy_hm(bike$datetime)

#Deal with missing values

sum(is.na(bike))
[1] 554
apply(is.na(bike), 2, sum)
  datetime     season    holiday workingday    weather       temp      atemp   humidity 
         0          0          0          0          0          0          0          0 
 windspeed     casual registered      count    sources 
         0          0          0          0        554 
str_detect(bike, "NA")
Warning: argument is not an atomic vector; coercing
 [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
bike_complete <- bike[which(is.na(bike$sources)),]

bike_complete2 <- bike[complete.cases(bike),]

bike_complete3 <-na.omit(bike)
complete.cases(bike)
   [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
  [15]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
  [29]  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
  [43]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
  [57]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
  [71] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
  [85]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
  [99]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [113]  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [127]  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE
 [141]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [155]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [169]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [183]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [197]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [211]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [225]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [239]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [253]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [267]  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [281]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [295]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [309]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [323]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE
 [337]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [351]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [365]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [379]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [393]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [407]  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [421]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [435]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [449]  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
 [463]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [477]  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [491]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [505]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [519]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
 [533]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [547] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE
 [561]  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [575]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [589]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
 [603]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
 [617]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [631]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
 [645]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [659]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [673]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [687]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [701] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [715]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [729]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [743]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [757]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [771]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [785]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [799]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [813]  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE
 [827]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [841]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [855]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [869]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
 [883]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE
 [897]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [911]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [925]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [939]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
 [953]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [967]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [981]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
 [995]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
 [ reached getOption("max.print") -- omitted 16379 entries ]

Deal with strings/character data

  1. Normalize characters: same case
  2. Extra blank spaces
  3. Mathcing characters
table(bike$sources)

     ad campaign      Ad Campaign      AD campaign             blog           direct 
            3472              851              894              494             1610 
   facebook page          Twitter      Twitter         www.bing.com www.google.co.uk 
            1551              890              855             1595             1553 
  www.google.com    www.google.fi    www.yahoo.com 
             527              828             1705 

#Normalize characters

bike$sources <- tolower(bike$sources)
table(bike$sources)

     ad campaign             blog           direct    facebook page          twitter 
            5217              494             1610             1551              890 
     twitter         www.bing.com www.google.co.uk   www.google.com    www.google.fi 
             855             1595             1553              527              828 
   www.yahoo.com 
            1705 

#Remove unneeded spaces

library(stringr)
bike$sources <- str_trim(bike$sources, "both")
table(bike$sourcse)
< table of extent 0 >

#Pattern matching

goog_indices <- grep("google", bike$sources)
goog_indices
   [1]    3   13   15   16   19   22   28   31   35   37   47   58   64   67   80   83   89
  [18]   94   99  105  109  119  120  134  146  148  151  156  159  160  165  170  178  208
  [35]  219  222  234  244  246  247  253  254  257  259  262  272  282  294  296  306  315
  [52]  319  324  329  334  338  347  350  364  365  386  396  397  401  412  419  423  425
  [69]  430  436  442  448  451  459  471  475  476  477  482  494  499  505  519  526  532
  [86]  537  540  544  550  556  562  569  583  590  591  603  604  607  611  631  632  645
 [103]  650  652  657  659  669  670  672  673  675  678  680  686  688  690  696  697  700
 [120]  704  711  712  713  719  722  724  738  740  741  747  757  769  775  780  782  796
 [137]  816  826  828  829  831  848  855  860  878  883  885  886  904  905  910  912  923
 [154]  935  936  937  943  946  950  959  960  963  967  970  971  978  987  988  992  994
 [171] 1004 1008 1015 1018 1019 1022 1032 1034 1036 1037 1040 1047 1053 1057 1058 1061 1063
 [188] 1070 1084 1086 1091 1098 1103 1107 1110 1118 1129 1132 1138 1142 1155 1157 1162 1163
 [205] 1175 1180 1185 1188 1193 1196 1197 1199 1203 1215 1217 1224 1230 1248 1252 1260 1262
 [222] 1268 1271 1276 1293 1298 1302 1305 1308 1309 1315 1317 1324 1330 1336 1337 1340 1344
 [239] 1353 1358 1360 1362 1367 1368 1377 1380 1386 1390 1393 1395 1400 1402 1407 1414 1426
 [256] 1428 1438 1442 1443 1445 1446 1455 1459 1476 1480 1490 1494 1495 1503 1516 1524 1525
 [273] 1526 1529 1533 1534 1541 1563 1573 1578 1583 1602 1607 1610 1612 1613 1617 1620 1632
 [290] 1639 1641 1643 1670 1671 1678 1684 1693 1696 1698 1701 1704 1707 1709 1728 1733 1751
 [307] 1755 1756 1758 1761 1766 1770 1776 1789 1790 1792 1794 1795 1799 1808 1810 1819 1824
 [324] 1825 1832 1833 1855 1865 1877 1881 1882 1884 1891 1905 1907 1914 1919 1923 1936 1939
 [341] 1941 1944 1946 1948 1951 1953 1957 1958 1961 1968 1974 1997 1998 2004 2009 2022 2034
 [358] 2035 2053 2066 2072 2077 2081 2084 2086 2090 2094 2123 2132 2137 2166 2167 2183 2184
 [375] 2187 2188 2200 2203 2204 2205 2207 2216 2218 2227 2242 2247 2250 2251 2252 2268 2286
 [392] 2301 2303 2311 2322 2324 2327 2344 2352 2354 2355 2360 2365 2375 2377 2380 2391 2404
 [409] 2407 2409 2413 2417 2434 2435 2438 2447 2450 2456 2481 2498 2503 2510 2514 2517 2518
 [426] 2519 2534 2539 2553 2555 2578 2580 2588 2591 2601 2607 2615 2624 2627 2631 2635 2636
 [443] 2641 2643 2655 2660 2661 2675 2694 2702 2707 2709 2710 2717 2719 2722 2723 2727 2732
 [460] 2743 2744 2746 2751 2755 2757 2765 2772 2777 2782 2789 2791 2792 2799 2810 2814 2820
 [477] 2821 2823 2827 2831 2840 2842 2847 2849 2854 2859 2860 2862 2863 2881 2887 2901 2903
 [494] 2916 2922 2926 2939 2940 2942 2950 2972 2978 2987 2991 3003 3011 3012 3018 3041 3043
 [511] 3053 3057 3065 3066 3076 3078 3080 3081 3082 3089 3097 3109 3114 3119 3157 3159 3161
 [528] 3169 3181 3187 3190 3193 3195 3201 3203 3223 3224 3229 3234 3237 3238 3260 3264 3280
 [545] 3286 3289 3294 3299 3303 3304 3312 3319 3322 3333 3335 3358 3359 3362 3368 3375 3376
 [562] 3383 3399 3405 3417 3424 3430 3432 3439 3443 3450 3461 3464 3467 3474 3476 3480 3482
 [579] 3487 3489 3520 3534 3537 3540 3545 3547 3554 3560 3562 3563 3568 3570 3571 3580 3587
 [596] 3591 3592 3593 3599 3611 3631 3634 3635 3674 3681 3683 3693 3694 3708 3714 3716 3719
 [613] 3725 3731 3735 3742 3756 3772 3780 3797 3810 3812 3813 3814 3821 3830 3831 3839 3844
 [630] 3846 3858 3862 3863 3868 3871 3885 3886 3888 3896 3899 3902 3905 3907 3913 3916 3918
 [647] 3922 3924 3925 3926 3934 3935 3942 3943 3966 3967 3971 3974 3994 3998 4002 4004 4005
 [664] 4006 4012 4027 4030 4043 4047 4070 4078 4085 4109 4111 4122 4128 4131 4146 4156 4160
 [681] 4162 4167 4170 4172 4175 4178 4181 4184 4187 4196 4201 4205 4218 4220 4221 4238 4246
 [698] 4261 4265 4268 4269 4271 4279 4288 4299 4302 4323 4336 4346 4347 4350 4352 4362 4365
 [715] 4367 4371 4377 4381 4388 4421 4434 4438 4439 4446 4452 4456 4463 4465 4466 4468 4471
 [732] 4482 4490 4494 4495 4496 4519 4525 4526 4528 4556 4562 4563 4575 4594 4596 4597 4601
 [749] 4605 4622 4625 4635 4637 4645 4655 4663 4665 4669 4676 4677 4685 4687 4692 4697 4701
 [766] 4711 4722 4727 4731 4732 4753 4755 4758 4760 4766 4768 4772 4799 4815 4823 4842 4846
 [783] 4861 4873 4874 4878 4887 4890 4904 4908 4915 4926 4930 4933 4942 4943 4946 4960 4964
 [800] 4967 4973 4974 4978 4979 5003 5010 5011 5021 5026 5029 5041 5053 5060 5065 5073 5077
 [817] 5086 5094 5111 5112 5119 5128 5132 5133 5137 5151 5152 5156 5159 5162 5165 5171 5176
 [834] 5184 5185 5186 5191 5193 5195 5196 5198 5199 5215 5219 5225 5231 5250 5251 5255 5264
 [851] 5266 5271 5279 5285 5293 5299 5303 5306 5307 5308 5315 5316 5319 5325 5336 5338 5339
 [868] 5345 5348 5352 5357 5365 5368 5373 5374 5375 5377 5379 5381 5385 5387 5388 5392 5400
 [885] 5402 5406 5411 5412 5416 5417 5426 5427 5430 5435 5450 5455 5458 5465 5474 5485 5488
 [902] 5490 5494 5501 5513 5516 5517 5518 5525 5526 5527 5529 5532 5533 5552 5553 5555 5559
 [919] 5591 5594 5597 5604 5612 5613 5626 5631 5633 5647 5654 5670 5672 5679 5694 5698 5702
 [936] 5709 5711 5717 5740 5743 5746 5759 5761 5768 5771 5779 5781 5790 5794 5795 5796 5798
 [953] 5832 5836 5840 5841 5843 5844 5846 5853 5854 5856 5857 5858 5860 5862 5866 5875 5879
 [970] 5881 5885 5890 5893 5896 5906 5908 5912 5919 5928 5938 5941 5950 5954 5955 5959 5963
 [987] 5969 5970 5992 6000 6009 6010 6017 6021 6022 6023 6025 6028 6040 6044
 [ reached getOption("max.print") -- omitted 1908 entries ]
bike$sources[goog_indices] <- "google"
table(bike$sources)

  ad campaign          blog        direct facebook page        google       twitter 
         5217           494          1610          1551          2908          1745 
 www.bing.com www.yahoo.com 
         1595          1705 
bad_data <- str_subset(bike_complete$humidity, "[a-z A-Z]")
bike_complete$humidity <- str_replace_all(bike_complete$humidity,bad_data,"61")
Error in `str_replace_all()`:
! Can't recycle `string` (size 554) to match `pattern` (size 0).
Backtrace:
 1. stringr::str_replace_all(bike_complete$humidity, bad_data, "61")
str(cast)
'data.frame':   1298 obs. of  3 variables:
 $ X     : int  1 2 3 4 5 6 7 8 9 10 ...
 $ Name.1: chr  "Angela Bassett" "Peter Krause" "Oliver Stark" "Aisha Hinds" ...
 $ Name.2: chr  "Athena Grant\n                  87 episodes, 2018-2022" "Bobby Nash\n                  87 episodes, 2018-2022" "Evan 'Buck' Buckley\n                  87 episodes, 2018-2022" "Henrietta 'Hen' Wilson\n                  87 episodes, 2018-2022" ...
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CmMoLTEsMCwxKS8wCmBgYAoKI0Rlc2NyaWJlIHRoZSBEYXRhCmBgYHtyfQpiaWtlIDwtIHJlYWQuY3N2KCJiaWtlX3NoYXJpbmdfZGF0YS5jc3YiKQpzdHIoYmlrZSkKCiMgMS4gaHVtaWRpdHk6IGNociAtPiBpbnQKIyAyLiBzZWFvbiwgaG9saWRheSwgd29ya2luZ2RheSwgd2VhdGhlcjogaW50IC0+IGZhY3RvciAKIyAzLiBkYXRldGltZTogY2hyIC0+IFBPU0lYY3QKYGBgCiMgUmVjb2RlIGRhdGEgdHlwZQpgYGB7cn0KIyBBcHByb2FjaCBvbmUKdGFibGUoYmlrZSRodW1pZGl0eSkKCndoaWNoKGJpa2UkaHVtaWRpdHkgPT0gIng2MSIpCmJpa2VbMTQxNzcsICJodW1pZGl0eSJdIDwtIDYxCgojQWx0ZXJuYXRpdmVseSwKYmlrZVt3aGljaChiaWtlJGh1bWlkaXR5ID09ICJ4NjEiKSwgImh1bWlkaXR5Il0gPC0gNjEKCiNBcHByb2FjaCB0d28KcGFjbWFuOjpwX2xvYWQoc3RyaW5ncikKc3RyX3N1YnNldChiaWtlJGh1bWlkaXR5LCAiW2EteiwgQS1aXSIpCnN0cl9yZXBsYWNlX2FsbChiaWtlJGh1bWlkaXR5LCAieDYxIiwgIjYxIikKCmJpa2UkaHVtaWRpdHkgPC0gYXMuaW50ZWdlcihiaWtlJGh1bWlkaXR5KQpjbGFzcyhiaWtlJGh1bWlkaXR5KQpoaXN0KGJpa2UkaHVtaWRpdHkpCiAgCiAgCmBgYAoKYGBge3J9CnN0cihiaWtlJHNlYXNvbikKCmJpa2Ukc2Vhc29uIDwtIGFzLmZhY3RvcihiaWtlJHNlYXNvbikKCmJpa2Ukc2Vhc29uIDwtIGZhY3RvcihiaWtlJHNlYXNvbiwgbGV2ZWxzID0gYygxLDIsMyw0KSwKICAgICAgIGxhYmVscz0gYygic3ByaW5nIiAsICJzdW1tZXIiICwgImZhbGwiLCAid2ludGVyIikpCgpzdW1tYXJ5KGJpa2UpCmBgYAoKYGBge3J9CnBhY21hbjo6cF9sb2FkKGx1YnJpZGF0ZSkKCnN0cihiaWtlJGRhdGV0aW1lKQoKYmlrZSRkYXRldGltZSA8LSBtZHlfaG0oYmlrZSRkYXRldGltZSkKCmBgYAoKI0RlYWwgd2l0aCBtaXNzaW5nIHZhbHVlcwpgYGB7cn0Kc3VtKGlzLm5hKGJpa2UpKQoKYXBwbHkoaXMubmEoYmlrZSksIDIsIHN1bSkKCnN0cl9kZXRlY3QoYmlrZSwgIk5BIikKYGBgCgpgYGB7cn0KYmlrZV9jb21wbGV0ZSA8LSBiaWtlW3doaWNoKGlzLm5hKGJpa2Ukc291cmNlcykpLF0KCmJpa2VfY29tcGxldGUyIDwtIGJpa2VbY29tcGxldGUuY2FzZXMoYmlrZSksXQoKYmlrZV9jb21wbGV0ZTMgPC1uYS5vbWl0KGJpa2UpCmNvbXBsZXRlLmNhc2VzKGJpa2UpCmBgYAojIERlYWwgd2l0aCBzdHJpbmdzL2NoYXJhY3RlciBkYXRhCjEuIE5vcm1hbGl6ZSBjaGFyYWN0ZXJzOiBzYW1lIGNhc2UKMi4gRXh0cmEgYmxhbmsgc3BhY2VzCjMuIE1hdGhjaW5nIGNoYXJhY3RlcnMKCmBgYHtyfQp0YWJsZShiaWtlJHNvdXJjZXMpCmBgYAojTm9ybWFsaXplIGNoYXJhY3RlcnMKYGBge3J9CmJpa2Ukc291cmNlcyA8LSB0b2xvd2VyKGJpa2Ukc291cmNlcykKdGFibGUoYmlrZSRzb3VyY2VzKQpgYGAKCiNSZW1vdmUgdW5uZWVkZWQgc3BhY2VzCmBgYHtyfQpsaWJyYXJ5KHN0cmluZ3IpCmJpa2Ukc291cmNlcyA8LSBzdHJfdHJpbShiaWtlJHNvdXJjZXMsICJib3RoIikKdGFibGUoYmlrZSRzb3VyY3NlKQpgYGAKCiNQYXR0ZXJuIG1hdGNoaW5nCmBgYHtyfQpnb29nX2luZGljZXMgPC0gZ3JlcCgiZ29vZ2xlIiwgYmlrZSRzb3VyY2VzKQpnb29nX2luZGljZXMKCmJpa2Ukc291cmNlc1tnb29nX2luZGljZXNdIDwtICJnb29nbGUiCnRhYmxlKGJpa2Ukc291cmNlcykKYGBgCgpgYGB7cn0KYmFkX2RhdGEgPC0gc3RyX3N1YnNldChiaWtlX2NvbXBsZXRlJGh1bWlkaXR5LCAiW2EteiBBLVpdIikKYmlrZV9jb21wbGV0ZSRodW1pZGl0eSA8LSBzdHJfcmVwbGFjZV9hbGwoYmlrZV9jb21wbGV0ZSRodW1pZGl0eSxiYWRfZGF0YSwiNjEiKQpgYGAKCmBgYHtyfQpjYXN0PC0gcmVhZC5jc3YoInJhd19jYXN0LmNzdiIpCnN0cihjYXN0KQoKYGBgCgpgYGB7cn0KbGlicmFyeShzdHJpbmdyKQoKCnNlcGVyYXRlZF9jb2x1bW5zIDwtIHN0cl9zcGxpdF9maXhlZChjYXN0JE5hbWUuMSwgIi0iLCAxKQoKCmNhc3QkTmFtZTIgPC0gc3RyX3NwbGl0X2ZpeGVkKGNhc3QkTmFtZS4yLCAiXG4iLCAyKVssMV0KCmNhc3QkRXBpc29kZSA8LXN0cl9zcGxpdF9maXhlZChjYXN0JE5hbWUuMiwgIlxuIiwgMilbLDJdCgpzdHJfdHJpbShjYXN0JEVwaXNvZGUsIHNpZGU9ImJvdGgiKQoKcHJpbnQoY2FzdCkKYGBgCgo=