Extracting the bike sharing datasets
# Load the bike sharing data four ways. The first two calls spell out the
# separator and header row for read.table(); the last two rely on the
# defaults of read.csv() and read.delim() for the same two files.
bike1 <- read.table(file = "bike_sharing_data.csv", header = TRUE, sep = ",")
bike2 <- read.table(file = "bike_sharing_data.txt", header = TRUE, sep = "\t")
bike3 <- read.csv(file = "bike_sharing_data.csv")
bike4 <- read.delim(file = "bike_sharing_data.txt")
Total number of observations and variables, and the data type of humidity
# Print the structure of bike1: the observation and variable counts, then
# each column's type and first few values.
str(bike1)
## 'data.frame': 17379 obs. of 13 variables:
## $ datetime : chr "1/1/2011 0:00" "1/1/2011 1:00" "1/1/2011 2:00" "1/1/2011 3:00" ...
## $ season : int 1 1 1 1 1 1 1 1 1 1 ...
## $ holiday : int 0 0 0 0 0 0 0 0 0 0 ...
## $ workingday: int 0 0 0 0 0 0 0 0 0 0 ...
## $ weather : int 1 1 1 1 1 2 1 1 1 1 ...
## $ temp : num 9.84 9.02 9.02 9.84 9.84 ...
## $ atemp : num 14.4 13.6 13.6 14.4 14.4 ...
## $ humidity : chr "81" "80" "80" "75" ...
## $ windspeed : num 0 0 0 0 0 ...
## $ casual : int 3 8 5 3 0 0 2 1 1 8 ...
## $ registered: int 13 32 27 10 1 1 0 2 7 6 ...
## $ count : int 16 40 32 13 1 1 2 3 8 14 ...
## $ sources : chr "ad campaign" "www.yahoo.com" "www.google.fi" "AD campaign" ...
Value of season in row 6251
# Print every column of row 6251; the season value is read off the season
# column of the printed row.
bike1[6251,]
## datetime season holiday workingday weather temp atemp humidity
## 6251 9/23/2011 0:00 4 0 1 2 25.42 27.275 94
## windspeed casual registered count sources
## 6251 6.0032 5 23 28 Ad Campaign
Number of winter observations (season 4 = winter)
# Count the observations for each season code; the count under code 4 is the
# number of winter observations.
table(bike1$season)
##
## 1 2 3 4
## 4242 4409 4496 4232
Observations with high wind or above (windspeed >= 40) in winter or spring (season 4 or 1)
# Keep the rows of bike1 whose windspeed is at least 40 and whose season code
# is 1 or 4. which() discards rows where the condition is NA, and
# drop = FALSE keeps the result a data frame with all columns.
subset_bike <- bike1[
  with(bike1, which(windspeed >= 40 & season %in% c(1, 4))), ,
  drop = FALSE
]
# Show the size and column types of the filtered data.
str(subset_bike)
## 'data.frame': 46 obs. of 13 variables:
## $ datetime : chr "2/14/2011 15:00" "2/14/2011 17:00" "2/14/2011 18:00" "2/14/2011 22:00" ...
## $ season : int 1 1 1 1 1 1 1 1 1 1 ...
## $ holiday : int 0 0 0 0 0 0 0 0 0 0 ...
## $ workingday: int 1 1 1 1 1 1 0 0 0 0 ...
## $ weather : int 1 1 1 1 1 1 1 1 1 1 ...
## $ temp : num 23 18.9 16.4 13.9 12.3 ...
## $ atemp : num 26.5 22.7 20.5 14.4 12.1 ...
## $ humidity : chr "21" "33" "40" "46" ...
## $ windspeed : num 44 41 41 44 52 ...
## $ casual : int 19 25 11 1 0 1 18 52 102 84 ...
## $ registered: int 71 218 194 44 5 2 37 103 94 87 ...
## $ count : int 90 243 205 45 5 3 55 155 196 171 ...
## $ sources : chr "www.google.co.uk" "ad campaign" "ad campaign" "www.google.co.uk" ...