# Load the bike-sharing data from its comma-separated (.csv) and
# tab-separated (.txt) exports.
# bike1/bike2 go through the general-purpose read.table() with the separator
# and header flag spelled out; bike3/bike4 use the read.csv()/read.delim()
# convenience wrappers with their defaults.
bike1 <- read.table(file = "bike_sharing_data.csv", header = TRUE, sep = ",")
bike2 <- read.table(file = "bike_sharing_data.txt", header = TRUE, sep = "\t")
bike3 <- read.csv(file = "bike_sharing_data.csv")
bike4 <- read.delim(file = "bike_sharing_data.txt")
# Inspect the structure of bike1. The first line of the str() output reports
# the total number of observations (rows) and variables (columns); the
# remaining lines list each column with its type and first few values.
str(object = bike1)
## 'data.frame': 17379 obs. of 13 variables:
## $ datetime : chr "1/1/2011 0:00" "1/1/2011 1:00" "1/1/2011 2:00" "1/1/2011 3:00" ...
## $ season : int 1 1 1 1 1 1 1 1 1 1 ...
## $ holiday : int 0 0 0 0 0 0 0 0 0 0 ...
## $ workingday: int 0 0 0 0 0 0 0 0 0 0 ...
## $ weather : int 1 1 1 1 1 2 1 1 1 1 ...
## $ temp : num 9.84 9.02 9.02 9.84 9.84 ...
## $ atemp : num 14.4 13.6 13.6 14.4 14.4 ...
## $ humidity : chr "81" "80" "80" "75" ...
## $ windspeed : num 0 0 0 0 0 ...
## $ casual : int 3 8 5 3 0 0 2 1 1 8 ...
## $ registered: int 13 32 27 10 1 1 0 2 7 6 ...
## $ count : int 16 40 32 13 1 1 2 3 8 14 ...
## $ sources : chr "ad campaign" "www.yahoo.com" "www.google.fi" "AD campaign" ...
# Index bike2 by row number to print row 6251 in full (all columns); the
# season value is read off the `season` column of the printed row.
bike2[6251L, ]
## datetime season holiday workingday weather temp atemp humidity
## 6251 9/23/2011 0:00 4 0 1 2 25.42 27.275 94
## windspeed casual registered count sources
## 6251 6.0032 5 23 28 Ad Campaign
# Winter observations (season code 4).
# First print how many rows are winter observations: summing the logical
# mask counts the TRUE entries, and na.rm = TRUE keeps a missing season
# from turning the whole count into NA.
sum(bike2$season == 4, na.rm = TRUE)
# Then preview the first 10 winter rows. The test is `== 4` rather than
# `>= 4` so that any out-of-range season code cannot be counted as winter.
head(subset(bike2, season == 4), 10)
## datetime season holiday workingday weather temp atemp humidity
## 6251 9/23/2011 0:00 4 0 1 2 25.42 27.275 94
## 6252 9/23/2011 1:00 4 0 1 2 24.60 25.000 100
## 6253 9/23/2011 2:00 4 0 1 2 24.60 25.000 100
## 6254 9/23/2011 3:00 4 0 1 2 24.60 25.000 100
## 6255 9/23/2011 4:00 4 0 1 3 24.60 25.000 100
## 6256 9/23/2011 5:00 4 0 1 2 25.42 27.275 94
## 6257 9/23/2011 6:00 4 0 1 2 25.42 27.275 94
## 6258 9/23/2011 7:00 4 0 1 3 25.42 27.275 94
## 6259 9/23/2011 8:00 4 0 1 3 25.42 27.275 94
## 6260 9/23/2011 9:00 4 0 1 3 25.42 25.760 100
## windspeed casual registered count sources
## 6251 6.0032 5 23 28 Ad Campaign
## 6252 0.0000 2 11 13 facebook page
## 6253 7.0015 1 8 9 ad campaign
## 6254 0.0000 1 4 5 www.bing.com
## 6255 0.0000 1 4 5 direct
## 6256 0.0000 1 16 17 AD campaign
## 6257 6.0032 4 62 66 ad campaign
## 6258 8.9981 6 118 124 www.bing.com
## 6259 8.9981 10 224 234 facebook page
## 6260 8.9981 7 97 104 Ad Campaign
# Subset: observations with a "high" wind threat condition or above, in
# Winter (season 4) or Spring (season 1); preview the first 10 matches.
# Only the lower bound of the "high" band (windspeed >= 40) is applied:
# "or above" means there must be no upper cap, otherwise stronger winds
# would be dropped from the result.
# NOTE(review): 40 is taken from the original filter as the start of the
# "high" band -- confirm against the wind threat table in use.
head(subset(bike2, season %in% c(1, 4) & windspeed >= 40), 10)
## datetime season holiday workingday weather temp atemp humidity
## 1008 2/14/2011 15:00 1 0 1 1 22.96 26.515 21
## 1010 2/14/2011 17:00 1 0 1 1 18.86 22.725 33
## 1011 2/14/2011 18:00 1 0 1 1 16.40 20.455 40
## 1015 2/14/2011 22:00 1 0 1 1 13.94 14.395 46
## 1018 2/15/2011 1:00 1 0 1 1 12.30 12.120 42
## 1019 2/15/2011 2:00 1 0 1 1 11.48 11.365 41
## 1120 2/19/2011 9:00 1 0 0 1 16.40 20.455 16
## 1124 2/19/2011 13:00 1 0 0 1 18.04 21.970 16
## 1125 2/19/2011 14:00 1 0 0 1 18.86 22.725 15
## 1126 2/19/2011 15:00 1 0 0 1 18.04 21.970 16
## windspeed casual registered count sources
## 1008 43.9989 19 71 90 www.google.co.uk
## 1010 40.9973 25 218 243 ad campaign
## 1011 40.9973 11 194 205 ad campaign
## 1015 43.9989 1 44 45 www.google.co.uk
## 1018 51.9987 0 5 5 www.google.fi
## 1019 46.0022 1 2 3 www.google.fi
## 1120 43.9989 18 37 55 Ad Campaign
## 1124 40.9973 52 103 155 Twitter
## 1125 43.9989 102 94 196 Twitter
## 1126 50.0021 84 87 171 ad campaign