Question 4: Data Import

# Import the bike-sharing data four ways: the .csv copy through read.table()
# and read.csv(), and the .txt (tab-separated) copy through read.table() and
# read.delim(). The four reads are independent of one another.
bike1 <- read.table(file = "bike_sharing_data.csv", header = TRUE, sep = ",")
bike2 <- read.table(file = "bike_sharing_data.txt", header = TRUE, sep = "\t")
bike3 <- read.csv(file = "bike_sharing_data.csv")
bike4 <- read.delim(file = "bike_sharing_data.txt")
# Preview the first six rows of the read.table() import of the .csv file.
head(bike1, n = 6L)
##        datetime season holiday workingday weather temp  atemp humidity
## 1 1/1/2011 0:00      1       0          0       1 9.84 14.395       81
## 2 1/1/2011 1:00      1       0          0       1 9.02 13.635       80
## 3 1/1/2011 2:00      1       0          0       1 9.02 13.635       80
## 4 1/1/2011 3:00      1       0          0       1 9.84 14.395       75
## 5 1/1/2011 4:00      1       0          0       1 9.84 14.395       75
## 6 1/1/2011 5:00      1       0          0       2 9.84 12.880       75
##   windspeed casual registered count       sources
## 1    0.0000      3         13    16   ad campaign
## 2    0.0000      8         32    40 www.yahoo.com
## 3    0.0000      5         27    32 www.google.fi
## 4    0.0000      3         10    13   AD campaign
## 5    0.0000      0          1     1       Twitter
## 6    6.0032      0          1     1  www.bing.com
# Preview the first six rows of the read.table() import of the .txt file.
head(bike2, n = 6L)
##        datetime season holiday workingday weather temp  atemp humidity
## 1 1/1/2011 0:00      1       0          0       1 9.84 14.395       81
## 2 1/1/2011 1:00      1       0          0       1 9.02 13.635       80
## 3 1/1/2011 2:00      1       0          0       1 9.02 13.635       80
## 4 1/1/2011 3:00      1       0          0       1 9.84 14.395       75
## 5 1/1/2011 4:00      1       0          0       1 9.84 14.395       75
## 6 1/1/2011 5:00      1       0          0       2 9.84 12.880       75
##   windspeed casual registered count       sources
## 1    0.0000      3         13    16   ad campaign
## 2    0.0000      8         32    40 www.yahoo.com
## 3    0.0000      5         27    32 www.google.fi
## 4    0.0000      3         10    13   AD campaign
## 5    0.0000      0          1     1       Twitter
## 6    6.0032      0          1     1  www.bing.com
# Preview the first six rows of the read.csv() import.
head(bike3, n = 6L)
##        datetime season holiday workingday weather temp  atemp humidity
## 1 1/1/2011 0:00      1       0          0       1 9.84 14.395       81
## 2 1/1/2011 1:00      1       0          0       1 9.02 13.635       80
## 3 1/1/2011 2:00      1       0          0       1 9.02 13.635       80
## 4 1/1/2011 3:00      1       0          0       1 9.84 14.395       75
## 5 1/1/2011 4:00      1       0          0       1 9.84 14.395       75
## 6 1/1/2011 5:00      1       0          0       2 9.84 12.880       75
##   windspeed casual registered count       sources
## 1    0.0000      3         13    16   ad campaign
## 2    0.0000      8         32    40 www.yahoo.com
## 3    0.0000      5         27    32 www.google.fi
## 4    0.0000      3         10    13   AD campaign
## 5    0.0000      0          1     1       Twitter
## 6    6.0032      0          1     1  www.bing.com
# Preview the first six rows of the read.delim() import.
head(bike4, n = 6L)
##        datetime season holiday workingday weather temp  atemp humidity
## 1 1/1/2011 0:00      1       0          0       1 9.84 14.395       81
## 2 1/1/2011 1:00      1       0          0       1 9.02 13.635       80
## 3 1/1/2011 2:00      1       0          0       1 9.02 13.635       80
## 4 1/1/2011 3:00      1       0          0       1 9.84 14.395       75
## 5 1/1/2011 4:00      1       0          0       1 9.84 14.395       75
## 6 1/1/2011 5:00      1       0          0       2 9.84 12.880       75
##   windspeed casual registered count       sources
## 1    0.0000      3         13    16   ad campaign
## 2    0.0000      8         32    40 www.yahoo.com
## 3    0.0000      5         27    32 www.google.fi
## 4    0.0000      3         10    13   AD campaign
## 5    0.0000      0          1     1       Twitter
## 6    6.0032      0          1     1  www.bing.com

Question 5: number of observations and variables in the dataset

# Report the number of observations (rows) and variables (columns).
c(nrow(bike3), ncol(bike3))
## [1] 17379    13

Question 6: humidity

# Look at the first six rows before inspecting the column types.
head(bike3, n = 6L)
##        datetime season holiday workingday weather temp  atemp humidity
## 1 1/1/2011 0:00      1       0          0       1 9.84 14.395       81
## 2 1/1/2011 1:00      1       0          0       1 9.02 13.635       80
## 3 1/1/2011 2:00      1       0          0       1 9.02 13.635       80
## 4 1/1/2011 3:00      1       0          0       1 9.84 14.395       75
## 5 1/1/2011 4:00      1       0          0       1 9.84 14.395       75
## 6 1/1/2011 5:00      1       0          0       2 9.84 12.880       75
##   windspeed casual registered count       sources
## 1    0.0000      3         13    16   ad campaign
## 2    0.0000      8         32    40 www.yahoo.com
## 3    0.0000      5         27    32 www.google.fi
## 4    0.0000      3         10    13   AD campaign
## 5    0.0000      0          1     1       Twitter
## 6    6.0032      0          1     1  www.bing.com
# Show the storage type of every column, including humidity.
str(object = bike3)
## 'data.frame':    17379 obs. of  13 variables:
##  $ datetime  : chr  "1/1/2011 0:00" "1/1/2011 1:00" "1/1/2011 2:00" "1/1/2011 3:00" ...
##  $ season    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ holiday   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ workingday: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ weather   : int  1 1 1 1 1 2 1 1 1 1 ...
##  $ temp      : num  9.84 9.02 9.02 9.84 9.84 ...
##  $ atemp     : num  14.4 13.6 13.6 14.4 14.4 ...
##  $ humidity  : chr  "81" "80" "80" "75" ...
##  $ windspeed : num  0 0 0 0 0 ...
##  $ casual    : int  3 8 5 3 0 0 2 1 1 8 ...
##  $ registered: int  13 32 27 10 1 1 0 2 7 6 ...
##  $ count     : int  16 40 32 13 1 1 2 3 8 14 ...
##  $ sources   : chr  "ad campaign" "www.yahoo.com" "www.google.fi" "AD campaign" ...

Question 7: value of season

# Extract the season column as a vector, then take its 6251st element.
bike3[["season"]][6251L]
## [1] 4

Question 8: number of observations in season 4

# Dimensions of the rows whose season equals 4. which() keeps only the
# positions where the comparison is TRUE, so rows with a missing season are
# left out.
dim(bike3[which(bike3$season == 4), ])
## [1] 4232   13

Question 9: logical operators

# Keep the rows where season is 1 AND windspeed exceeds 0.5.
# The logical mask is wrapped in which() because indexing a data frame with a
# logical vector that contains NA inserts an all-NA row for every NA entry.
# which() returns only the positions that are TRUE, so rows with a missing
# season or windspeed are dropped, matching what subset() does.
subset_data <- bike3[which(bike3$season == 1 & bike3$windspeed > 0.5), ]
head(subset_data)
##          datetime season holiday workingday weather  temp  atemp humidity
## 6   1/1/2011 5:00      1       0          0       2  9.84 12.880       75
## 11 1/1/2011 10:00      1       0          0       1 15.58 19.695       76
## 12 1/1/2011 11:00      1       0          0       1 14.76 16.665       81
## 13 1/1/2011 12:00      1       0          0       1 17.22 21.210       77
## 14 1/1/2011 13:00      1       0          0       2 18.86 22.725       72
## 15 1/1/2011 14:00      1       0          0       2 18.86 22.725       72
##    windspeed casual registered count          sources
## 6     6.0032      0          1     1     www.bing.com
## 11   16.9979     12         24    36     www.bing.com
## 12   19.0012     26         30    56    www.yahoo.com
## 13   19.0012     29         55    84    www.google.fi
## 14   19.9995     47         47    94      AD campaign
## 15   19.0012     35         71   106 www.google.co.uk
# Keep the rows whose season is one of the listed values (1 or 3), using
# %in% for set membership, then preview the result.
subset_data_match <- subset(bike3, subset = season %in% c(1, 3))
head(x = subset_data_match)
##        datetime season holiday workingday weather temp  atemp humidity
## 1 1/1/2011 0:00      1       0          0       1 9.84 14.395       81
## 2 1/1/2011 1:00      1       0          0       1 9.02 13.635       80
## 3 1/1/2011 2:00      1       0          0       1 9.02 13.635       80
## 4 1/1/2011 3:00      1       0          0       1 9.84 14.395       75
## 5 1/1/2011 4:00      1       0          0       1 9.84 14.395       75
## 6 1/1/2011 5:00      1       0          0       2 9.84 12.880       75
##   windspeed casual registered count       sources
## 1    0.0000      3         13    16   ad campaign
## 2    0.0000      8         32    40 www.yahoo.com
## 3    0.0000      5         27    32 www.google.fi
## 4    0.0000      3         10    13   AD campaign
## 5    0.0000      0          1     1       Twitter
## 6    6.0032      0          1     1  www.bing.com

Question 10: Season and Wind

# Dimensions of the rows with windspeed of at least 40 in season 1 or 4.
# which() keeps only positions where the combined condition is TRUE.
dim(bike3[with(bike3, which(windspeed >= 40 & season %in% c(1, 4))), ])
## [1] 46 13