Question 4 : Data Import
bike3 <- read.csv("bike_sharing_data.csv")
bike2 <- read.table("bike_sharing_data.txt", sep = "\t", header = TRUE)
bike1 <- read.table("bike_sharing_data.csv", sep = ",", header = TRUE)
bike4 <- read.delim("bike_sharing_data.txt")
head(bike1)
## datetime season holiday workingday weather temp atemp humidity
## 1 1/1/2011 0:00 1 0 0 1 9.84 14.395 81
## 2 1/1/2011 1:00 1 0 0 1 9.02 13.635 80
## 3 1/1/2011 2:00 1 0 0 1 9.02 13.635 80
## 4 1/1/2011 3:00 1 0 0 1 9.84 14.395 75
## 5 1/1/2011 4:00 1 0 0 1 9.84 14.395 75
## 6 1/1/2011 5:00 1 0 0 2 9.84 12.880 75
## windspeed casual registered count sources
## 1 0.0000 3 13 16 ad campaign
## 2 0.0000 8 32 40 www.yahoo.com
## 3 0.0000 5 27 32 www.google.fi
## 4 0.0000 3 10 13 AD campaign
## 5 0.0000 0 1 1 Twitter
## 6 6.0032 0 1 1 www.bing.com
head(bike2)
## datetime season holiday workingday weather temp atemp humidity
## 1 1/1/2011 0:00 1 0 0 1 9.84 14.395 81
## 2 1/1/2011 1:00 1 0 0 1 9.02 13.635 80
## 3 1/1/2011 2:00 1 0 0 1 9.02 13.635 80
## 4 1/1/2011 3:00 1 0 0 1 9.84 14.395 75
## 5 1/1/2011 4:00 1 0 0 1 9.84 14.395 75
## 6 1/1/2011 5:00 1 0 0 2 9.84 12.880 75
## windspeed casual registered count sources
## 1 0.0000 3 13 16 ad campaign
## 2 0.0000 8 32 40 www.yahoo.com
## 3 0.0000 5 27 32 www.google.fi
## 4 0.0000 3 10 13 AD campaign
## 5 0.0000 0 1 1 Twitter
## 6 6.0032 0 1 1 www.bing.com
head(bike3)
## datetime season holiday workingday weather temp atemp humidity
## 1 1/1/2011 0:00 1 0 0 1 9.84 14.395 81
## 2 1/1/2011 1:00 1 0 0 1 9.02 13.635 80
## 3 1/1/2011 2:00 1 0 0 1 9.02 13.635 80
## 4 1/1/2011 3:00 1 0 0 1 9.84 14.395 75
## 5 1/1/2011 4:00 1 0 0 1 9.84 14.395 75
## 6 1/1/2011 5:00 1 0 0 2 9.84 12.880 75
## windspeed casual registered count sources
## 1 0.0000 3 13 16 ad campaign
## 2 0.0000 8 32 40 www.yahoo.com
## 3 0.0000 5 27 32 www.google.fi
## 4 0.0000 3 10 13 AD campaign
## 5 0.0000 0 1 1 Twitter
## 6 6.0032 0 1 1 www.bing.com
head(bike4)
## datetime season holiday workingday weather temp atemp humidity
## 1 1/1/2011 0:00 1 0 0 1 9.84 14.395 81
## 2 1/1/2011 1:00 1 0 0 1 9.02 13.635 80
## 3 1/1/2011 2:00 1 0 0 1 9.02 13.635 80
## 4 1/1/2011 3:00 1 0 0 1 9.84 14.395 75
## 5 1/1/2011 4:00 1 0 0 1 9.84 14.395 75
## 6 1/1/2011 5:00 1 0 0 2 9.84 12.880 75
## windspeed casual registered count sources
## 1 0.0000 3 13 16 ad campaign
## 2 0.0000 8 32 40 www.yahoo.com
## 3 0.0000 5 27 32 www.google.fi
## 4 0.0000 3 10 13 AD campaign
## 5 0.0000 0 1 1 Twitter
## 6 6.0032 0 1 1 www.bing.com
Question 5: number of observations and variables in dataset
dim(bike3)
## [1] 17379 13
Question 6: humidity
head(bike3)
## datetime season holiday workingday weather temp atemp humidity
## 1 1/1/2011 0:00 1 0 0 1 9.84 14.395 81
## 2 1/1/2011 1:00 1 0 0 1 9.02 13.635 80
## 3 1/1/2011 2:00 1 0 0 1 9.02 13.635 80
## 4 1/1/2011 3:00 1 0 0 1 9.84 14.395 75
## 5 1/1/2011 4:00 1 0 0 1 9.84 14.395 75
## 6 1/1/2011 5:00 1 0 0 2 9.84 12.880 75
## windspeed casual registered count sources
## 1 0.0000 3 13 16 ad campaign
## 2 0.0000 8 32 40 www.yahoo.com
## 3 0.0000 5 27 32 www.google.fi
## 4 0.0000 3 10 13 AD campaign
## 5 0.0000 0 1 1 Twitter
## 6 6.0032 0 1 1 www.bing.com
str(bike3)
## 'data.frame': 17379 obs. of 13 variables:
## $ datetime : chr "1/1/2011 0:00" "1/1/2011 1:00" "1/1/2011 2:00" "1/1/2011 3:00" ...
## $ season : int 1 1 1 1 1 1 1 1 1 1 ...
## $ holiday : int 0 0 0 0 0 0 0 0 0 0 ...
## $ workingday: int 0 0 0 0 0 0 0 0 0 0 ...
## $ weather : int 1 1 1 1 1 2 1 1 1 1 ...
## $ temp : num 9.84 9.02 9.02 9.84 9.84 ...
## $ atemp : num 14.4 13.6 13.6 14.4 14.4 ...
## $ humidity : chr "81" "80" "80" "75" ...
## $ windspeed : num 0 0 0 0 0 ...
## $ casual : int 3 8 5 3 0 0 2 1 1 8 ...
## $ registered: int 13 32 27 10 1 1 0 2 7 6 ...
## $ count : int 16 40 32 13 1 1 2 3 8 14 ...
## $ sources : chr "ad campaign" "www.yahoo.com" "www.google.fi" "AD campaign" ...
Question 7: value of season
bike3[6251, "season"]
## [1] 4
Question 8: number of observations
dim(subset(bike3, season==4))
## [1] 4232 13
Question 9: logical operators
subset_data <- bike3[bike3$season == 1 & bike3$windspeed > 0.5, ]
head(subset_data)
## datetime season holiday workingday weather temp atemp humidity
## 6 1/1/2011 5:00 1 0 0 2 9.84 12.880 75
## 11 1/1/2011 10:00 1 0 0 1 15.58 19.695 76
## 12 1/1/2011 11:00 1 0 0 1 14.76 16.665 81
## 13 1/1/2011 12:00 1 0 0 1 17.22 21.210 77
## 14 1/1/2011 13:00 1 0 0 2 18.86 22.725 72
## 15 1/1/2011 14:00 1 0 0 2 18.86 22.725 72
## windspeed casual registered count sources
## 6 6.0032 0 1 1 www.bing.com
## 11 16.9979 12 24 36 www.bing.com
## 12 19.0012 26 30 56 www.yahoo.com
## 13 19.0012 29 55 84 www.google.fi
## 14 19.9995 47 47 94 AD campaign
## 15 19.0012 35 71 106 www.google.co.uk
subset_data_match <- bike3[bike3$season %in% c(1, 3), ]
head(subset_data_match)
## datetime season holiday workingday weather temp atemp humidity
## 1 1/1/2011 0:00 1 0 0 1 9.84 14.395 81
## 2 1/1/2011 1:00 1 0 0 1 9.02 13.635 80
## 3 1/1/2011 2:00 1 0 0 1 9.02 13.635 80
## 4 1/1/2011 3:00 1 0 0 1 9.84 14.395 75
## 5 1/1/2011 4:00 1 0 0 1 9.84 14.395 75
## 6 1/1/2011 5:00 1 0 0 2 9.84 12.880 75
## windspeed casual registered count sources
## 1 0.0000 3 13 16 ad campaign
## 2 0.0000 8 32 40 www.yahoo.com
## 3 0.0000 5 27 32 www.google.fi
## 4 0.0000 3 10 13 AD campaign
## 5 0.0000 0 1 1 Twitter
## 6 6.0032 0 1 1 www.bing.com
Question 10: Season and Wind
dim(subset(bike3, windspeed >= 40 & season %in% c(1,4)))
## [1] 46 13