#title: "Bike share_Mechine learning: linear regression"
#output: html_notebook
#Author: Zhuangfang Yi. https://geoyi.org
setwd("C:/Data Science Fundation with R/R-Course-HTML-Notes/R-Course-HTML-Notes/R-for-Data-Science-and-Machine-Learning/Training Exercises/Machine Learning Projects/CSV files for ML Projects")
list.files("C:/Data Science Fundation with R/R-Course-HTML-Notes/R-Course-HTML-Notes/R-for-Data-Science-and-Machine-Learning/Training Exercises/Machine Learning Projects/CSV files for ML Projects")
[1] "Adult salary prediction_logistic regression.nb.html"
[2] "Adult salary prediction_logistic regression.R"
[3] "Adult salary prediction_logistic regression.Rmd"
[4] "adult_sal.csv"
[5] "Adult_salary_prediction_logistic_regression_files"
[6] "bank_note_data.csv"
[7] "bikeshare.csv"
[8] "Bikeshare.nb.html"
[9] "Bikeshare.r"
[10] "Bikeshare.rmd"
[11] "bikeshare_DC.nb.html"
[12] "bikeshare_DC.Rmd"
[13] "GEO.ORG_BLOG POST.pptx"
[14] "loan_data.csv"
[15] "Rplot.png"
[16] "Rplot01.png"
[17] "rsconnect"
[18] "winequality-red.csv"
[19] "winequality-white.csv"
Adult salary prediction_logistic regression.nb.html
Adult salary prediction_logistic regression.R
Adult salary prediction_logistic regression.Rmd
adult_sal.csv
Adult_salary_prediction_logistic_regression_files
bank_note_data.csv
bikeshare.csv
Bikeshare.nb.html
Bikeshare.r
Bikeshare.rmd
bikeshare_DC.nb.html
bikeshare_DC.Rmd
GEO.ORG_BLOG POST.pptx
loan_data.csv
Rplot.png
Rplot01.png
rsconnect
winequality-red.csv
winequality-white.csv
df <- read.csv("bikeshare.csv", sep = ",")
head(df, 4)
datetime season holiday workingday weather temp atemp
1 2011-01-01 00:00:00 1 0 0 1 9.84 14.395
2 2011-01-01 01:00:00 1 0 0 1 9.02 13.635
3 2011-01-01 02:00:00 1 0 0 1 9.02 13.635
4 2011-01-01 03:00:00 1 0 0 1 9.84 14.395
humidity windspeed casual registered count
1 81 0 3 13 16
2 80 0 8 32 40
3 80 0 5 27 32
4 75 0 3 10 13
library(ggplot2)
library(ggthemes)
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library(corrgram)
library(plotly)
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
Sys.setenv("plotly_username"="geospatialanalystyi")
Sys.setenv("plotly_api_key"="kavplmotix")
Sys.setenv("plotly_domain"="https://plotly.your-company.com")
#check out the correlation between variables
#Bike.Num <- sapply(df, is.numeric)
#Bike.cor <- cor(df[,Bike.Num])
#corrplot::corrplot(Bike.cor, method = "color")
p1 <- corrgram(df, order = TRUE, lower.panel = panel.shade, upper.panel = panel.pie, text.panel = panel.txt)

#plotly::plotly(p1)
ggplot(df, aes(x=temp, y=count)) + geom_point(aes(colour = temp),alpha=0.5)

#covert date "2013-03-11 07:00" to POSIXct format "2013-03"
df$datetime <- as.POSIXct(df$datetime)
ggplot(df, aes(x=datetime, y=count)) + geom_point(aes(colour=temp),alpha=0.5,position=position_jitter(w=2, h=0)) + scale_color_continuous(low='#55D8CE',high='#FF6E2E')

col.tem.count <- cor(df[,c('temp', 'count')])
col.tem.count
temp count
temp 1.0000000 0.3944536
count 0.3944536 1.0000000
ggplot(df, aes(x=factor(season), y=count)) + geom_boxplot(aes(colour=factor(season)))

df$hour <- format(df$datetime,"%H")
head(df)
datetime season holiday workingday weather temp atemp
1 2011-01-01 00:00:00 1 0 0 1 9.84 14.395
2 2011-01-01 01:00:00 1 0 0 1 9.02 13.635
3 2011-01-01 02:00:00 1 0 0 1 9.02 13.635
4 2011-01-01 03:00:00 1 0 0 1 9.84 14.395
5 2011-01-01 04:00:00 1 0 0 1 9.84 14.395
6 2011-01-01 05:00:00 1 0 0 2 9.84 12.880
humidity windspeed casual registered count hour
1 81 0.0000 3 13 16 00
2 80 0.0000 8 32 40 01
3 80 0.0000 5 27 32 02
4 75 0.0000 3 10 13 03
5 75 0.0000 0 1 1 04
6 75 6.0032 0 1 1 05
workday.bike <- df %>%
filter(workingday == 1)
#color selection was from http://www.color-hex.com/color-wheel/, I use color 1 combination.
ggplot(workday.bike, aes(x=hour, y=count)) + geom_point(aes(color=temp),position=position_jitter(w=1.5, h=0.5), alpha=0.5) + scale_color_gradientn(colors = c("#161c64","#1827e2",'#983cec','#ad3cec', '#ea3cec','#ec3c6c','#ec613c','#ec863c','#eca33c','#ecd13c'))

NotWork.day <- df %>%
filter(workingday == 0)
ggplot(NotWork.day, aes(x=hour, y=count)) + geom_point(aes(color=temp),position=position_jitter(w=1.5, h=0.5), alpha=0.5) + scale_color_gradientn(colors = c("#161c64","#1827e2",'#983cec','#ad3cec', '#ea3cec','#ec3c6c','#ec613c','#ec863c','#eca33c','#ecd13c'))

temp.model <- lm(count ~ temp, data = df)
summary(temp.model)
Call:
lm(formula = count ~ temp, data = df)
Residuals:
Min 1Q Median 3Q Max
-293.32 -112.36 -33.36 78.98 741.44
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.0462 4.4394 1.362 0.173
temp 9.1705 0.2048 44.783 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 166.5 on 10884 degrees of freedom
Multiple R-squared: 0.1556, Adjusted R-squared: 0.1555
F-statistic: 2006 on 1 and 10884 DF, p-value: < 2.2e-16
#make prediction using the linear regression model
print("How many bikes do we need when the temprature is 30?")
[1] "How many bikes do we need when the temprature is 30?"
temp.test <- data.frame(temp=30)
predict(temp.model,temp.test)
1
281.1624
df$hour <- sapply(df$hour, as.numeric)
P.bike <- select(df, season : hour)
bike.num <- lapply(P.bike, as.numeric)
str(bike.num)
List of 12
$ season : num [1:10886] 1 1 1 1 1 1 1 1 1 1 ...
$ holiday : num [1:10886] 0 0 0 0 0 0 0 0 0 0 ...
$ workingday: num [1:10886] 0 0 0 0 0 0 0 0 0 0 ...
$ weather : num [1:10886] 1 1 1 1 1 2 1 1 1 1 ...
$ temp : num [1:10886] 9.84 9.02 9.02 9.84 9.84 ...
$ atemp : num [1:10886] 14.4 13.6 13.6 14.4 14.4 ...
$ humidity : num [1:10886] 81 80 80 75 75 75 80 86 75 76 ...
$ windspeed : num [1:10886] 0 0 0 0 0 ...
$ casual : num [1:10886] 3 8 5 3 0 0 2 1 1 8 ...
$ registered: num [1:10886] 13 32 27 10 1 1 0 2 7 6 ...
$ count : num [1:10886] 16 40 32 13 1 1 2 3 8 14 ...
$ hour : num [1:10886] 0 1 2 3 4 5 6 7 8 9 ...
bike.model <- lm(count ~., data =bike.num)
summary(bike.model)
Call:
lm(formula = count ~ ., data = bike.num)
Residuals:
Min 1Q Median 3Q Max
-4.963e-11 -1.700e-14 0.000e+00 1.700e-14 3.672e-11
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.762e-13 4.477e-14 -3.936e+00 8.33e-05 ***
season -2.061e-13 6.949e-15 -2.966e+01 < 2e-16 ***
holiday 2.869e-13 4.466e-14 6.423e+00 1.39e-10 ***
workingday 3.457e-13 1.836e-14 1.883e+01 < 2e-16 ***
weather 7.914e-14 1.267e-14 6.245e+00 4.39e-10 ***
temp -4.186e-14 5.498e-15 -7.613e+00 2.91e-14 ***
atemp -1.750e-14 5.060e-15 -3.459e+00 0.000545 ***
humidity 1.139e-14 4.864e-16 2.341e+01 < 2e-16 ***
windspeed 6.567e-16 9.636e-16 6.820e-01 0.495534
casual 1.000e+00 2.148e-16 4.655e+15 < 2e-16 ***
registered 1.000e+00 6.118e-17 1.635e+16 < 2e-16 ***
hour 5.611e-16 1.159e-15 4.840e-01 0.628477
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 7.49e-13 on 10874 degrees of freedom
Multiple R-squared: 1, Adjusted R-squared: 1
F-statistic: 5.788e+31 on 11 and 10874 DF, p-value: < 2.2e-16
At this poit we could make a prediction/assuption: Today, it’s summer time, and holiday; the weather is clear, few clouds; temp is 25; but air temp is about 28; humidity is about 70%; weedspeed is about 10; and it’s close to 10am now. So we could predict how many bikes are needed for the specific hour, day and weather.
bike.test <- data.frame(season=3, holiday =1, workingday = 0, weather = 1, temp=25, atemp = 28, humidity = 70.0, windspeed = 10.0, hour = 10, casual =11, registered = 140)
predict(bike.model, bike.test)
1
151
LS0tDQp0aXRsZTogIkJpa2Ugc2hhcmVfTWVjaGluZSBsZWFybmluZzogbGluZWFyIHJlZ3Jlc3Npb24iDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCkF1dGhvcjogWmh1YW5nZmFuZyBZaS4gaHR0cHM6Ly9nZW95aS5vcmcNCi0tLQ0KDQpgYGB7cn0NCiN0aXRsZTogIkJpa2Ugc2hhcmVfTWVjaGluZSBsZWFybmluZzogbGluZWFyIHJlZ3Jlc3Npb24iDQojb3V0cHV0OiBodG1sX25vdGVib29rDQojQXV0aG9yOiBaaHVhbmdmYW5nIFlpLiBodHRwczovL2dlb3lpLm9yZw0KDQpzZXR3ZCgiQzovRGF0YSBTY2llbmNlIEZ1bmRhdGlvbiB3aXRoIFIvUi1Db3Vyc2UtSFRNTC1Ob3Rlcy9SLUNvdXJzZS1IVE1MLU5vdGVzL1ItZm9yLURhdGEtU2NpZW5jZS1hbmQtTWFjaGluZS1MZWFybmluZy9UcmFpbmluZyBFeGVyY2lzZXMvTWFjaGluZSBMZWFybmluZyBQcm9qZWN0cy9DU1YgZmlsZXMgZm9yIE1MIFByb2plY3RzIikNCmxpc3QuZmlsZXMoIkM6L0RhdGEgU2NpZW5jZSBGdW5kYXRpb24gd2l0aCBSL1ItQ291cnNlLUhUTUwtTm90ZXMvUi1Db3Vyc2UtSFRNTC1Ob3Rlcy9SLWZvci1EYXRhLVNjaWVuY2UtYW5kLU1hY2hpbmUtTGVhcm5pbmcvVHJhaW5pbmcgRXhlcmNpc2VzL01hY2hpbmUgTGVhcm5pbmcgUHJvamVjdHMvQ1NWIGZpbGVzIGZvciBNTCBQcm9qZWN0cyIpDQpkZiA8LSByZWFkLmNzdigiYmlrZXNoYXJlLmNzdiIsIHNlcCA9ICIsIikNCmhlYWQoZGYsIDQpDQpgYGANCg0KYGBge3J9DQpsaWJyYXJ5KGdncGxvdDIpDQpsaWJyYXJ5KGdndGhlbWVzKQ0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkoY29ycmdyYW0pDQpsaWJyYXJ5KHBsb3RseSkNClN5cy5zZXRlbnYoInBsb3RseV91c2VybmFtZSI9Imdlb3NwYXRpYWxhbmFseXN0eWkiKQ0KU3lzLnNldGVudigicGxvdGx5X2FwaV9rZXkiPSJrYXZwbG1vdGl4IikNClN5cy5zZXRlbnYoInBsb3RseV9kb21haW4iPSJodHRwczovL3Bsb3RseS55b3VyLWNvbXBhbnkuY29tIikNCg0KI2NoZWNrIG91dCB0aGUgY29ycmVsYXRpb24gYmV0d2VlbiB2YXJpYWJsZXMNCiNCaWtlLk51bSA8LSBzYXBwbHkoZGYsIGlzLm51bWVyaWMpDQojQmlrZS5jb3IgPC0gY29yKGRmWyxCaWtlLk51bV0pDQojY29ycnBsb3Q6OmNvcnJwbG90KEJpa2UuY29yLCBtZXRob2QgPSAiY29sb3IiKQ0KcDEgPC0gY29ycmdyYW0oZGYsIG9yZGVyID0gVFJVRSwgbG93ZXIucGFuZWwgPSBwYW5lbC5zaGFkZSwgdXBwZXIucGFuZWwgPSBwYW5lbC5waWUsIHRleHQucGFuZWwgPSBwYW5lbC50eHQpDQojcGxvdGx5OjpwbG90bHkocDEpDQpgYGANCg0KYGBge3J9DQpnZ3Bsb3QoZGYsIGFlcyh4PXRlbXAsIHk9Y291bnQpKSArIGdlb21fcG9pbnQoYWVzKGNvbG91ciA9IHRlbXApLGFscGhhPTAuNSkgDQpgYGANCmBgYHtyfQ0KI2NvdmVydCBkYXRlICIyMDEzLTAzLTExIDA3OjAwIiB0byBQT1NJWGN0IGZvcm1hdCAiMjAxMy0wMyIgDQpkZiRkYXRldGltZSA8LSBhcy5QT1NJWGN0KGRmJGRhdGV0aW1lKQ0KZ2dwbG90KGRmLCBhZXMoeD1kYXRldGltZSwgeT1jb3VudCkpICsgZ2VvbV9wb2ludChhZXMoY29sb3VyPXRlbXApLGFscGhhPTAuNSxwb3NpdGlvbj1wb3NpdGlvbl9qaXR0ZXIodz0yLCBoPTApKSArIHNjYWxlX2NvbG9yX2NvbnRpbnVvdXMobG93PScjNTVEOENFJyxoaWdoPScjRkY2RTJFJykgDQpgYGANCmBgYHtyfQ0KY29sLnRlbS5jb3VudCA8LSBjb3IoZGZbLGMoJ3RlbXAnLCAnY291bnQnKV0pDQpjb2wudGVtLmNvdW50DQpgYGANCmBgYHtyfQ0KZ2dwbG90KGRmLCBhZXMoeD1mYWN0b3Ioc2Vhc29uKSwgeT1jb3VudCkpICsgZ2VvbV9ib3hwbG90KGFlcyhjb2xvdXI9ZmFjdG9yKHNlYXNvbikpKQ0KYGBgDQpgYGB7cn0NCmRmJGhvdXIgPC0gZm9ybWF0KGRmJGRhdGV0aW1lLCIlSCIpDQpoZWFkKGRmKQ0KYGBgDQoNCmBgYHtyfQ0Kd29ya2RheS5iaWtlIDwtIGRmICU+JQ0KICBmaWx0ZXIod29ya2luZ2RheSA9PSAxKQ0KI2NvbG9yIHNlbGVjdGlvbiB3YXMgZnJvbSBodHRwOi8vd3d3LmNvbG9yLWhleC5jb20vY29sb3Itd2hlZWwvLCBJIHVzZSBjb2xvciAxIGNvbWJpbmF0aW9uLg0KZ2dwbG90KHdvcmtkYXkuYmlrZSwgYWVzKHg9aG91ciwgeT1jb3VudCkpICsgZ2VvbV9wb2ludChhZXMoY29sb3I9dGVtcCkscG9zaXRpb249cG9zaXRpb25faml0dGVyKHc9MS41LCBoPTAuNSksIGFscGhhPTAuNSkgKyBzY2FsZV9jb2xvcl9ncmFkaWVudG4oY29sb3JzID0gYygiIzE2MWM2NCIsIiMxODI3ZTIiLCcjOTgzY2VjJywnI2FkM2NlYycsICcjZWEzY2VjJywnI2VjM2M2YycsJyNlYzYxM2MnLCcjZWM4NjNjJywnI2VjYTMzYycsJyNlY2QxM2MnKSkNCmBgYA0KYGBge3J9DQpOb3RXb3JrLmRheSA8LSBkZiAlPiUNCiAgZmlsdGVyKHdvcmtpbmdkYXkgPT0gMCkNCmdncGxvdChOb3RXb3JrLmRheSwgYWVzKHg9aG91ciwgeT1jb3VudCkpICsgZ2VvbV9wb2ludChhZXMoY29sb3I9dGVtcCkscG9zaXRpb249cG9zaXRpb25faml0dGVyKHc9MS41LCBoPTAuNSksIGFscGhhPTAuNSkgKyBzY2FsZV9jb2xvcl9ncmFkaWVudG4oY29sb3JzID0gYygiIzE2MWM2NCIsIiMxODI3ZTIiLCcjOTgzY2VjJywnI2FkM2NlYycsICcjZWEzY2VjJywnI2VjM2M2YycsJyNlYzYxM2MnLCcjZWM4NjNjJywnI2VjYTMzYycsJyNlY2QxM2MnKSkNCmBgYA0KYGBge3J9DQp0ZW1wLm1vZGVsIDwtIGxtKGNvdW50IH4gdGVtcCwgZGF0YSA9IGRmKQ0Kc3VtbWFyeSh0ZW1wLm1vZGVsKQ0KYGBgDQpgYGB7cn0NCiNtYWtlIHByZWRpY3Rpb24gdXNpbmcgdGhlIGxpbmVhciByZWdyZXNzaW9uIG1vZGVsDQpwcmludCgiSG93IG1hbnkgYmlrZXMgZG8gd2UgbmVlZCB3aGVuIHRoZSB0ZW1wcmF0dXJlIGlzIDMwPyIpDQp0ZW1wLnRlc3QgPC0gZGF0YS5mcmFtZSh0ZW1wPTMwKQ0KcHJlZGljdCh0ZW1wLm1vZGVsLHRlbXAudGVzdCkNCmBgYA0KYGBge3J9DQpkZiRob3VyIDwtIHNhcHBseShkZiRob3VyLCBhcy5udW1lcmljKQ0KUC5iaWtlIDwtIHNlbGVjdChkZiwgc2Vhc29uIDogaG91cikNCmJpa2UubnVtIDwtIGxhcHBseShQLmJpa2UsIGFzLm51bWVyaWMpDQpzdHIoYmlrZS5udW0pDQpgYGANCg0KYGBge3J9DQpiaWtlLm1vZGVsIDwtIGxtKGNvdW50IH4uLCBkYXRhID1iaWtlLm51bSkNCnN1bW1hcnkoYmlrZS5tb2RlbCkNCmBgYA0KIyMjQXQgdGhpcyBwb2l0IHdlIGNvdWxkIG1ha2UgYSBwcmVkaWN0aW9uL2Fzc3VwdGlvbjogVG9kYXksIGl0J3Mgc3VtbWVyIHRpbWUsIGFuZCBob2xpZGF5OyB0aGUgd2VhdGhlciBpcyAgY2xlYXIsIGZldyBjbG91ZHM7IHRlbXAgaXMgMjU7IGJ1dCBhaXIgdGVtcCBpcyBhYm91dCAyODsgaHVtaWRpdHkgaXMgYWJvdXQgNzAlOyB3ZWVkc3BlZWQgaXMgYWJvdXQgMTA7IGFuZCBpdCdzIGNsb3NlIHRvIDEwYW0gbm93LiBTbyB3ZSBjb3VsZCBwcmVkaWN0IGhvdyBtYW55IGJpa2VzIGFyZSBuZWVkZWQgZm9yIHRoZSBzcGVjaWZpYyBob3VyLCBkYXkgYW5kIHdlYXRoZXIuIA0KDQpgYGB7cn0NCmJpa2UudGVzdCA8LSBkYXRhLmZyYW1lKHNlYXNvbj0zLCBob2xpZGF5ID0xLCB3b3JraW5nZGF5ID0gMCwgd2VhdGhlciA9IDEsIHRlbXA9MjUsIGF0ZW1wID0gMjgsIGh1bWlkaXR5ID0gNzAuMCwgd2luZHNwZWVkID0gMTAuMCwgaG91ciA9IDEwLCBjYXN1YWwgPTExLCByZWdpc3RlcmVkID0gMTQwKQ0KcHJlZGljdChiaWtlLm1vZGVsLCBiaWtlLnRlc3QpDQpgYGANCmBgYHtyfQ0KDQpgYGANCg0K