setwd("C:/Data Science Fundation with R/R-Course-HTML-Notes/R-Course-HTML-Notes/R-for-Data-Science-and-Machine-Learning/Training Exercises/Machine Learning Projects/CSV files for ML Projects")
list.files("C:/Data Science Fundation with R/R-Course-HTML-Notes/R-Course-HTML-Notes/R-for-Data-Science-and-Machine-Learning/Training Exercises/Machine Learning Projects/CSV files for ML Projects")
 [1] "Adult salary prediction_logistic regression.nb.html"
 [2] "Adult salary prediction_logistic regression.R"      
 [3] "Adult salary prediction_logistic regression.Rmd"    
 [4] "adult_sal.csv"                                      
 [5] "Adult_salary_prediction_logistic_regression_files"  
 [6] "bank_note_data.csv"                                 
 [7] "bikeshare.csv"                                      
 [8] "Bikeshare.nb.html"                                  
 [9] "Bikeshare.r"                                        
[10] "Bikeshare.rmd"                                      
[11] "bikeshare_DC.nb.html"                               
[12] "bikeshare_DC.Rmd"                                   
[13] "GEO.ORG_BLOG POST.pptx"                             
[14] "loan_data.csv"                                      
[15] "Rplot.png"                                          
[16] "Rplot01.png"                                        
[17] "rsconnect"                                          
[18] "winequality-red.csv"                                
[19] "winequality-white.csv"                              
Adult salary prediction_logistic regression.nb.html

Adult salary prediction_logistic regression.R

Adult salary prediction_logistic regression.Rmd

adult_sal.csv

Adult_salary_prediction_logistic_regression_files

bank_note_data.csv

bikeshare.csv

Bikeshare.nb.html

Bikeshare.r

Bikeshare.rmd

bikeshare_DC.nb.html

bikeshare_DC.Rmd

GEO.ORG_BLOG POST.pptx

loan_data.csv

Rplot.png

Rplot01.png

rsconnect

winequality-red.csv

winequality-white.csv
df <- read.csv("bikeshare.csv", sep = ",")
head(df, 4)
             datetime season holiday workingday weather temp  atemp
1 2011-01-01 00:00:00      1       0          0       1 9.84 14.395
2 2011-01-01 01:00:00      1       0          0       1 9.02 13.635
3 2011-01-01 02:00:00      1       0          0       1 9.02 13.635
4 2011-01-01 03:00:00      1       0          0       1 9.84 14.395
  humidity windspeed casual registered count
1       81         0      3         13    16
2       80         0      8         32    40
3       80         0      5         27    32
4       75         0      3         10    13
library(ggplot2)
library(ggthemes)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(corrgram)
library(plotly)

Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':

    last_plot
The following object is masked from 'package:stats':

    filter
The following object is masked from 'package:graphics':

    layout
Sys.setenv("plotly_username"="geospatialanalystyi")
Sys.setenv("plotly_api_key"="kavplmotix")
Sys.setenv("plotly_domain"="https://plotly.your-company.com")

#check out the correlation between variables
#Bike.Num <- sapply(df, is.numeric)
#Bike.cor <- cor(df[,Bike.Num])
#corrplot::corrplot(Bike.cor, method = "color")
p1 <- corrgram(df, order = TRUE, lower.panel = panel.shade, upper.panel = panel.pie, text.panel = panel.txt)

#plotly::plotly(p1)
ggplot(df, aes(x=temp, y=count)) + geom_point(aes(colour = temp),alpha=0.5) 

#covert date "2013-03-11 07:00" to POSIXct format "2013-03" 
df$datetime <- as.POSIXct(df$datetime)
ggplot(df, aes(x=datetime, y=count)) + geom_point(aes(colour=temp),alpha=0.5,position=position_jitter(w=2, h=0)) + scale_color_continuous(low='#55D8CE',high='#FF6E2E') 

col.tem.count <- cor(df[,c('temp', 'count')])
col.tem.count
           temp     count
temp  1.0000000 0.3944536
count 0.3944536 1.0000000
ggplot(df, aes(x=factor(season), y=count)) + geom_boxplot(aes(colour=factor(season)))

df$hour <- format(df$datetime,"%H")
head(df)
             datetime season holiday workingday weather temp  atemp
1 2011-01-01 00:00:00      1       0          0       1 9.84 14.395
2 2011-01-01 01:00:00      1       0          0       1 9.02 13.635
3 2011-01-01 02:00:00      1       0          0       1 9.02 13.635
4 2011-01-01 03:00:00      1       0          0       1 9.84 14.395
5 2011-01-01 04:00:00      1       0          0       1 9.84 14.395
6 2011-01-01 05:00:00      1       0          0       2 9.84 12.880
  humidity windspeed casual registered count hour
1       81    0.0000      3         13    16   00
2       80    0.0000      8         32    40   01
3       80    0.0000      5         27    32   02
4       75    0.0000      3         10    13   03
5       75    0.0000      0          1     1   04
6       75    6.0032      0          1     1   05
workday.bike <- df %>%
  filter(workingday == 1)
#color selection was from http://www.color-hex.com/color-wheel/, I use color 1 combination.
ggplot(workday.bike, aes(x=hour, y=count)) + geom_point(aes(color=temp),position=position_jitter(w=1.5, h=0.5), alpha=0.5) + scale_color_gradientn(colors = c("#161c64","#1827e2",'#983cec','#ad3cec', '#ea3cec','#ec3c6c','#ec613c','#ec863c','#eca33c','#ecd13c'))

NotWork.day <- df %>%
  filter(workingday == 0)
ggplot(NotWork.day, aes(x=hour, y=count)) + geom_point(aes(color=temp),position=position_jitter(w=1.5, h=0.5), alpha=0.5) + scale_color_gradientn(colors = c("#161c64","#1827e2",'#983cec','#ad3cec', '#ea3cec','#ec3c6c','#ec613c','#ec863c','#eca33c','#ecd13c'))

temp.model <- lm(count ~ temp, data = df)
summary(temp.model)

Call:
lm(formula = count ~ temp, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-293.32 -112.36  -33.36   78.98  741.44 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   6.0462     4.4394   1.362    0.173    
temp          9.1705     0.2048  44.783   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 166.5 on 10884 degrees of freedom
Multiple R-squared:  0.1556,    Adjusted R-squared:  0.1555 
F-statistic:  2006 on 1 and 10884 DF,  p-value: < 2.2e-16
#make prediction using the linear regression model
print("How many bikes do we need when the temprature is 30?")
[1] "How many bikes do we need when the temprature is 30?"
temp.test <- data.frame(temp=30)
predict(temp.model,temp.test)
       1 
281.1624 
df$hour <- sapply(df$hour, as.numeric)
P.bike <- select(df, season : hour)
bike.num <- lapply(P.bike, as.numeric)
str(bike.num)
List of 12
 $ season    : num [1:10886] 1 1 1 1 1 1 1 1 1 1 ...
 $ holiday   : num [1:10886] 0 0 0 0 0 0 0 0 0 0 ...
 $ workingday: num [1:10886] 0 0 0 0 0 0 0 0 0 0 ...
 $ weather   : num [1:10886] 1 1 1 1 1 2 1 1 1 1 ...
 $ temp      : num [1:10886] 9.84 9.02 9.02 9.84 9.84 ...
 $ atemp     : num [1:10886] 14.4 13.6 13.6 14.4 14.4 ...
 $ humidity  : num [1:10886] 81 80 80 75 75 75 80 86 75 76 ...
 $ windspeed : num [1:10886] 0 0 0 0 0 ...
 $ casual    : num [1:10886] 3 8 5 3 0 0 2 1 1 8 ...
 $ registered: num [1:10886] 13 32 27 10 1 1 0 2 7 6 ...
 $ count     : num [1:10886] 16 40 32 13 1 1 2 3 8 14 ...
 $ hour      : num [1:10886] 0 1 2 3 4 5 6 7 8 9 ...
bike.model <- lm(count ~., data =bike.num)
summary(bike.model)

Call:
lm(formula = count ~ ., data = bike.num)

Residuals:
       Min         1Q     Median         3Q        Max 
-4.963e-11 -1.700e-14  0.000e+00  1.700e-14  3.672e-11 

Coefficients:
              Estimate Std. Error    t value Pr(>|t|)    
(Intercept) -1.762e-13  4.477e-14 -3.936e+00 8.33e-05 ***
season      -2.061e-13  6.949e-15 -2.966e+01  < 2e-16 ***
holiday      2.869e-13  4.466e-14  6.423e+00 1.39e-10 ***
workingday   3.457e-13  1.836e-14  1.883e+01  < 2e-16 ***
weather      7.914e-14  1.267e-14  6.245e+00 4.39e-10 ***
temp        -4.186e-14  5.498e-15 -7.613e+00 2.91e-14 ***
atemp       -1.750e-14  5.060e-15 -3.459e+00 0.000545 ***
humidity     1.139e-14  4.864e-16  2.341e+01  < 2e-16 ***
windspeed    6.567e-16  9.636e-16  6.820e-01 0.495534    
casual       1.000e+00  2.148e-16  4.655e+15  < 2e-16 ***
registered   1.000e+00  6.118e-17  1.635e+16  < 2e-16 ***
hour         5.611e-16  1.159e-15  4.840e-01 0.628477    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 7.49e-13 on 10874 degrees of freedom
Multiple R-squared:      1, Adjusted R-squared:      1 
F-statistic: 5.788e+31 on 11 and 10874 DF,  p-value: < 2.2e-16

At this poit we could make a prediction/assuption: Today, it’s summer time, and holiday; the weather is clear, few clouds; temp is 25; but air temp is about 28; humidity is about 70%; weedspeed is about 10; and it’s close to 10am now. So we could predict how many bikes are needed for the specific hour, day and weather.

bike.test <- data.frame(season=3, holiday =1, workingday = 0, weather = 1, temp=25, atemp = 28, humidity = 70.0, windspeed = 10.0, hour = 10, casual =11, registered = 140)
predict(bike.model, bike.test)
  1 
151 
LS0tDQp0aXRsZTogIkJpa2Ugc2hhcmVfTWVjaGluZSBsZWFybmluZzogbGluZWFyIHJlZ3Jlc3Npb24iDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCkF1dGhvcjogWmh1YW5nZmFuZyBZaS4gaHR0cHM6Ly9nZW95aS5vcmcNCi0tLQ0KDQpgYGB7cn0NCnNldHdkKCJDOi9EYXRhIFNjaWVuY2UgRnVuZGF0aW9uIHdpdGggUi9SLUNvdXJzZS1IVE1MLU5vdGVzL1ItQ291cnNlLUhUTUwtTm90ZXMvUi1mb3ItRGF0YS1TY2llbmNlLWFuZC1NYWNoaW5lLUxlYXJuaW5nL1RyYWluaW5nIEV4ZXJjaXNlcy9NYWNoaW5lIExlYXJuaW5nIFByb2plY3RzL0NTViBmaWxlcyBmb3IgTUwgUHJvamVjdHMiKQ0KbGlzdC5maWxlcygiQzovRGF0YSBTY2llbmNlIEZ1bmRhdGlvbiB3aXRoIFIvUi1Db3Vyc2UtSFRNTC1Ob3Rlcy9SLUNvdXJzZS1IVE1MLU5vdGVzL1ItZm9yLURhdGEtU2NpZW5jZS1hbmQtTWFjaGluZS1MZWFybmluZy9UcmFpbmluZyBFeGVyY2lzZXMvTWFjaGluZSBMZWFybmluZyBQcm9qZWN0cy9DU1YgZmlsZXMgZm9yIE1MIFByb2plY3RzIikNCmRmIDwtIHJlYWQuY3N2KCJiaWtlc2hhcmUuY3N2Iiwgc2VwID0gIiwiKQ0KaGVhZChkZiwgNCkNCmBgYA0KDQpgYGB7cn0NCmxpYnJhcnkoZ2dwbG90MikNCmxpYnJhcnkoZ2d0aGVtZXMpDQpsaWJyYXJ5KGRwbHlyKQ0KbGlicmFyeShjb3JyZ3JhbSkNCmxpYnJhcnkocGxvdGx5KQ0KU3lzLnNldGVudigicGxvdGx5X3VzZXJuYW1lIj0iZ2Vvc3BhdGlhbGFuYWx5c3R5aSIpDQpTeXMuc2V0ZW52KCJwbG90bHlfYXBpX2tleSI9ImthdnBsbW90aXgiKQ0KU3lzLnNldGVudigicGxvdGx5X2RvbWFpbiI9Imh0dHBzOi8vcGxvdGx5LnlvdXItY29tcGFueS5jb20iKQ0KDQojY2hlY2sgb3V0IHRoZSBjb3JyZWxhdGlvbiBiZXR3ZWVuIHZhcmlhYmxlcw0KI0Jpa2UuTnVtIDwtIHNhcHBseShkZiwgaXMubnVtZXJpYykNCiNCaWtlLmNvciA8LSBjb3IoZGZbLEJpa2UuTnVtXSkNCiNjb3JycGxvdDo6Y29ycnBsb3QoQmlrZS5jb3IsIG1ldGhvZCA9ICJjb2xvciIpDQpwMSA8LSBjb3JyZ3JhbShkZiwgb3JkZXIgPSBUUlVFLCBsb3dlci5wYW5lbCA9IHBhbmVsLnNoYWRlLCB1cHBlci5wYW5lbCA9IHBhbmVsLnBpZSwgdGV4dC5wYW5lbCA9IHBhbmVsLnR4dCkNCiNwbG90bHk6OnBsb3RseShwMSkNCmBgYA0KDQpgYGB7cn0NCmdncGxvdChkZiwgYWVzKHg9dGVtcCwgeT1jb3VudCkpICsgZ2VvbV9wb2ludChhZXMoY29sb3VyID0gdGVtcCksYWxwaGE9MC41KSANCmBgYA0KYGBge3J9DQojY292ZXJ0IGRhdGUgIjIwMTMtMDMtMTEgMDc6MDAiIHRvIFBPU0lYY3QgZm9ybWF0ICIyMDEzLTAzIiANCmRmJGRhdGV0aW1lIDwtIGFzLlBPU0lYY3QoZGYkZGF0ZXRpbWUpDQpnZ3Bsb3QoZGYsIGFlcyh4PWRhdGV0aW1lLCB5PWNvdW50KSkgKyBnZW9tX3BvaW50KGFlcyhjb2xvdXI9dGVtcCksYWxwaGE9MC41LHBvc2l0aW9uPXBvc2l0aW9uX2ppdHRlcih3PTIsIGg9MCkpICsgc2NhbGVfY29sb3JfY29udGludW91cyhsb3c9JyM1NUQ4Q0UnLGhpZ2g9JyNGRjZFMkUnKSANCmBgYA0KYGBge3J9DQpjb2wudGVtLmNvdW50IDwtIGNvcihkZlssYygndGVtcCcsICdjb3VudCcpXSkNCmNvbC50ZW0uY291bnQNCmBgYA0KYGBge3J9DQpnZ3Bsb3QoZGYsIGFlcyh4PWZhY3RvcihzZWFzb24pLCB5PWNvdW50KSkgKyBnZW9tX2JveHBsb3QoYWVzKGNvbG91cj1mYWN0b3Ioc2Vhc29uKSkpDQpgYGANCmBgYHtyfQ0KZGYkaG91ciA8LSBmb3JtYXQoZGYkZGF0ZXRpbWUsIiVIIikNCmhlYWQoZGYpDQpgYGANCg0KYGBge3J9DQp3b3JrZGF5LmJpa2UgPC0gZGYgJT4lDQogIGZpbHRlcih3b3JraW5nZGF5ID09IDEpDQojY29sb3Igc2VsZWN0aW9uIHdhcyBmcm9tIGh0dHA6Ly93d3cuY29sb3ItaGV4LmNvbS9jb2xvci13aGVlbC8sIEkgdXNlIGNvbG9yIDEgY29tYmluYXRpb24uDQpnZ3Bsb3Qod29ya2RheS5iaWtlLCBhZXMoeD1ob3VyLCB5PWNvdW50KSkgKyBnZW9tX3BvaW50KGFlcyhjb2xvcj10ZW1wKSxwb3NpdGlvbj1wb3NpdGlvbl9qaXR0ZXIodz0xLjUsIGg9MC41KSwgYWxwaGE9MC41KSArIHNjYWxlX2NvbG9yX2dyYWRpZW50bihjb2xvcnMgPSBjKCIjMTYxYzY0IiwiIzE4MjdlMiIsJyM5ODNjZWMnLCcjYWQzY2VjJywgJyNlYTNjZWMnLCcjZWMzYzZjJywnI2VjNjEzYycsJyNlYzg2M2MnLCcjZWNhMzNjJywnI2VjZDEzYycpKQ0KYGBgDQpgYGB7cn0NCk5vdFdvcmsuZGF5IDwtIGRmICU+JQ0KICBmaWx0ZXIod29ya2luZ2RheSA9PSAwKQ0KZ2dwbG90KE5vdFdvcmsuZGF5LCBhZXMoeD1ob3VyLCB5PWNvdW50KSkgKyBnZW9tX3BvaW50KGFlcyhjb2xvcj10ZW1wKSxwb3NpdGlvbj1wb3NpdGlvbl9qaXR0ZXIodz0xLjUsIGg9MC41KSwgYWxwaGE9MC41KSArIHNjYWxlX2NvbG9yX2dyYWRpZW50bihjb2xvcnMgPSBjKCIjMTYxYzY0IiwiIzE4MjdlMiIsJyM5ODNjZWMnLCcjYWQzY2VjJywgJyNlYTNjZWMnLCcjZWMzYzZjJywnI2VjNjEzYycsJyNlYzg2M2MnLCcjZWNhMzNjJywnI2VjZDEzYycpKQ0KYGBgDQpgYGB7cn0NCnRlbXAubW9kZWwgPC0gbG0oY291bnQgfiB0ZW1wLCBkYXRhID0gZGYpDQpzdW1tYXJ5KHRlbXAubW9kZWwpDQpgYGANCmBgYHtyfQ0KI21ha2UgcHJlZGljdGlvbiB1c2luZyB0aGUgbGluZWFyIHJlZ3Jlc3Npb24gbW9kZWwNCnByaW50KCJIb3cgbWFueSBiaWtlcyBkbyB3ZSBuZWVkIHdoZW4gdGhlIHRlbXByYXR1cmUgaXMgMzA/IikNCnRlbXAudGVzdCA8LSBkYXRhLmZyYW1lKHRlbXA9MzApDQpwcmVkaWN0KHRlbXAubW9kZWwsdGVtcC50ZXN0KQ0KYGBgDQpgYGB7cn0NCmRmJGhvdXIgPC0gc2FwcGx5KGRmJGhvdXIsIGFzLm51bWVyaWMpDQpQLmJpa2UgPC0gc2VsZWN0KGRmLCBzZWFzb24gOiBob3VyKQ0KYmlrZS5udW0gPC0gbGFwcGx5KFAuYmlrZSwgYXMubnVtZXJpYykNCnN0cihiaWtlLm51bSkNCmBgYA0KDQpgYGB7cn0NCmJpa2UubW9kZWwgPC0gbG0oY291bnQgfi4sIGRhdGEgPWJpa2UubnVtKQ0Kc3VtbWFyeShiaWtlLm1vZGVsKQ0KYGBgDQojIyNBdCB0aGlzIHBvaXQgd2UgY291bGQgbWFrZSBhIHByZWRpY3Rpb24vYXNzdXB0aW9uOiBUb2RheSwgaXQncyBzdW1tZXIgdGltZSwgYW5kIGhvbGlkYXk7IHRoZSB3ZWF0aGVyIGlzICBjbGVhciwgZmV3IGNsb3VkczsgdGVtcCBpcyAyNTsgYnV0IGFpciB0ZW1wIGlzIGFib3V0IDI4OyBodW1pZGl0eSBpcyBhYm91dCA3MCU7IHdlZWRzcGVlZCBpcyBhYm91dCAxMDsgYW5kIGl0J3MgY2xvc2UgdG8gMTBhbSBub3cuIFNvIHdlIGNvdWxkIHByZWRpY3QgaG93IG1hbnkgYmlrZXMgYXJlIG5lZWRlZCBmb3IgdGhlIHNwZWNpZmljIGhvdXIsIGRheSBhbmQgd2VhdGhlci4gDQoNCmBgYHtyfQ0KYmlrZS50ZXN0IDwtIGRhdGEuZnJhbWUoc2Vhc29uPTMsIGhvbGlkYXkgPTEsIHdvcmtpbmdkYXkgPSAwLCB3ZWF0aGVyID0gMSwgdGVtcD0yNSwgYXRlbXAgPSAyOCwgaHVtaWRpdHkgPSA3MC4wLCB3aW5kc3BlZWQgPSAxMC4wLCBob3VyID0gMTAsIGNhc3VhbCA9MTEsIHJlZ2lzdGVyZWQgPSAxNDApDQpwcmVkaWN0KGJpa2UubW9kZWwsIGJpa2UudGVzdCkNCmBgYA0KYGBge3J9DQoNCmBgYA0KDQo=