# Sample_Solution.R
# This is the sample solution file for the course:
# Bayesian Methods for Data Science (DATS 6450 - 11, Spring 2018)
# Data Science @ George Washington University
# Author:Xiaodan Chen
# Here three functions are defined which will be called by Driver_Q1.R file 
# Load Data
# NOTE(review): the working directory is hard-coded to one machine. It is kept
# because the relative file names used by this script resolve against it;
# consider passing the data directory in from the caller instead.
setwd("C:/Users/Administrator/Desktop/bayesian/Bike-Sharing-Dataset")
# Read the hourly and daily tables, keeping text columns as character vectors.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved word.
hour <- read.csv("hour.csv", stringsAsFactors = FALSE)
day <- read.csv("day.csv", stringsAsFactors = FALSE)

# Check the attributes of the hourly data, which is used as the training set.
names(hour)
##  [1] "instant"    "dteday"     "season"     "yr"         "mnth"      
##  [6] "hr"         "holiday"    "weekday"    "workingday" "weathersit"
## [11] "temp"       "atemp"      "hum"        "windspeed"  "casual"    
## [16] "registered" "cnt"
# Keep the predictor (temp) and the response (cnt). Columns are selected by
# name instead of by position (11 and 17) so that a change in the CSV column
# order raises an error rather than silently picking the wrong variables.
train <- hour[, c("temp", "cnt")]

# Check the attributes of the daily data, which is used as the test set.
names(day)
##  [1] "instant"    "dteday"     "season"     "yr"         "mnth"      
##  [6] "holiday"    "weekday"    "workingday" "weathersit" "temp"      
## [11] "atemp"      "hum"        "windspeed"  "casual"     "registered"
## [16] "cnt"
# Same two variables as in the training set (positions 10 and 16 here).
test <- day[, c("temp", "cnt")]
# Training: build the linear regression model.
# Count the missing values in the training data.
sum(is.na(train))
## [1] 0
# Data frame conversion for both datasets.
test <- data.frame(test)
train <- data.frame(train)

# Model: regress cnt on every other column of train.
model <- lm(cnt ~ ., data = train)
summary(model)
## 
## Call:
## lm(formula = cnt ~ ., data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -291.37 -110.23  -32.86   76.77  744.76 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -0.0356     3.4827   -0.01    0.992    
## temp        381.2949     6.5344   58.35   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 165.9 on 17377 degrees of freedom
## Multiple R-squared:  0.1638, Adjusted R-squared:  0.1638 
## F-statistic:  3405 on 1 and 17377 DF,  p-value: < 2.2e-16

# As we can see, cnt = 381.2949 * temp - 0.0356: the weight of temp (b1) is
# 381.2949 and the intercept (b0) is -0.0356.

# Testing: predict cnt for the test data with the fitted model.
pred <- predict(model, newdata = test)
# Pair each test temperature with its predicted count and write the table to
# a CSV file without row names.
submit <- data.frame(temp = test$temp, cnt = pred)
write.csv(submit, file = "prediction.csv", row.names = FALSE)

# RSS: residual sum of squares of the fitted model.
RSS <- sum(residuals(model)^2)
RSS
## [1] 478083832
# Plots
# Training data: scatter plot of cnt against temp with the fitted line in red.
plot(train$temp, train$cnt, main = "TrainingResultFig")
abline(model, col = "red")

# Test data: the same fitted line drawn over the test observations.
plot(test$temp, test$cnt, main = "TestingResultFig")
abline(model, col = "red")