# Sample_Solution.R
# This is the sample solution file for the course:
# Bayesian Methods for Data Science (DATS 6450 - 11, Spring 2018)
# Data Science @ George Washington University
# Author:Xiaodan Chen
# Here three functions are defined which will be called by Driver_Q1.R file
# Load data ----
# NOTE: machine-specific absolute path; point it at the folder that holds
# hour.csv and day.csv. setwd() is kept because the relative file names used
# in this script (including "prediction.csv", written further down) are
# resolved against this directory.
setwd("C:/Users/Administrator/Desktop/bayesian/Bike-Sharing-Dataset")
# FALSE is spelled out: the shorthand F is an ordinary variable that can be
# reassigned, which would silently flip this option.
hour <- read.csv("hour.csv", stringsAsFactors = FALSE)
day <- read.csv("day.csv", stringsAsFactors = FALSE)
names(hour) # check attributes of the train dataset
## [1] "instant" "dteday" "season" "yr" "mnth"
## [6] "hr" "holiday" "weekday" "workingday" "weathersit"
## [11] "temp" "atemp" "hum" "windspeed" "casual"
## [16] "registered" "cnt"
# Training set: keep only temp and cnt (columns 11 and 17 of hour.csv).
# Selecting by name instead of position means a reordered or extended CSV
# fails loudly rather than silently picking the wrong columns.
train <- hour[, c("temp", "cnt")]
names(day) # check attributes of the test dataset
## [1] "instant" "dteday" "season" "yr" "mnth"
## [6] "holiday" "weekday" "workingday" "weathersit" "temp"
## [11] "atemp" "hum" "windspeed" "casual" "registered"
## [16] "cnt"
# Test set: the same two columns (10 and 16 of day.csv), again by name.
test <- day[, c("temp", "cnt")]
# Train: build the linear regression model ----
# Count the missing values in the training data; 0 means there are none.
sum(is.na(train))
## [1] 0
# Data frame conversion: make sure both sets are plain data frames.
test <- data.frame(test)
train <- data.frame(train)
# Model: regress cnt on every other column of train, then print the fit
# summary (coefficients, residual quantiles, R-squared, F-statistic).
model <- lm(formula = cnt ~ ., data = train)
summary(model)
##
## Call:
## lm(formula = cnt ~ ., data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -291.37 -110.23 -32.86 76.77 744.76
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0356 3.4827 -0.01 0.992
## temp 381.2949 6.5344 58.35 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 165.9 on 17377 degrees of freedom
## Multiple R-squared: 0.1638, Adjusted R-squared: 0.1638
## F-statistic: 3405 on 1 and 17377 DF, p-value: < 2.2e-16
# As we can see, cnt = 381.2949 * temp - 0.0356: the weight of temp (b1) is 381.2949 and the intercept (b0) is -0.0356.
# Test: predict cnt for the test set with the fitted model ----
pred <- predict(object = model, newdata = test)
# Pair each test temp value with its predicted cnt and save the table as CSV
# (relative path, so it lands in the current working directory).
submit <- data.frame(temp = test$temp, cnt = pred)
write.csv(x = submit, file = "prediction.csv", row.names = FALSE)
# RSS: residual sum of squares of the fitted model, i.e. computed from the
# residuals on the data the model was trained on.
RSS <- sum(residuals(model)^2)
RSS
## [1] 478083832
# Plots ----
# Training data: scatter of cnt against temp, with the fitted line in red.
plot(x = train$temp, y = train$cnt, main = "TrainingResultFig")
abline(reg = model, col = "red")
# Test data: the same fitted line drawn over the test observations.
plot(x = test$temp, y = test$cnt, main = "TestingResultFig")
abline(reg = model, col = "red")
# End of script.