# Solution.R
# Data Science @ George Washington University
# Author:Xiaodan Chen
# The helper functions defined here (LoadData, train, plotDataModel, rss) are meant to be called from the Driver_Q1.R file
# Load Data
# Make the dataset folder the working directory so that the relative CSV file
# names used below ('hour.csv', 'day.csv') resolve against it.
# NOTE(review): this absolute path is machine-specific; setwd() stops the
# script with an error when the folder does not exist.
setwd("C:/Users/Administrator/Desktop/bayesian/Bike-Sharing-Dataset") #set the working directory
# LoadData: read a CSV file and keep only the columns needed for the model.
#   file - path of the CSV file to read (must contain `temp` and `cnt`)
# Returns a data frame with exactly the columns `temp` and `cnt`, in that order.
LoadData <- function(file) {
  bike <- read.csv(file)
  wanted <- c("temp", "cnt")
  bike[, wanted]
}
# Training data: file name, resolved against the working directory set above.
# NOTE(review): `train` holds this file name only until the `train` function
# defined further down rebinds the same name.
train<-'hour.csv'
# Data frame with the `temp` and `cnt` columns of the training file.
trainData<-LoadData(train)
# Test data: loaded the same way from a different file.
# NOTE(review): judging by the file names, the training file is hourly and the
# test file is daily, so `cnt` may be on different scales - confirm.
test<-'day.csv'
testData<-LoadData(test)
# Train the linear model on training data
# train: fit an ordinary least-squares model of `cnt` on every other column.
#   dataset - data frame containing the response `cnt` and the predictor(s)
# Returns the named coefficient vector of the fit (intercept first).
train <- function(dataset) {
  fit <- lm(cnt ~ ., data = dataset)
  fit[["coefficients"]]
}
# Fit the linear model on the training data and keep its coefficient vector.
beta <- train(trainData)
# trainData only has the columns temp and cnt, so the first coefficient is the
# intercept and the second is the slope for temp.
beta0 <- beta[[1]]
beta1 <- beta[[2]]
# Print beta0 and beta1; the `##` lines appear to be output recorded from an
# earlier run.
beta0
## [1] -0.03559611
beta1
## [1] 381.2949
# plotDataModel: scatter-plot `cnt` against `temp` and overlay a straight line.
#   beta1   - slope of the line
#   beta0   - intercept of the line
#   dataset - data frame with numeric columns `temp` and `cnt`
#   name    - plot title
# Returns NULL invisibly; called for its side effect of drawing on the
# current graphics device.
#
# The columns are read directly from `dataset` instead of via attach():
# attach() places the data frame behind the global environment on the search
# path, so a global variable named `temp` or `cnt` would silently be plotted
# instead of the dataset columns, and an error in plot() would leave the data
# frame attached.
plotDataModel <- function(beta1, beta0, dataset, name) {
  # Explicit axis labels keep the "temp"/"cnt" labelling of the plot.
  plot(dataset[["temp"]], dataset[["cnt"]],
       xlab = "temp", ylab = "cnt", main = name)
  abline(beta0, beta1, col = "red", lwd = 3)
  invisible(NULL)
}
# Draw the training data together with the line fitted on it.
plotDataModel(beta1,beta0,trainData,'trainingResultFig')

# Load data from testing set
#test data
# The test data was already loaded at the beginning of the script, so the two
# statements below are left commented out.
#test<-'day.csv'
#testData<-LoadData(test)
# Fit a second linear model on the test data. Note that this refits on
# testData; it does not evaluate the model fitted on trainData.
#coefficients beta0 and beta1 of the test-data fit
testbeta <- train(testData)
testbeta0 <- testbeta[[1]]
testbeta1 <- testbeta[[2]]
# Print testbeta0 and testbeta1; the `##` lines appear to be output recorded
# from an earlier run.
testbeta0
## [1] 1214.642
testbeta1
## [1] 6640.71
# rss: residual sum of squares of a `cnt ~ temp` linear model.
#   dataset1 - data frame the model is fitted on (columns `temp`, `cnt`)
#   dataset2 - data frame the fitted model is evaluated on (same columns)
# Returns the sum of squared differences between dataset2$cnt and the model's
# predictions for dataset2.
rss <- function(dataset1, dataset2) {
  fit <- lm(cnt ~ temp, data = dataset1)
  errors <- dataset2$cnt - predict(fit, dataset2)
  sum(errors^2)
}
# Residual sum of squares on the test data for a model fitted on the training
# data; the `##` line appears to be output recorded from an earlier run.
rss(trainData,testData)
## [1] 16233007061
# Plot the test data together with the line fitted on the training data
# (beta0 and beta1 come from the training fit, not from testbeta).
plotDataModel(beta1,beta0,testData,'testingResultFig')
