# Solution.R
# Data Science @ George Washington University
# Author:Xiaodan Chen
# This file defines four functions (LoadData, train, plotDataModel, rss) for use by the Driver_Q1.R file
# Load Data
# NOTE(review): hard-coded, machine-specific absolute path. The relative file
# names passed to LoadData() below ('hour.csv', 'day.csv') resolve against it,
# so the script stops here on any machine where this directory does not exist.
setwd("C:/Users/Administrator/Desktop/bayesian/Bike-Sharing-Dataset") #set the working directory

# Read a CSV file and keep only the two columns used by the model.
#
# Arguments:
#   file  path of the CSV file; it must contain 'temp' and 'cnt' columns
# Returns a data frame with the columns 'temp' and 'cnt', in that order.
LoadData<-function(file){
  wanted<-c('temp','cnt')
  raw<-read.csv(file)
  raw[,wanted]
}

#train data
# File name of the training set, resolved against the current working directory.
# NOTE: `train` holds a file name only until the `train` function is defined
# further down, which rebinds the same name; LoadData(train) must run before that.
train<-'hour.csv'
trainData<-LoadData(train)

#test data
# File name of the test set; LoadData() keeps only its 'temp' and 'cnt' columns.
test<-'day.csv'
testData<-LoadData(test)
# Train the linear model on training data
# Fit a linear model of cnt on every other column of `dataset`.
#
# Arguments:
#   dataset  data frame containing the response 'cnt' and the predictor(s)
# Returns the named vector of fitted coefficients (intercept first).
train<-function(dataset){
  fit<-lm(formula=cnt~.,data=dataset)
  coef(fit)
}

#coefficients beta0 and beta1
# train() returns the fitted coefficient vector; with the two-column trainData
# (temp, cnt) element 1 is the intercept and element 2 is the slope on temp.
beta <- train(trainData)
beta0 <- beta[[1]]
beta1 <- beta[[2]]

#show beta0 and beta1
# Bare names auto-print at top level; the '##' lines are pasted output,
# presumably from an earlier run on the original data, not live code.
beta0
## [1] -0.03559611
beta1
## [1] 381.2949
# Plot the data and linear model
#
# Draws a scatter plot of cnt against temp and overlays the straight line
# cnt = beta0 + beta1 * temp in red.
#
# Arguments:
#   beta1    slope of the line
#   beta0    intercept of the line
#   dataset  data frame with the columns 'temp' and 'cnt'
#   name     plot title
# Returns NULL invisibly; the function is called for its plotting side effect.
# Stops with an error if `dataset` lacks either required column.
plotDataModel<-function(beta1,beta0,dataset,name){
  # Columns are read straight from `dataset` rather than through
  # attach()/detach(): attach() leaves the data on the search path when
  # plot() fails, and attached columns are masked by any global variable
  # that happens to be called 'temp' or 'cnt'.
  stopifnot(all(c('temp','cnt') %in% names(dataset)))
  # Axis labels are given explicitly; otherwise they would show the
  # column-extraction expressions instead of the column names.
  plot(dataset[['temp']],dataset[['cnt']],xlab='temp',ylab='cnt',main=name)
  abline(a=beta0,b=beta1,col='red',lwd=3)
  invisible(NULL)
}

# Scatter plot of the training data with the fitted line drawn on top.
plotDataModel(beta1,beta0,trainData,'trainingResultFig')

# Load data from testing set
#test data
# The test data was already loaded near the top of the file, so the two lines
# below stay commented out.
#test<-'day.csv'
#testData<-LoadData(test)

#test the model on the test data
#coefficients beta0 and beta1
# NOTE(review): this does not evaluate the training-set model; train() fits a
# new linear model on testData, so testbeta0/testbeta1 are the coefficients of
# that separate fit.
testbeta <- train(testData)
testbeta0 <- testbeta[[1]]
testbeta1 <- testbeta[[2]]

#show beta0 and beta1
# As above, the '##' lines are pasted output rather than live code.
testbeta0
## [1] 1214.642
testbeta1
## [1] 6640.71
# Residual sum of squares of a cnt~temp linear model on a second data set.
#
# Arguments:
#   dataset1  data frame used to fit the model (needs 'temp' and 'cnt')
#   dataset2  data frame the fitted model is scored on (needs 'temp' and 'cnt')
# Returns the sum of squared differences between dataset2$cnt and the
# model's predictions for dataset2.
rss<-function(dataset1,dataset2){
  fit<-lm(cnt~temp,data=dataset1)
  residual<-dataset2$cnt-predict(fit,dataset2)
  sum(residual^2)
}

# Residual sum of squares on testData for a cnt~temp model fitted on trainData.
rss(trainData,testData)
## [1] 16233007061
# Plot the data and linear model
# Draws the test data with the line from the training-set coefficients
# (beta0, beta1), not the test-set refit (testbeta0, testbeta1).
plotDataModel(beta1,beta0,testData,'testingResultFig')