# Load the data set from a CSV file whose first row holds the column names.
# NOTE(review): the absolute Windows path ties this script to one machine;
# consider a relative path or a configurable location.
input <- read.csv("C:/Users/localadmin/Desktop/input.csv", header = TRUE)

# View() opens a spreadsheet-style viewer, which is only meaningful in an
# interactive session, so it is skipped when the script runs in batch mode.
if (interactive()) {
  View(input)
}

# attach() puts the columns of `input` on the search path so they can be
# referenced by bare name. It is kept for backward compatibility with code
# that relies on that; prefer `input$column` or a `data =` argument in new
# code, since attached copies do not follow later changes to `input`.
attach(input)
# Splitting the data set into training data and testing data
# Draw a random 70% of the row positions of `input`, sorted ascending, to
# form the training set. The draw is not seeded, so the selection differs
# from run to run. floor() makes the sample size an explicit whole number
# rather than passing a fractional size to sample().
dt <- sort(sample(nrow(input), floor(nrow(input) * 0.7)))

# Print the chosen row positions (the line below is output from one run).
dt
## [1] 1 3 4 6 7 9 11 12 13 14 16 17 19 21 22 23 24 25 26 27 28

# Rows selected by `dt` become the training set.
training <- input[dt, ]
if (interactive()) {
  View(training)
}

# Every row not selected for training becomes the testing set. setdiff() is
# used instead of `input[-dt, ]` because negative indexing with an empty `dt`
# selects zero rows rather than all of them.
testing <- input[setdiff(seq_len(nrow(input)), dt), ]
if (interactive()) {
  View(testing)
}
# Correlation of the training data
# Correlation (default method of cor()) between ticket sales and the
# promotions column, computed on the training rows only.
cor(training$Ticket.sales, training$X.of.promotions.provided)
## [1] 0.9789374

# Fit a simple linear regression of ticket sales on the promotions column
# using the training data.
model <- lm(Ticket.sales ~ X.of.promotions.provided, data = training)

# Plot the same rows and the same response/predictor orientation the model
# was fitted with, taking the columns from `training` explicitly instead of
# from attach()-ed globals, then overlay the fitted regression line.
plot(Ticket.sales ~ X.of.promotions.provided, data = training)
abline(model)

# Coefficient table and goodness-of-fit statistics for the fitted model.
summary(model)
##
## Call:
## lm(formula = Ticket.sales ~ X.of.promotions.provided, data = training)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9921.8 -2881.7 -2303.4 -494.7 21338.2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4801.7057 2451.3184 1.959 0.065 .
## X.of.promotions.provided 2.3744 0.1136 20.901 1.43e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7258 on 19 degrees of freedom
## Multiple R-squared: 0.9583, Adjusted R-squared: 0.9561
## F-statistic: 436.8 on 1 and 19 DF, p-value: 1.428e-14
# Predicting on the testing data
# Predict ticket sales for the testing rows with the fitted model.
# The whole `testing` data frame is passed as `newdata`: predict() looks the
# predictor up by name, so no column has to be dropped by position (the old
# `testing[, -1]` collapsed to a plain vector when only one column remained).
# The column is named explicitly so later code does not depend on a name
# auto-generated from the text of the predict() call.
prediction <- data.frame(predicted = predict(model, newdata = testing))

# Distribution of the predicted values.
summary(prediction)
## predicted
## Min. : 6870
## 1st Qu.:20682
## Median :49533
## Mean :53091
## 3rd Qu.:83731
## Max. :97565

# Observed ticket sales for the same testing rows, again with an explicit
# column name.
actual_value <- data.frame(actual = testing$Ticket.sales)

# Two-column numeric matrix placing each prediction next to the observed
# value for the same testing row.
final <- cbind(
  predicted = prediction$predicted,
  actual = actual_value$actual
)
final
## predicted actual
## [1,] 97564.504 97670
## [2,] 91006.430 90764
## [3,] 81620.453 80883
## [4,] 83731.289 73870
## [5,] 49532.902 47097
## [6,] 39301.641 36324
## [7,] 20681.648 17600
## [8,] 6869.802 3960
## [9,] 7513.263 3807