rtask2

# Read the CSV export into a data frame; the first row supplies column names.
input <- read.csv(
  file = "C:/Users/localadmin/Desktop/input.csv",
  header = TRUE
)

# Open the raw data in the viewer for a quick visual check.
View(input)

# Put the columns of `input` on the search path so they can be used by bare
# name. NOTE(review): attach() is discouraged (masking, stale copies); it is
# kept because the bare-name plot() call further down relies on it.
attach(input)

Splitting the data set into training data and testing data

# Randomly assign 70% of the rows to the training set and the rest to the
# testing set. Fixing the seed makes the split (and everything fitted on it)
# reproducible from run to run.
set.seed(123)

# Sorted row indices of the training sample; floor() makes the truncation of
# the fractional sample size explicit.
dt <- sort(sample(nrow(input), floor(0.7 * nrow(input))))
dt

# Output recorded from an earlier, unseeded run:
##  [1]  1  3  4  6  7  9 11 12 13 14 16 17 19 21 22 23 24 25 26 27 28

training <- input[dt, ]
View(training)

# Select the complement by positive index: `input[-dt, ]` would return zero
# rows (instead of all rows) if `dt` were ever empty.
testing <- input[setdiff(seq_len(nrow(input)), dt), ]
View(testing)

correlation of training data

# Correlation between the response and the predictor on the training rows.
cor(training$Ticket.sales, training$X.of.promotions.provided)

## [1] 0.9789374

# Simple linear regression of Ticket.sales on X.of.promotions.provided,
# fitted on the training rows only.
model <- lm(Ticket.sales ~ X.of.promotions.provided, data = training)

# Scatter plot of the rows the model was fitted on, with the response on the
# y-axis to match the model formula. Taking the columns from `training`
# explicitly avoids plotting the attach()ed full data set, which also
# contains the held-out test rows.
plot(Ticket.sales ~ X.of.promotions.provided, data = training)

summary(model)

## 
## Call:
## lm(formula = Ticket.sales ~ X.of.promotions.provided, data = training)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9921.8 -2881.7 -2303.4  -494.7 21338.2 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              4801.7057  2451.3184   1.959    0.065 .  
## X.of.promotions.provided    2.3744     0.1136  20.901 1.43e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7258 on 19 degrees of freedom
## Multiple R-squared:  0.9583, Adjusted R-squared:  0.9561 
## F-statistic: 436.8 on 1 and 19 DF,  p-value: 1.428e-14

predicting

# Predict Ticket.sales for the held-out rows. predict() looks the predictor up
# by name, so the whole testing frame is passed as `newdata`; dropping the
# first column by position (`testing[, -1]`) would collapse a two-column frame
# to a bare vector.
prediction <- data.frame(predicted = predict(model, newdata = testing))
summary(prediction)

# Output recorded from an earlier run (the column was then auto-named from the
# predict() call; it is now named `predicted`):
##  predict.model..testing....1..
##  Min.   : 6870                
##  1st Qu.:20682                
##  Median :49533                
##  Mean   :53091                
##  3rd Qu.:83731                
##  Max.   :97565

# Observed values for the same rows, kept as a one-column data frame.
actual_value <- data.frame(testing$Ticket.sales)

# Side-by-side matrix of predicted vs. observed sales for the test rows.
final <- cbind(
  predicted = prediction$predicted,
  actual = testing$Ticket.sales
)
final

##       predicted actual
##  [1,] 97564.504  97670
##  [2,] 91006.430  90764
##  [3,] 81620.453  80883
##  [4,] 83731.289  73870
##  [5,] 49532.902  47097
##  [6,] 39301.641  36324
##  [7,] 20681.648  17600
##  [8,]  6869.802   3960
##  [9,]  7513.263   3807

rtask2

shyama

November 18, 2015

Splitting the data set into training data and testing data

correlation of training data

predicting