# Task 2

# Linear regression model

# Load the data set; the first line of input.csv supplies the column names.
input <- read.csv("input.csv", header = TRUE)
# View() opens a data viewer, so it is only called in interactive sessions.
if (interactive()) {
  View(input)
}

# Randomly assign 70% of the rows to the training set and the rest to the
# test set. The seed is fixed so the split (and everything computed from it)
# is the same on every run.
set.seed(123)
# seq_len() stays empty for a zero-row frame (1:0 would yield c(1, 0)), and
# floor() makes the truncation of the fractional sample size explicit.
index <- sample(seq_len(nrow(input)), size = floor(nrow(input) * 0.7))
train <- input[index, ]
if (interactive()) {
  View(train)
}
test <- input[-index, ]
if (interactive()) {
  View(test)
}

# Scatterplot matrix of all training columns.
pairs(train)

## correlation

# Pearson correlation matrix across all columns of the training set; the
# printed result is recorded in the comment lines that follow.
cor(x = train, method = "pearson")
##                                               Ticket.sales Stadium.Quality
## Ticket.sales                                     1.0000000      0.55045337
## Stadium.Quality                                  0.5504534      1.00000000
## Home_Team_Current.season.s.winning.percentage    0.4501944     -0.02524227
## Away_Team_Current.season.s.winning.percentage    0.3042202      0.11599603
## Distance.B.w.1.teams                             0.3872910      0.38832846
## Weekend                                          0.4534690      0.21747942
## Free.to.air.Tv                                   0.3472315      0.26635680
## X.of.promotions.provided                         0.9900755      0.49721690
##                                               Home_Team_Current.season.s.winning.percentage
## Ticket.sales                                                                     0.45019444
## Stadium.Quality                                                                 -0.02524227
## Home_Team_Current.season.s.winning.percentage                                    1.00000000
## Away_Team_Current.season.s.winning.percentage                                   -0.09830730
## Distance.B.w.1.teams                                                             0.15337632
## Weekend                                                                          0.17990448
## Free.to.air.Tv                                                                   0.12556845
## X.of.promotions.provided                                                         0.46771044
##                                               Away_Team_Current.season.s.winning.percentage
## Ticket.sales                                                                     0.30422019
## Stadium.Quality                                                                  0.11599603
## Home_Team_Current.season.s.winning.percentage                                   -0.09830730
## Away_Team_Current.season.s.winning.percentage                                    1.00000000
## Distance.B.w.1.teams                                                             0.05492704
## Weekend                                                                         -0.04266925
## Free.to.air.Tv                                                                  -0.31355364
## X.of.promotions.provided                                                         0.27458127
##                                               Distance.B.w.1.teams
## Ticket.sales                                            0.38729101
## Stadium.Quality                                         0.38832846
## Home_Team_Current.season.s.winning.percentage           0.15337632
## Away_Team_Current.season.s.winning.percentage           0.05492704
## Distance.B.w.1.teams                                    1.00000000
## Weekend                                                 0.43702970
## Free.to.air.Tv                                          0.18809057
## X.of.promotions.provided                                0.38519041
##                                                   Weekend Free.to.air.Tv
## Ticket.sales                                   0.45346897      0.3472315
## Stadium.Quality                                0.21747942      0.2663568
## Home_Team_Current.season.s.winning.percentage  0.17990448      0.1255684
## Away_Team_Current.season.s.winning.percentage -0.04266925     -0.3135536
## Distance.B.w.1.teams                           0.43702970      0.1880906
## Weekend                                        1.00000000      0.6123724
## Free.to.air.Tv                                 0.61237244      1.0000000
## X.of.promotions.provided                       0.49682858      0.3843007
##                                               X.of.promotions.provided
## Ticket.sales                                                 0.9900755
## Stadium.Quality                                              0.4972169
## Home_Team_Current.season.s.winning.percentage                0.4677104
## Away_Team_Current.season.s.winning.percentage                0.2745813
## Distance.B.w.1.teams                                         0.3851904
## Weekend                                                      0.4968286
## Free.to.air.Tv                                               0.3843007
## X.of.promotions.provided                                     1.0000000

# Linear regression model

# Fit a simple linear regression of Ticket.sales on X.of.promotions.provided
# using the training rows.
# The formula uses bare column names that are looked up in `data = train`.
# Writing `train$...` inside the formula ties the model to the training
# vectors themselves, so predict(M, newdata = ...) cannot substitute new rows:
# it warns "'newdata' had ... rows but variables found have ... rows" and
# returns one value per training row instead of one per new row.
M <- lm(Ticket.sales ~ X.of.promotions.provided, data = train)

# Print coefficients, residual summary and fit statistics.
summary(M)
## 
## Call:
## lm(formula = train$Ticket.sales ~ train$X.of.promotions.provided, 
##     data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2414.5 -1874.4 -1270.6  -922.2 22156.0 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    3.126e+03  1.753e+03   1.783   0.0906 .  
## train$X.of.promotions.provided 2.446e+00  7.964e-02  30.708   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5378 on 19 degrees of freedom
## Multiple R-squared:  0.9802, Adjusted R-squared:  0.9792 
## F-statistic:   943 on 1 and 19 DF,  p-value: < 2.2e-16

# Prediction for test data

# Ask the fitted model M for predictions on the test rows; the first column
# of `test` is dropped before the data is passed as `newdata`.
# NOTE(review): the warning recorded below is what predict() emits when the
# model formula names its variables as `train$...`; in that situation `newdata`
# is not used and one value per training row comes back. Confirm that the
# number of predictions equals nrow(test).
prediction <- predict(object = M, newdata = test[, -1])
## Warning: 'newdata' had 9 rows but variables found have 21 rows
# Display the predicted values.
prediction
##         1         2         3         4         5         6         7 
##  5691.219 74636.940 19592.375 12475.492 38661.219  9499.119  5255.891 
##         8         9        10        11        12        13        14 
## 10357.547 91918.004 93969.916  7361.608 49199.586 19482.320 95919.111 
##        15        16        17        18        19        20        21 
## 98672.928 96510.961 82264.967 46215.876  8239.602  7080.357 32573.961