# Load the data set; the first line of the CSV holds the column names.
input <- read.csv("input.csv", header = TRUE)
# View() opens a spreadsheet-style viewer, which is only meaningful in an
# interactive session, so it is skipped when the script runs in batch mode.
if (interactive()) {
  View(input)
}

# Randomly assign 70% of the rows (rounded down) to the training set and the
# remaining rows to the test set. No seed is set here, so the split differs
# between runs; call set.seed() beforehand if a reproducible split is needed.
index <- sample(
  seq_len(nrow(input)),
  size = floor(nrow(input) * 0.7)
)
# drop = FALSE keeps the subset a data frame even when input has one column.
train <- input[index, , drop = FALSE]
if (interactive()) {
  View(train)
}
# setdiff() is used instead of input[-index, ] because a negative subscript
# built from an empty index selects no rows rather than all of them.
test <- input[setdiff(seq_len(nrow(input)), index), , drop = FALSE]
if (interactive()) {
  View(test)
}
# Scatter-plot matrix of the training columns, for a visual check of the
# pairwise relationships.
graphics::pairs(train)
# Pairwise correlation matrix of the training columns (cor() defaults to the
# Pearson coefficient). The matrix recorded below comes from one particular
# random split.
stats::cor(train)
## Ticket.sales Stadium.Quality
## Ticket.sales 1.0000000 0.55045337
## Stadium.Quality 0.5504534 1.00000000
## Home_Team_Current.season.s.winning.percentage 0.4501944 -0.02524227
## Away_Team_Current.season.s.winning.percentage 0.3042202 0.11599603
## Distance.B.w.1.teams 0.3872910 0.38832846
## Weekend 0.4534690 0.21747942
## Free.to.air.Tv 0.3472315 0.26635680
## X.of.promotions.provided 0.9900755 0.49721690
## Home_Team_Current.season.s.winning.percentage
## Ticket.sales 0.45019444
## Stadium.Quality -0.02524227
## Home_Team_Current.season.s.winning.percentage 1.00000000
## Away_Team_Current.season.s.winning.percentage -0.09830730
## Distance.B.w.1.teams 0.15337632
## Weekend 0.17990448
## Free.to.air.Tv 0.12556845
## X.of.promotions.provided 0.46771044
## Away_Team_Current.season.s.winning.percentage
## Ticket.sales 0.30422019
## Stadium.Quality 0.11599603
## Home_Team_Current.season.s.winning.percentage -0.09830730
## Away_Team_Current.season.s.winning.percentage 1.00000000
## Distance.B.w.1.teams 0.05492704
## Weekend -0.04266925
## Free.to.air.Tv -0.31355364
## X.of.promotions.provided 0.27458127
## Distance.B.w.1.teams
## Ticket.sales 0.38729101
## Stadium.Quality 0.38832846
## Home_Team_Current.season.s.winning.percentage 0.15337632
## Away_Team_Current.season.s.winning.percentage 0.05492704
## Distance.B.w.1.teams 1.00000000
## Weekend 0.43702970
## Free.to.air.Tv 0.18809057
## X.of.promotions.provided 0.38519041
## Weekend Free.to.air.Tv
## Ticket.sales 0.45346897 0.3472315
## Stadium.Quality 0.21747942 0.2663568
## Home_Team_Current.season.s.winning.percentage 0.17990448 0.1255684
## Away_Team_Current.season.s.winning.percentage -0.04266925 -0.3135536
## Distance.B.w.1.teams 0.43702970 0.1880906
## Weekend 1.00000000 0.6123724
## Free.to.air.Tv 0.61237244 1.0000000
## X.of.promotions.provided 0.49682858 0.3843007
## X.of.promotions.provided
## Ticket.sales 0.9900755
## Stadium.Quality 0.4972169
## Home_Team_Current.season.s.winning.percentage 0.4677104
## Away_Team_Current.season.s.winning.percentage 0.2745813
## Distance.B.w.1.teams 0.3851904
## Weekend 0.4968286
## Free.to.air.Tv 0.3843007
## X.of.promotions.provided 1.0000000
# Simple linear regression of ticket sales on the number of promotions, the
# column with the highest correlation to Ticket.sales in the matrix above.
# The formula uses bare column names resolved through `data = train`. Writing
# `train$...` inside the formula binds the model terms to the training
# vectors, so predict() ignores `newdata`, warns that "'newdata' had 9 rows
# but variables found have 21 rows", and returns the training fitted values.
M <- lm(Ticket.sales ~ X.of.promotions.provided, data = train)
summary(M)
## Output recorded from one random 70/30 split (21 training rows):
##
## Call:
## lm(formula = Ticket.sales ~ X.of.promotions.provided, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2414.5 -1874.4 -1270.6 -922.2 22156.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.126e+03 1.753e+03 1.783 0.0906 .
## X.of.promotions.provided 2.446e+00 7.964e-02 30.708 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5378 on 19 degrees of freedom
## Multiple R-squared: 0.9802, Adjusted R-squared: 0.9792
## F-statistic: 943 on 1 and 19 DF, p-value: < 2.2e-16

# Predict ticket sales for the held-out rows. predict() looks up only the
# predictor column in `newdata`, so the whole test frame is passed as-is.
prediction <- predict(M, newdata = test)
# One value per test row (9 for the split recorded above), named by the test
# rows' original row names. The 21-value output recorded previously was the
# set of training fitted values caused by the `train$` formula; rerun the
# script to record the test-set predictions.
prediction