Pricing of players in the Indian Premier League excel.XLS
GROUP 4 MEMBERS
1.Vinod G Moovankara
2.Pahar Singh
3.Nitin Sharma
4.Ambuj Singh
5.Shilpee Srivastava Saxena
6.Kaushal Falwaria
setwd(dir = "/Users/vinodg/IIMK - PROJECTS/CAPSTONE PROJECT/PROJECT 3")
library(readxl)
ipl <- read_excel("Pricing of players in the Indian Premier League excel.xls",
sheet = "IPL Raw Data")
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(fastDummies)
ipl <-dummy_cols(ipl, select_columns =c("playing_role"), split = TRUE)
ipl <-dummy_cols(ipl, select_columns =c("age"), split = TRUE)
colnames(ipl)[colnames(ipl) == 'age_1'] <- 'L25'
colnames(ipl)[colnames(ipl) == 'age_2'] <- 'B25_35'
colnames(ipl)[colnames(ipl) == 'age_3'] <- 'A35'
#summary(ipl)
sum(is.na(ipl))
## [1] 0
Missing_values <- data.frame(Missing=colSums(is.na(ipl)))
#Missing_values
iplbt <- ipl
iplbt$country <- ifelse(iplbt$country == "IND","IND","OTH")
iplbt <- dummy_cols(iplbt, select_columns =c("country"), split = TRUE)
#Selecting only the rows with data for batsmen
iplbt <- iplbt[ipl$playing_role_Batsman == '1',]
#Removing the unwanted rows
iplbt <- iplbt [, -c(1:6,8,11,12,19:25,27:30)]
#str(iplbt)
library(corrplot)
## corrplot 0.92 loaded
m = cor(iplbt)
#View(m)
#Generating Correlation plots
corrplot(m, order = 'AOE')
corrplot(m, method = 'number')
#Plots with a confidence level of 95%
testRes = cor.mtest(m, conf.level = 0.95)
corrplot(m, p.mat = testRes$p, method = 'circle', type = 'lower', insig='blank')
correlation_df_sig<-cor.mtest(m, conf.level = 0.95, method = "spearman")
Plot1<-corrplot(cor(m, method = 'spearman', use = "pairwise.complete.obs"),
method = 'color',
addCoef.col = 'black',
p.mat = correlation_df_sig$p,
insig='blank',
diag = FALSE,
number.cex = 0.5,
type='upper'
)
set.seed(1234)
options(scipen = 10)
model_bat <- lm(sold_price ~., data = iplbt)
summary(model_bat)
##
## Call:
## lm(formula = sold_price ~ ., data = iplbt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -676907 -208553 -17384 217490 582183
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1579559.56 493393.23 -3.201 0.003590 **
## t_runs 12.69 34.09 0.372 0.712674
## odi_runs_s 46.90 34.42 1.363 0.184641
## odi_sr_b 3025.33 3704.20 0.817 0.421505
## captaincy_exp 271595.90 156811.79 1.732 0.095126 .
## runs_s -587.34 263.06 -2.233 0.034391 *
## hs -6598.70 4869.94 -1.355 0.187079
## ave 41553.42 15142.69 2.744 0.010849 *
## sr_b 5535.43 3664.16 1.511 0.142925
## sixers 7784.67 5385.73 1.445 0.160285
## L25 769146.14 258820.22 2.972 0.006303 **
## B25_35 346990.52 150625.49 2.304 0.029491 *
## A35 NA NA NA NA
## country_IND 728394.46 185562.76 3.925 0.000568 ***
## country_OTH NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 342200 on 26 degrees of freedom
## Multiple R-squared: 0.7062, Adjusted R-squared: 0.5706
## F-statistic: 5.208 on 12 and 26 DF, p-value: 0.0002134
ggplotRegression <- function (fit) {
require(ggplot2)
ggplot(fit$model, aes_string(x = names(fit$model)[2], y = names(fit$model)[1])) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
labs(title = paste("Adj R2 = ",signif(summary(fit)$adj.r.squared, 5),
"Intercept =",signif(fit$coef[[1]],5 ),
" Slope =",signif(fit$coef[[2]], 5),
" P =",signif(summary(fit)$coef[2,4], 5)))
}
ggplotRegression(lm(sold_price ~ runs_s, data = iplbt))
## Loading required package: ggplot2
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation ideoms with `aes()`
## `geom_smooth()` using formula = 'y ~ x'
ggplotRegression(lm(sold_price ~ ave, data = iplbt))
## `geom_smooth()` using formula = 'y ~ x'
ggplotRegression(lm(sold_price ~ L25, data = iplbt))
## `geom_smooth()` using formula = 'y ~ x'
ggplotRegression(lm(sold_price ~ B25_35, data = iplbt))
## `geom_smooth()` using formula = 'y ~ x'
ggplotRegression(lm(sold_price ~ country_IND, data = iplbt))
## `geom_smooth()` using formula = 'y ~ x'
error <- model_bat$residuals
hist(error)
iplbl <- ipl
iplbl$country <- ifelse(iplbl$country == "IND","IND","OTH")
iplbl <- dummy_cols(iplbl, select_columns =c("country"), split = TRUE)
iplbl <- iplbl[ipl$playing_role_Bowler == '1',]
#names(iplbl)
#Removing the unwanted columns
iplbl <- iplbl [, -c(1:6,7,9,10,14,15,16,17,18,24,25,27:30)]
#str(iplbl)
library(corrplot)
m = cor(iplbl)
#View(m)
corrplot(m, order = 'AOE')
corrplot(m, method = 'number')
testRes = cor.mtest(m, conf.level = 0.95)
corrplot(m, p.mat = testRes$p, method = 'circle', type = 'lower', insig='blank')
correlation_df_sig<-cor.mtest(m, conf.level = 0.95, method = "spearman")
Plot1<-corrplot(cor(m, method = 'spearman', use = "pairwise.complete.obs"),
method = 'color',
addCoef.col = 'black',
p.mat = correlation_df_sig$p,
insig='blank',
diag = FALSE,
number.cex = 0.5,
type='upper'
)
model_bowl <- lm(sold_price ~., data = iplbl)
summary(model_bowl)
##
## Call:
## lm(formula = sold_price ~ ., data = iplbl)
##
## Residuals:
## Min 1Q Median 3Q Max
## -305992 -135589 -72003 90813 391639
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 609748.5 431075.4 1.414 0.16719
## t_wkts -331.4 463.6 -0.715 0.47999
## odi_wkts 436.7 543.1 0.804 0.42745
## odi_sr_bl 2034.4 3349.7 0.607 0.54806
## captaincy_exp 238138.8 194907.2 1.222 0.23099
## runs_c 685.6 233.6 2.935 0.00623 **
## wkts -10656.4 5837.2 -1.826 0.07756 .
## ave_bl 5135.5 12892.6 0.398 0.69312
## econ -49153.0 42976.5 -1.144 0.26150
## sr_bl -10345.0 21306.6 -0.486 0.63071
## L25 111645.9 163666.4 0.682 0.50021
## B25_35 -21960.4 125529.6 -0.175 0.86226
## A35 NA NA NA NA
## country_IND -115063.1 106720.9 -1.078 0.28928
## country_OTH NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 218000 on 31 degrees of freedom
## Multiple R-squared: 0.4924, Adjusted R-squared: 0.2959
## F-statistic: 2.506 on 12 and 31 DF, p-value: 0.01964
ggplotRegression <- function (fit) {
require(ggplot2)
ggplot(fit$model, aes_string(x = names(fit$model)[2], y = names(fit$model)[1])) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
labs(title = paste("Adj R2 = ",signif(summary(fit)$adj.r.squared, 5),
"Intercept =",signif(fit$coef[[1]],5 ),
" Slope =",signif(fit$coef[[2]], 5),
" P =",signif(summary(fit)$coef[2,4], 5)))
}
ggplotRegression(lm(sold_price ~ runs_c, data = iplbl))
## `geom_smooth()` using formula = 'y ~ x'
ggplotRegression(lm(sold_price ~ wkts, data = iplbl))
## `geom_smooth()` using formula = 'y ~ x'
error <- model_bowl$residuals
hist(error)
ipla <- ipl
ipla$country <- ifelse(ipla$country == "IND","IND","OTH")
ipla <- dummy_cols(ipla, select_columns =c("country"), split = TRUE)
ipla <- ipla[ipl$playing_role_Allrounder == '1',]
ipla <- ipla [, -c(1:6,24:25,27:30)]
#str(ipla)
library(corrplot)
m = cor(ipla)
#View(m)
corrplot(m, order = 'AOE')
corrplot(m, method = 'number')
testRes = cor.mtest(m, conf.level = 0.95)
corrplot(m, p.mat = testRes$p, method = 'circle', type = 'lower', insig='blank')
correlation_df_sig<-cor.mtest(m, conf.level = 0.95, method = "spearman")
Plot1<-corrplot(cor(m, method = 'spearman', use = "pairwise.complete.obs"),
method = 'color',
addCoef.col = 'black',
p.mat = correlation_df_sig$p,
insig='blank',
diag = FALSE,
number.cex = 0.5,
type='upper'
)
set.seed(1234)
options(scipen = 10)
model_ar <- lm(sold_price ~., data = ipla)
summary(model_ar)
##
## Call:
## lm(formula = sold_price ~ ., data = ipla)
##
## Residuals:
## Min 1Q Median 3Q Max
## -496575 -216037 34869 138288 426132
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 739011.27 419137.75 1.763 0.0997 .
## t_runs -51.13 125.32 -0.408 0.6894
## t_wkts -684.07 3078.09 -0.222 0.8273
## odi_runs_s -33.04 152.87 -0.216 0.8320
## odi_sr_b 4441.83 3963.67 1.121 0.2813
## odi_wkts 1032.24 3142.51 0.328 0.7474
## odi_sr_bl -6472.93 4118.57 -1.572 0.1384
## captaincy_exp 590306.79 410738.03 1.437 0.1726
## runs_s -151.98 540.32 -0.281 0.7826
## hs -346.25 6275.26 -0.055 0.9568
## ave 16945.02 16322.60 1.038 0.3168
## sr_b -2954.39 4132.80 -0.715 0.4864
## sixers 1583.25 6391.27 0.248 0.8079
## runs_c 575.82 1250.76 0.460 0.6523
## wkts -9965.03 34417.29 -0.290 0.7764
## ave_bl 39623.86 33289.88 1.190 0.2537
## econ -10676.95 47355.92 -0.225 0.8249
## sr_bl -51587.20 40097.66 -1.287 0.2191
## L25 -203673.23 428120.04 -0.476 0.6416
## B25_35 -577788.49 412930.15 -1.399 0.1835
## A35 NA NA NA NA
## country_IND 291771.78 209398.07 1.393 0.1852
## country_OTH NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 372400 on 14 degrees of freedom
## Multiple R-squared: 0.6356, Adjusted R-squared: 0.115
## F-statistic: 1.221 on 20 and 14 DF, p-value: 0.3567
iplbw <- ipl
iplbw$country <- ifelse(iplbw$country == "IND","IND","OTH")
iplbw <- dummy_cols(iplbw, select_columns =c("country"), split = TRUE)
iplbw$playing_role <- ifelse(iplbw$playing_role == "Batsman" | iplbw$playing_role == "W. Keeper","1","0")
iplbw <- dummy_cols(iplbw, select_columns =c("playing_role"), split = TRUE)
iplbw <- iplbw[iplbw$playing_role_1 == '1',]
iplbw <- iplbw [, -c(1:6,8,11,12,19:25,27:30, 36:37)]
#str(iplbw)
library(corrplot)
m = cor(iplbw)
#View(m)
corrplot(m, order = 'AOE')
corrplot(m, method = 'number')
testRes = cor.mtest(m, conf.level = 0.95)
corrplot(m, p.mat = testRes$p, method = 'circle', type = 'lower', insig='blank')
correlation_df_sig<-cor.mtest(m, conf.level = 0.95, method = "spearman")
Plot1<-corrplot(cor(m, method = 'spearman', use = "pairwise.complete.obs"),
method = 'color',
addCoef.col = 'black',
p.mat = correlation_df_sig$p,
insig='blank',
diag = FALSE,
number.cex = 0.5,
type='upper'
)
set.seed(1234)
options(scipen = 10)
model_bw <- lm(sold_price ~., data = iplbw)
summary(model_bw)
##
## Call:
## lm(formula = sold_price ~ ., data = iplbw)
##
## Residuals:
## Min 1Q Median 3Q Max
## -721316 -197342 938 194922 562580
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1032369.31604 416103.14611 -2.481 0.01764 *
## t_runs 0.05438 32.28123 0.002 0.99866
## odi_runs_s 48.16430 33.11068 1.455 0.15398
## odi_sr_b 1215.79791 3048.00521 0.399 0.69221
## captaincy_exp 283673.05528 140286.92233 2.022 0.05025 .
## runs_s -338.43272 218.50370 -1.549 0.12970
## hs -2696.64777 3266.85935 -0.825 0.41427
## ave 23855.30550 11153.31800 2.139 0.03894 *
## sr_b 3182.04960 3177.90576 1.001 0.32301
## sixers 7175.03266 4440.20779 1.616 0.11438
## L25 657848.29732 236097.78553 2.786 0.00827 **
## B25_35 248223.83110 135173.00034 1.836 0.07414 .
## A35 NA NA NA NA
## country_IND 548329.74691 150666.71234 3.639 0.00081 ***
## country_OTH NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 346600 on 38 degrees of freedom
## Multiple R-squared: 0.6279, Adjusted R-squared: 0.5105
## F-statistic: 5.345 on 12 and 38 DF, p-value: 0.00003361
ggplotRegression <- function (fit) {
require(ggplot2)
ggplot(fit$model, aes_string(x = names(fit$model)[2], y = names(fit$model)[1])) +
geom_point() +
stat_smooth(method = "lm", col = "red") +
labs(title = paste("Adj R2 = ",signif(summary(fit)$adj.r.squared, 5),
"Intercept =",signif(fit$coef[[1]],5 ),
" Slope =",signif(fit$coef[[2]], 5),
" P =",signif(summary(fit)$coef[2,4], 5)))
}
ggplotRegression(lm(sold_price ~ ave, data = iplbt))
## `geom_smooth()` using formula = 'y ~ x'
ggplotRegression(lm(sold_price ~ L25, data = iplbt))
## `geom_smooth()` using formula = 'y ~ x'
ggplotRegression(lm(sold_price ~ country_IND, data = iplbt))
## `geom_smooth()` using formula = 'y ~ x'
error <- model_bw$residuals
hist(error)
The result of this that we could arrive at the contributing factors for the selling price for various categories is as follows