# Print the current working directory; the CSV below is read relative to it.
getwd()
## [1] "C:/Users/TANAY/Downloads"

# Load the MBA starting-salaries data set into a data frame.
mba <- read.csv(file = "MBA Starting Salaries Data.csv")

# Per-column descriptive statistics for the full data set.
summary(object = mba)
##       age             sex           gmat_tot        gmat_qpc    
##  Min.   :22.00   Min.   :1.000   Min.   :450.0   Min.   :28.00  
##  1st Qu.:25.00   1st Qu.:1.000   1st Qu.:580.0   1st Qu.:72.00  
##  Median :27.00   Median :1.000   Median :620.0   Median :83.00  
##  Mean   :27.36   Mean   :1.248   Mean   :619.5   Mean   :80.64  
##  3rd Qu.:29.00   3rd Qu.:1.000   3rd Qu.:660.0   3rd Qu.:93.00  
##  Max.   :48.00   Max.   :2.000   Max.   :790.0   Max.   :99.00  
##     gmat_vpc        gmat_tpc        s_avg           f_avg      
##  Min.   :16.00   Min.   : 0.0   Min.   :2.000   Min.   :0.000  
##  1st Qu.:71.00   1st Qu.:78.0   1st Qu.:2.708   1st Qu.:2.750  
##  Median :81.00   Median :87.0   Median :3.000   Median :3.000  
##  Mean   :78.32   Mean   :84.2   Mean   :3.025   Mean   :3.062  
##  3rd Qu.:91.00   3rd Qu.:94.0   3rd Qu.:3.300   3rd Qu.:3.250  
##  Max.   :99.00   Max.   :99.0   Max.   :4.000   Max.   :4.000  
##     quarter         work_yrs         frstlang         salary      
##  Min.   :1.000   Min.   : 0.000   Min.   :1.000   Min.   :     0  
##  1st Qu.:1.250   1st Qu.: 2.000   1st Qu.:1.000   1st Qu.:     0  
##  Median :2.000   Median : 3.000   Median :1.000   Median :   999  
##  Mean   :2.478   Mean   : 3.872   Mean   :1.117   Mean   : 39026  
##  3rd Qu.:3.000   3rd Qu.: 4.000   3rd Qu.:1.000   3rd Qu.: 97000  
##  Max.   :4.000   Max.   :22.000   Max.   :2.000   Max.   :220000  
##      satis      
##  Min.   :  1.0  
##  1st Qu.:  5.0  
##  Median :  6.0  
##  Mean   :172.2  
##  3rd Qu.:  7.0  
##  Max.   :998.0
# Rows whose salary exceeds 1000. The threshold also drops the small non-zero
# values visible in the summary (e.g. the median of 999).
placed <- subset(mba, subset = salary > 1000)

# Horizontal box plot of the salaries in `placed`.
boxplot(x = placed$salary, horizontal = TRUE, main = "Salary")

# Rows whose recorded salary is exactly 0.
unplaced <- subset(mba, subset = salary == 0)

library(lattice)

# Lattice histogram of age for the rows in `placed`.
histogram(
  placed$age,
  xlab = "age",
  main = "Distribution of age"
)

# Lattice histogram of total GMAT score for the rows in `placed`.
histogram(
  placed$gmat_tot,
  xlab = "GMAT Score",
  main = "Distribution of GMAT score"
)

library(car)
# Scatterplots of salary against individual predictors for the rows in
# `placed`. Each relationship is drawn exactly once.

# Salary against age.
scatterplot(salary ~ age, data = placed,
            main = "Salary Vs Age", ylab = "Salary", xlab = "age")

# Salary against total GMAT score.
scatterplot(salary ~ gmat_tot, data = placed,
            main = "Salary Vs GMAT score", ylab = "Salary",
            xlab = "GMAT score")

# Salary against years of work experience.
scatterplot(salary ~ work_yrs, data = placed,
            main = "Salary Vs Work experience", ylab = "Salary",
            xlab = "Work experience")

# Salary against satisfaction level.
scatterplot(salary ~ satis, data = placed,
            xlab = "Satisfaction level", ylab = "Salary",
            main = "Salary v/s Satisfaction level")

# Horizontal box plots of salary, one box per value of `sex`, for the rows in
# `placed`. `main` is passed exactly once: a plot has a single title, so a
# second `main` argument can only conflict with the first.
boxplot(salary ~ sex, data = placed,
        main = "Gender v/s Salary",
        horizontal = TRUE,
        xlab = "Salary")

# Mean salary within `placed` for each distinct value of f_avg.
# NOTE(review): the output lists an f_avg of 0.00 -- possibly a placeholder
# rather than a real average; confirm against the data dictionary.
aggregate(placed$salary~placed$f_avg, FUN=mean)
##    placed$f_avg placed$salary
## 1          0.00      146000.0
## 2          2.00       95000.0
## 3          2.25       90000.0
## 4          2.50       99880.0
## 5          2.67       86000.0
## 6          2.75      107404.4
## 7          2.83      105000.0
## 8          3.00      103596.0
## 9          3.25       99660.0
## 10         3.33       82000.0
## 11         3.50      103547.1
## 12         3.60       95500.0
## 13         3.67       97500.0
## 14         3.75      113333.3
## 15         4.00      117500.0
# Mean salary within `placed` for each distinct value of s_avg.
aggregate(placed$salary~placed$s_avg, FUN=mean)
##    placed$s_avg placed$salary
## 1          2.20     105000.00
## 2          2.30      98000.00
## 3          2.40      93500.00
## 4          2.50     131000.00
## 5          2.60     107285.00
## 6          2.70      92375.00
## 7          2.80     101657.14
## 8          2.90      96888.92
## 9          2.91     105000.00
## 10         3.00     103250.00
## 11         3.09     103500.00
## 12         3.10     103542.86
## 13         3.20     104888.89
## 14         3.27      95000.00
## 15         3.30      99681.82
## 16         3.40      94600.00
## 17         3.45     105000.00
## 18         3.50      99700.00
## 19         3.60     116500.00
## 20         3.70     110500.00
## 21         3.80     112500.00
## 22         4.00     146000.00
# Mean salary within `placed` for each distinct age.
aggregate(placed$salary~placed$age, FUN=mean)
##    placed$age placed$salary
## 1          22      85000.00
## 2          23      91651.20
## 3          24     101518.75
## 4          25      99086.96
## 5          26     101665.00
## 6          27     102214.29
## 7          28     103625.00
## 8          29     102083.33
## 9          30     109916.67
## 10         31     100500.00
## 11         32     107300.00
## 12         33     118000.00
## 13         34     105000.00
## 14         39     112000.00
## 15         40     183000.00
# Mean salary within `placed` for each distinct number of work years.
aggregate(placed$salary~placed$work_yrs, FUN=mean)
##    placed$work_yrs placed$salary
## 1                0      95000.00
## 2                1     103532.00
## 3                2      97673.68
## 4                3     101652.86
## 5                4     105454.55
## 6                5     103142.86
## 7                6     105928.57
## 8                7      98000.00
## 9                8     105025.00
## 10              10     118000.00
## 11              15     183000.00
## 12              16     108500.00
library("corrgram")
# Correlogram of the columns of `placed`, with pie glyphs in the upper panel.
corrgram(
  x = placed,
  upper.panel = panel.pie,
  main = "Corrgram of placed MBAs variables"
)

# Pearson correlation test: salary vs. age within `placed`.
cor.test(x = placed$salary, y = placed$age)
## 
##  Pearson's product-moment correlation
## 
## data:  placed$salary and placed$age
## t = 5.7968, df = 101, p-value = 7.748e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3388862 0.6320523
## sample estimates:
##       cor 
## 0.4996428
# Pearson correlation test: salary vs. years of work experience within `placed`.
cor.test(x = placed$salary, y = placed$work_yrs)
## 
##  Pearson's product-moment correlation
## 
## data:  placed$salary and placed$work_yrs
## t = 5.1303, df = 101, p-value = 1.403e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2863362 0.5957697
## sample estimates:
##       cor 
## 0.4546663
# Pearson correlation test: salary vs. the numeric `sex` code within `placed`.
cor.test(x = placed$salary, y = placed$sex)
## 
##  Pearson's product-moment correlation
## 
## data:  placed$salary and placed$sex
## t = -1.6948, df = 101, p-value = 0.0932
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3485952  0.0281416
## sample estimates:
##        cor 
## -0.1662887
# Pearson correlation test: salary vs. satisfaction score within `placed`.
cor.test(x = placed$salary, y = placed$satis)
## 
##  Pearson's product-moment correlation
## 
## data:  placed$salary and placed$satis
## t = -0.40283, df = 101, p-value = 0.6879
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2317788  0.1546729
## sample estimates:
##        cor 
## -0.0400506
# Pearson correlation test: salary vs. total GMAT score within `placed`.
cor.test(x = placed$salary, y = placed$gmat_tot)
## 
##  Pearson's product-moment correlation
## 
## data:  placed$salary and placed$gmat_tot
## t = -0.91501, df = 101, p-value = 0.3624
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2792952  0.1046903
## sample estimates:
##         cor 
## -0.09067141
# Pearson correlation test: salary vs. the numeric `frstlang` code within
# `placed`.
cor.test(x = placed$salary, y = placed$frstlang)
## 
##  Pearson's product-moment correlation
## 
## data:  placed$salary and placed$frstlang
## t = 2.7846, df = 101, p-value = 0.0064
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.07749965 0.43791500
## sample estimates:
##       cor 
## 0.2670195
# Pearson correlation test: salary vs. f_avg within `placed`.
cor.test(x = placed$salary, y = placed$f_avg)
## 
##  Pearson's product-moment correlation
## 
## data:  placed$salary and placed$f_avg
## t = -1.0717, df = 101, p-value = 0.2864
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.29353985  0.08931862
## sample estimates:
##       cor 
## -0.106039
# Pearson correlation test: salary vs. s_avg within `placed`.
cor.test(x = placed$salary, y = placed$s_avg)
## 
##  Pearson's product-moment correlation
## 
## data:  placed$salary and placed$s_avg
## t = 1.0277, df = 101, p-value = 0.3065
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.09363639  0.28955576
## sample estimates:
##       cor 
## 0.1017317
# Full linear model: salary regressed on the twelve other variables, fitted on
# the rows in `placed`.
fit <- lm(
  formula = salary ~ age + sex + gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc +
    s_avg + f_avg + quarter + work_yrs + frstlang + satis,
  data = placed
)
summary(fit)
## 
## Call:
## lm(formula = salary ~ age + sex + gmat_tot + gmat_qpc + gmat_vpc + 
##     gmat_tpc + s_avg + f_avg + quarter + work_yrs + frstlang + 
##     satis, data = placed)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -26489  -7983   -373   5923  70602 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 78005.66   52981.93   1.472   0.1444  
## age          1750.65    1130.92   1.548   0.1251  
## sex         -3584.07    3595.85  -0.997   0.3216  
## gmat_tot       16.19     178.85   0.090   0.9281  
## gmat_qpc      796.55     496.78   1.603   0.1123  
## gmat_vpc      546.31     501.97   1.088   0.2794  
## gmat_tpc    -1457.09     714.94  -2.038   0.0445 *
## s_avg        -931.53    8240.31  -0.113   0.9102  
## f_avg       -2222.82    3894.57  -0.571   0.5696  
## quarter     -2336.56    2721.89  -0.858   0.3929  
## work_yrs      749.66    1135.90   0.660   0.5110  
## frstlang     7719.42    7373.27   1.047   0.2979  
## satis       -1086.54    2157.76  -0.504   0.6158  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15430 on 90 degrees of freedom
## Multiple R-squared:  0.3422, Adjusted R-squared:  0.2545 
## F-statistic: 3.902 on 12 and 90 DF,  p-value: 8.086e-05
# Reduced model: salary on age, sex and work experience, fitted on `placed`.
fit1 <- lm(
  formula = salary ~ age + sex + work_yrs,
  data = placed
)
summary(fit1)
## 
## Call:
## lm(formula = salary ~ age + sex + work_yrs, data = placed)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -29250  -9239  -1146   5429  84318 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  45674.9    24522.2   1.863   0.0655 .
## age           2263.4     1004.8   2.253   0.0265 *
## sex          -3852.2     3395.2  -1.135   0.2593  
## work_yrs       478.2     1085.3   0.441   0.6604  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15600 on 99 degrees of freedom
## Multiple R-squared:  0.2602, Adjusted R-squared:  0.2378 
## F-statistic: 11.61 on 3 and 99 DF,  p-value: 1.389e-06
# Reduced model: salary on age, work experience and first language, fitted on
# `placed`.
fit2 <- lm(
  formula = salary ~ age + work_yrs + frstlang,
  data = placed
)
summary(fit2)
## 
## Call:
## lm(formula = salary ~ age + work_yrs + frstlang, data = placed)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -31941  -9139  -1086   4793  75526 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  40492.7    23417.5   1.729   0.0869 .
## age           1892.0     1075.9   1.759   0.0818 .
## work_yrs       747.2     1116.9   0.669   0.5050  
## frstlang      8546.9     6728.1   1.270   0.2069  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15570 on 99 degrees of freedom
## Multiple R-squared:  0.2626, Adjusted R-squared:  0.2403 
## F-statistic: 11.75 on 3 and 99 DF,  p-value: 1.188e-06
# Smallest model: salary on age and work experience only, fitted on `placed`.
fit3 <- lm(
  formula = salary ~ age + work_yrs,
  data = placed
)
summary(fit3)
## 
## Call:
## lm(formula = salary ~ age + work_yrs, data = placed)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -31675  -8099  -2108   4411  80650 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  36967.5    23323.8   1.585   0.1161  
## age           2413.8      997.4   2.420   0.0173 *
## work_yrs       388.8     1084.0   0.359   0.7206  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15620 on 100 degrees of freedom
## Multiple R-squared:  0.2506, Adjusted R-squared:  0.2356 
## F-statistic: 16.72 on 2 and 100 DF,  p-value: 5.438e-07

Therefore, MBA starting salary depends on age and work experience.