# Read the survey CSV into `data` and print summary statistics for each column.
data <- read.csv(file = "MBA Starting Salaries Data.csv")
summary(data)
## age sex gmat_tot gmat_qpc
## Min. :22.00 Min. :1.000 Min. :450.0 Min. :28.00
## 1st Qu.:25.00 1st Qu.:1.000 1st Qu.:580.0 1st Qu.:72.00
## Median :27.00 Median :1.000 Median :620.0 Median :83.00
## Mean :27.36 Mean :1.248 Mean :619.5 Mean :80.64
## 3rd Qu.:29.00 3rd Qu.:1.000 3rd Qu.:660.0 3rd Qu.:93.00
## Max. :48.00 Max. :2.000 Max. :790.0 Max. :99.00
## gmat_vpc gmat_tpc s_avg f_avg
## Min. :16.00 Min. : 0.0 Min. :2.000 Min. :0.000
## 1st Qu.:71.00 1st Qu.:78.0 1st Qu.:2.708 1st Qu.:2.750
## Median :81.00 Median :87.0 Median :3.000 Median :3.000
## Mean :78.32 Mean :84.2 Mean :3.025 Mean :3.062
## 3rd Qu.:91.00 3rd Qu.:94.0 3rd Qu.:3.300 3rd Qu.:3.250
## Max. :99.00 Max. :99.0 Max. :4.000 Max. :4.000
## quarter work_yrs frstlang salary
## Min. :1.000 Min. : 0.000 Min. :1.000 Min. : 0
## 1st Qu.:1.250 1st Qu.: 2.000 1st Qu.:1.000 1st Qu.: 0
## Median :2.000 Median : 3.000 Median :1.000 Median : 999
## Mean :2.478 Mean : 3.872 Mean :1.117 Mean : 39026
## 3rd Qu.:3.000 3rd Qu.: 4.000 3rd Qu.:1.000 3rd Qu.: 97000
## Max. :4.000 Max. :22.000 Max. :2.000 Max. :220000
## satis
## Min. : 1.0
## 1st Qu.: 5.0
## Median : 6.0
## Mean :172.2
## 3rd Qu.: 7.0
## Max. :998.0
# Draw one histogram per column of `data` on a shared 4 x 4 grid of panels.
par(mfrow = c(4, 4))
# names() gives the column names as a character vector. dimnames() would
# return a list of (row names, column names), so indexing that list by column
# position does not give the column's name.
colnames <- names(data)
# Iterate over however many columns there are instead of a fixed count.
for (i in seq_along(colnames)) {
  # Use the column name for both the title and the x axis; the default x label
  # would be the literal expression "data[, i]".
  hist(data[, i], main = colnames[i], xlab = colnames[i])
}

# Scatterplot matrix of the thirteen survey variables against each other.
par(mfrow = c(4, 4))
pairs(
  formula = ~ age + sex + gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc +
    s_avg + f_avg + quarter + work_yrs + frstlang + salary + satis,
  data = data
)

library(corrgram)
## Warning: package 'corrgram' was built under R version 3.4.3
# Correlogram of all columns: shaded cells below the diagonal, pie glyphs
# above it, and the variable names on the diagonal.
corrgram(
  data,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "MBA Starting Salaries"
)

library(corpcor)
library(tseries)
## Warning: package 'tseries' was built under R version 3.4.3
# Covariance matrix of every column, computed on the raw values and printed.
# NOTE(review): the summary shows values of 998/999 in `salary` and `satis`,
# and the job-status subsets further down treat salary 998/999 as special
# codes. If these are placeholder codes rather than measurements they distort
# every covariance involving those columns - confirm, and consider recoding
# them to NA before computing moments.
data_mat <- as.matrix(data)
covmat <- cov(data_mat)
covmat
## age sex gmat_tot gmat_qpc
## age 1.376904e+01 -4.513248e-02 -3.115879e+01 -1.192655e+01
## sex -4.513248e-02 1.872677e-01 -1.328841e+00 -1.053769e+00
## gmat_tot -3.115879e+01 -1.328841e+00 3.310688e+03 6.200233e+02
## gmat_qpc -1.192655e+01 -1.053769e+00 6.200233e+02 2.210731e+02
## gmat_vpc -2.763643e+00 5.463758e-01 7.260006e+02 3.814826e+01
## gmat_tpc -8.839978e+00 -4.908960e-02 6.839911e+02 1.357997e+02
## s_avg 2.116874e-01 2.096227e-02 2.480257e+00 -1.691233e-01
## f_avg -3.399348e-02 2.082698e-02 3.154688e+00 5.753854e-01
## quarter -2.045935e-01 -6.414267e-02 -5.891153e+00 6.001979e-01
## work_yrs 1.029494e+01 -1.580172e-02 -3.391634e+01 -1.137186e+01
## frstlang 6.796610e-02 2.138980e-04 -2.499933e+00 6.646346e-01
## salary -1.183042e+04 1.518264e+03 -1.611600e+05 -3.335823e+04
## satis -1.763499e+02 -8.780808e+00 1.765263e+03 3.348371e+02
## gmat_vpc gmat_tpc s_avg f_avg
## age -2.7636427 -8.8399775 0.21168739 -0.03399348
## sex 0.5463758 -0.0490896 0.02096227 0.02082698
## gmat_tot 726.0006417 683.9910698 2.48025721 3.15468838
## gmat_qpc 38.1482581 135.7996845 -0.16912329 0.57538542
## gmat_vpc 284.2481217 157.4932488 1.31357023 0.67207000
## gmat_tpc 157.4932488 196.6057057 0.62710008 0.58698618
## s_avg 1.3135702 0.6271001 0.14521760 0.11016898
## f_avg 0.6720700 0.5869862 0.11016898 0.27567237
## quarter -3.2676666 -1.2923719 -0.32237213 -0.26080880
## work_yrs -3.6181653 -7.8575172 0.15926392 -0.06628700
## frstlang -2.1145691 -0.4663244 -0.01671372 -0.00626026
## salary -5273.8523836 3522.7500067 2831.60098580 787.65597177
## satis 392.3562739 484.2466779 -4.62884495 2.12532927
## quarter work_yrs frstlang salary
## age -2.045935e-01 10.29493864 6.796610e-02 -1.183042e+04
## sex -6.414267e-02 -0.01580172 2.138980e-04 1.518264e+03
## gmat_tot -5.891153e+00 -33.91633914 -2.499933e+00 -1.611600e+05
## gmat_qpc 6.001979e-01 -11.37186171 6.646346e-01 -3.335823e+04
## gmat_vpc -3.267667e+00 -3.61816529 -2.114569e+00 -5.273852e+03
## gmat_tpc -1.292372e+00 -7.85751718 -4.663244e-01 3.522750e+03
## s_avg -3.223721e-01 0.15926392 -1.671372e-02 2.831601e+03
## f_avg -2.608088e-01 -0.06628700 -6.260260e-03 7.876560e+02
## quarter 1.232119e+00 -0.30866822 3.553381e-02 -9.296214e+03
## work_yrs -3.086682e-01 10.44882490 -2.898318e-02 1.486147e+03
## frstlang 3.553381e-02 -0.02898318 1.035266e-01 -1.419586e+03
## salary -9.296214e+03 1486.14704152 -1.419586e+03 2.596062e+09
## satis -5.227133e-03 -131.24080907 9.484532e+00 -6.347115e+06
## satis
## age -1.763499e+02
## sex -8.780808e+00
## gmat_tot 1.765263e+03
## gmat_qpc 3.348371e+02
## gmat_vpc 3.923563e+02
## gmat_tpc 4.842467e+02
## s_avg -4.628845e+00
## f_avg 2.125329e+00
## quarter -5.227133e-03
## work_yrs -1.312408e+02
## frstlang 9.484532e+00
## salary -6.347115e+06
## satis 1.380974e+05
# Rescale the covariance matrix into the matching correlation matrix and
# print it.
cov2cor(covmat)
## age sex gmat_tot gmat_qpc gmat_vpc
## age 1.00000000 -0.028106442 -0.14593840 -0.21616985 -0.04417547
## sex -0.02810644 1.000000000 -0.05336820 -0.16377435 0.07488782
## gmat_tot -0.14593840 -0.053368202 1.00000000 0.72473781 0.74839187
## gmat_qpc -0.21616985 -0.163774346 0.72473781 1.00000000 0.15218014
## gmat_vpc -0.04417547 0.074887816 0.74839187 0.15218014 1.00000000
## gmat_tpc -0.16990307 -0.008090213 0.84779965 0.65137754 0.66621604
## s_avg 0.14970402 0.127115144 0.11311702 -0.02984873 0.20445365
## f_avg -0.01744806 0.091663891 0.10442409 0.07370455 0.07592225
## quarter -0.04967221 -0.133533171 -0.09223903 0.03636638 -0.17460736
## work_yrs 0.85829810 -0.011296374 -0.18235434 -0.23660827 -0.06639049
## frstlang 0.05692649 0.001536205 -0.13503402 0.13892774 -0.38980465
## salary -0.06257355 0.068858628 -0.05497188 -0.04403293 -0.00613934
## satis -0.12788825 -0.054602220 0.08255770 0.06060004 0.06262375
## gmat_tpc s_avg f_avg quarter work_yrs
## age -0.169903066 0.14970402 -0.01744806 -4.967221e-02 0.858298096
## sex -0.008090213 0.12711514 0.09166389 -1.335332e-01 -0.011296374
## gmat_tot 0.847799647 0.11311702 0.10442409 -9.223903e-02 -0.182354339
## gmat_qpc 0.651377538 -0.02984873 0.07370455 3.636638e-02 -0.236608270
## gmat_vpc 0.666216035 0.20445365 0.07592225 -1.746074e-01 -0.066390490
## gmat_tpc 1.000000000 0.11736245 0.07973210 -8.303535e-02 -0.173361859
## s_avg 0.117362449 1.00000000 0.55062139 -7.621166e-01 0.129292714
## f_avg 0.079732099 0.55062139 1.00000000 -4.475064e-01 -0.039056921
## quarter -0.083035351 -0.76211664 -0.44750637 1.000000e+00 -0.086026406
## work_yrs -0.173361859 0.12929271 -0.03905692 -8.602641e-02 1.000000000
## frstlang -0.103362747 -0.13631308 -0.03705695 9.949226e-02 -0.027866747
## salary 0.004930901 0.14583606 0.02944303 -1.643699e-01 0.009023407
## satis 0.092934266 -0.03268664 0.01089273 -1.267198e-05 -0.109255286
## frstlang salary satis
## age 0.056926486 -0.062573547 -1.278882e-01
## sex 0.001536205 0.068858628 -5.460222e-02
## gmat_tot -0.135034017 -0.054971880 8.255770e-02
## gmat_qpc 0.138927742 -0.044032933 6.060004e-02
## gmat_vpc -0.389804653 -0.006139340 6.262375e-02
## gmat_tpc -0.103362747 0.004930901 9.293427e-02
## s_avg -0.136313080 0.145836062 -3.268664e-02
## f_avg -0.037056954 0.029443027 1.089273e-02
## quarter 0.099492259 -0.164369865 -1.267198e-05
## work_yrs -0.027866747 0.009023407 -1.092553e-01
## frstlang 1.000000000 -0.086592096 7.932264e-02
## salary -0.086592096 1.000000000 -3.352171e-01
## satis 0.079322637 -0.335217114 1.000000e+00
# Keep only the rows whose salary is none of 0, 998 or 999 (presumably codes
# for "no salary reported" - confirm against the data dictionary). Rows with a
# missing salary are dropped as well.
gotjob <- subset(data, salary != 0 & salary != 998 & salary != 999)

# Chi-squared test of independence between sex and first language within
# `gotjob`.
# NOTE(review): R warns that the chi-squared approximation may be incorrect
# for this table; an exact test may be more appropriate.
mytable <- xtabs(~ sex + frstlang, data = gotjob)
chisq.test(mytable)
## Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: mytable
## X-squared = 0.11264, df = 1, p-value = 0.7372
# Chi-squared test of independence between sex and the satisfaction score
# within `gotjob`; `mytable` is overwritten with the new contingency table.
# NOTE(review): R warns that the chi-squared approximation may be incorrect
# for this table; an exact test may be more appropriate.
mytable <- xtabs(~ sex + satis, data = gotjob)
chisq.test(mytable)
## Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: mytable
## X-squared = 7.3413, df = 4, p-value = 0.1189
# Welch two-sample t-test comparing the mean of `sex` between the two
# `frstlang` groups in `gotjob`.
# NOTE(review): the summary shows `sex` only ranging over 1-2; if it is a
# two-level category code, a t-test on it is questionable and the
# sex x frstlang chi-squared test already addresses the same association -
# confirm this test is intended.
t.test( sex~frstlang , data = gotjob)
##
## Welch Two Sample t-test
##
## data: sex by frstlang
## t = -0.66028, df = 6.6552, p-value = 0.5313
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.6323891 0.3585796
## sample estimates:
## mean in group 1 mean in group 2
## 1.291667 1.428571
# Ordinary least-squares fit of salary on the twelve other survey columns,
# using only the rows in `gotjob`; the coefficient table is then printed.
k <- lm(
  formula = salary ~ age + sex + gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc +
    s_avg + f_avg + quarter + work_yrs + frstlang + satis,
  data = gotjob
)
summary(k)
##
## Call:
## lm(formula = salary ~ age + sex + gmat_tot + gmat_qpc + gmat_vpc +
## gmat_tpc + s_avg + f_avg + quarter + work_yrs + frstlang +
## satis, data = gotjob)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26489 -7983 -373 5923 70602
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 78005.66 52981.93 1.472 0.1444
## age 1750.65 1130.92 1.548 0.1251
## sex -3584.07 3595.85 -0.997 0.3216
## gmat_tot 16.19 178.85 0.090 0.9281
## gmat_qpc 796.55 496.78 1.603 0.1123
## gmat_vpc 546.31 501.97 1.088 0.2794
## gmat_tpc -1457.09 714.94 -2.038 0.0445 *
## s_avg -931.53 8240.31 -0.113 0.9102
## f_avg -2222.82 3894.57 -0.571 0.5696
## quarter -2336.56 2721.89 -0.858 0.3929
## work_yrs 749.66 1135.90 0.660 0.5110
## frstlang 7719.42 7373.27 1.047 0.2979
## satis -1086.54 2157.76 -0.504 0.6158
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15430 on 90 degrees of freedom
## Multiple R-squared: 0.3422, Adjusted R-squared: 0.2545
## F-statistic: 3.902 on 12 and 90 DF, p-value: 8.086e-05
# Keep only the rows whose salary is exactly 0 (presumably respondents without
# a job - confirm against the data dictionary). A salary of 0 can never equal
# 998 or 999, so no further exclusion is needed. Rows with a missing salary
# are dropped.
notgotjob <- subset(data, salary == 0)

# Chi-squared test of independence between sex and first language within
# `notgotjob`.
# NOTE(review): R warns that the chi-squared approximation may be incorrect
# for this table; an exact test may be more appropriate.
mytable <- xtabs(~ sex + frstlang, data = notgotjob)
chisq.test(mytable)
## Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: mytable
## X-squared = 0.21376, df = 1, p-value = 0.6438
# Chi-squared test of independence between sex and the satisfaction score
# within `notgotjob`; `mytable` is overwritten with the new contingency table.
# NOTE(review): R warns that the chi-squared approximation may be incorrect
# for this table; an exact test may be more appropriate.
mytable <- xtabs(~ sex + satis, data = notgotjob)
chisq.test(mytable)
## Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: mytable
## X-squared = 7.096, df = 3, p-value = 0.0689