# Load the MBA starting-salaries CSV (looked up relative to the working
# directory) and print a per-column summary of what was read.
data <- read.csv(file = "MBA Starting Salaries Data.csv")
summary(object = data)
##       age             sex           gmat_tot        gmat_qpc    
##  Min.   :22.00   Min.   :1.000   Min.   :450.0   Min.   :28.00  
##  1st Qu.:25.00   1st Qu.:1.000   1st Qu.:580.0   1st Qu.:72.00  
##  Median :27.00   Median :1.000   Median :620.0   Median :83.00  
##  Mean   :27.36   Mean   :1.248   Mean   :619.5   Mean   :80.64  
##  3rd Qu.:29.00   3rd Qu.:1.000   3rd Qu.:660.0   3rd Qu.:93.00  
##  Max.   :48.00   Max.   :2.000   Max.   :790.0   Max.   :99.00  
##     gmat_vpc        gmat_tpc        s_avg           f_avg      
##  Min.   :16.00   Min.   : 0.0   Min.   :2.000   Min.   :0.000  
##  1st Qu.:71.00   1st Qu.:78.0   1st Qu.:2.708   1st Qu.:2.750  
##  Median :81.00   Median :87.0   Median :3.000   Median :3.000  
##  Mean   :78.32   Mean   :84.2   Mean   :3.025   Mean   :3.062  
##  3rd Qu.:91.00   3rd Qu.:94.0   3rd Qu.:3.300   3rd Qu.:3.250  
##  Max.   :99.00   Max.   :99.0   Max.   :4.000   Max.   :4.000  
##     quarter         work_yrs         frstlang         salary      
##  Min.   :1.000   Min.   : 0.000   Min.   :1.000   Min.   :     0  
##  1st Qu.:1.250   1st Qu.: 2.000   1st Qu.:1.000   1st Qu.:     0  
##  Median :2.000   Median : 3.000   Median :1.000   Median :   999  
##  Mean   :2.478   Mean   : 3.872   Mean   :1.117   Mean   : 39026  
##  3rd Qu.:3.000   3rd Qu.: 4.000   3rd Qu.:1.000   3rd Qu.: 97000  
##  Max.   :4.000   Max.   :22.000   Max.   :2.000   Max.   :220000  
##      satis      
##  Min.   :  1.0  
##  1st Qu.:  5.0  
##  Median :  6.0  
##  Mean   :172.2  
##  3rd Qu.:  7.0  
##  Max.   :998.0
# Draw one histogram per column of `data` on a shared 4 x 4 grid (room for up
# to 16 panels), labelling each panel with the name of the column it shows.
par(mfrow = c(4, 4))
# names(data) is the character vector of column names. dimnames(data) is a
# two-element list (row names, column names), so indexing it by column number
# never yields the i-th column's name and the panels ended up mislabelled.
# The variable keeps its original name in case later code refers to it.
colnames <- names(data)
# seq_along() follows the actual number of columns instead of a hard-coded 13.
for (i in seq_along(colnames)) {
  hist(data[[i]], main = colnames[i], xlab = colnames[i])
}

# Scatterplot matrix of all thirteen variables against each other.
# NOTE(review): pairs() lays out its own panel grid, so this par(mfrow) call
# probably has no effect on the plot itself - confirm before removing it.
par(mfrow = c(4, 4))
pairs(
  ~ age + sex + gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc + s_avg + f_avg +
    quarter + work_yrs + frstlang + salary + satis,
  data = data
)

library(corrgram)
## Warning: package 'corrgram' was built under R version 3.4.3
# Correlogram of every column in `data`: shaded cells in the lower triangle,
# pie glyphs in the upper triangle, variable names on the diagonal.
# order = TRUE asks corrgram to reorder the variables rather than keep the
# column order of the data frame.
corrgram(
  data,
  main = "MBA Starting Salaries",
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt
)

library(corpcor)
library(tseries)
## Warning: package 'tseries' was built under R version 3.4.3
# Covariance matrix across all columns of the raw data, printed in full.
# NOTE(review): salary contains the values 998/999 (excluded when `gotjob` is
# built) and satis has a maximum of 998 in the summary, so what look like
# non-response codes are treated as real numbers here - confirm whether these
# rows should be dropped before computing covariances.
data_mat <- as.matrix(x = data)
covmat <- cov(x = data_mat)
covmat
##                    age           sex      gmat_tot      gmat_qpc
## age       1.376904e+01 -4.513248e-02 -3.115879e+01 -1.192655e+01
## sex      -4.513248e-02  1.872677e-01 -1.328841e+00 -1.053769e+00
## gmat_tot -3.115879e+01 -1.328841e+00  3.310688e+03  6.200233e+02
## gmat_qpc -1.192655e+01 -1.053769e+00  6.200233e+02  2.210731e+02
## gmat_vpc -2.763643e+00  5.463758e-01  7.260006e+02  3.814826e+01
## gmat_tpc -8.839978e+00 -4.908960e-02  6.839911e+02  1.357997e+02
## s_avg     2.116874e-01  2.096227e-02  2.480257e+00 -1.691233e-01
## f_avg    -3.399348e-02  2.082698e-02  3.154688e+00  5.753854e-01
## quarter  -2.045935e-01 -6.414267e-02 -5.891153e+00  6.001979e-01
## work_yrs  1.029494e+01 -1.580172e-02 -3.391634e+01 -1.137186e+01
## frstlang  6.796610e-02  2.138980e-04 -2.499933e+00  6.646346e-01
## salary   -1.183042e+04  1.518264e+03 -1.611600e+05 -3.335823e+04
## satis    -1.763499e+02 -8.780808e+00  1.765263e+03  3.348371e+02
##               gmat_vpc     gmat_tpc         s_avg        f_avg
## age         -2.7636427   -8.8399775    0.21168739  -0.03399348
## sex          0.5463758   -0.0490896    0.02096227   0.02082698
## gmat_tot   726.0006417  683.9910698    2.48025721   3.15468838
## gmat_qpc    38.1482581  135.7996845   -0.16912329   0.57538542
## gmat_vpc   284.2481217  157.4932488    1.31357023   0.67207000
## gmat_tpc   157.4932488  196.6057057    0.62710008   0.58698618
## s_avg        1.3135702    0.6271001    0.14521760   0.11016898
## f_avg        0.6720700    0.5869862    0.11016898   0.27567237
## quarter     -3.2676666   -1.2923719   -0.32237213  -0.26080880
## work_yrs    -3.6181653   -7.8575172    0.15926392  -0.06628700
## frstlang    -2.1145691   -0.4663244   -0.01671372  -0.00626026
## salary   -5273.8523836 3522.7500067 2831.60098580 787.65597177
## satis      392.3562739  484.2466779   -4.62884495   2.12532927
##                quarter      work_yrs      frstlang        salary
## age      -2.045935e-01   10.29493864  6.796610e-02 -1.183042e+04
## sex      -6.414267e-02   -0.01580172  2.138980e-04  1.518264e+03
## gmat_tot -5.891153e+00  -33.91633914 -2.499933e+00 -1.611600e+05
## gmat_qpc  6.001979e-01  -11.37186171  6.646346e-01 -3.335823e+04
## gmat_vpc -3.267667e+00   -3.61816529 -2.114569e+00 -5.273852e+03
## gmat_tpc -1.292372e+00   -7.85751718 -4.663244e-01  3.522750e+03
## s_avg    -3.223721e-01    0.15926392 -1.671372e-02  2.831601e+03
## f_avg    -2.608088e-01   -0.06628700 -6.260260e-03  7.876560e+02
## quarter   1.232119e+00   -0.30866822  3.553381e-02 -9.296214e+03
## work_yrs -3.086682e-01   10.44882490 -2.898318e-02  1.486147e+03
## frstlang  3.553381e-02   -0.02898318  1.035266e-01 -1.419586e+03
## salary   -9.296214e+03 1486.14704152 -1.419586e+03  2.596062e+09
## satis    -5.227133e-03 -131.24080907  9.484532e+00 -6.347115e+06
##                  satis
## age      -1.763499e+02
## sex      -8.780808e+00
## gmat_tot  1.765263e+03
## gmat_qpc  3.348371e+02
## gmat_vpc  3.923563e+02
## gmat_tpc  4.842467e+02
## s_avg    -4.628845e+00
## f_avg     2.125329e+00
## quarter  -5.227133e-03
## work_yrs -1.312408e+02
## frstlang  9.484532e+00
## salary   -6.347115e+06
## satis     1.380974e+05
# Rescale the covariance matrix into the corresponding correlation matrix
# (unit diagonal) and print it.
cov2cor(V = covmat)
##                  age          sex    gmat_tot    gmat_qpc    gmat_vpc
## age       1.00000000 -0.028106442 -0.14593840 -0.21616985 -0.04417547
## sex      -0.02810644  1.000000000 -0.05336820 -0.16377435  0.07488782
## gmat_tot -0.14593840 -0.053368202  1.00000000  0.72473781  0.74839187
## gmat_qpc -0.21616985 -0.163774346  0.72473781  1.00000000  0.15218014
## gmat_vpc -0.04417547  0.074887816  0.74839187  0.15218014  1.00000000
## gmat_tpc -0.16990307 -0.008090213  0.84779965  0.65137754  0.66621604
## s_avg     0.14970402  0.127115144  0.11311702 -0.02984873  0.20445365
## f_avg    -0.01744806  0.091663891  0.10442409  0.07370455  0.07592225
## quarter  -0.04967221 -0.133533171 -0.09223903  0.03636638 -0.17460736
## work_yrs  0.85829810 -0.011296374 -0.18235434 -0.23660827 -0.06639049
## frstlang  0.05692649  0.001536205 -0.13503402  0.13892774 -0.38980465
## salary   -0.06257355  0.068858628 -0.05497188 -0.04403293 -0.00613934
## satis    -0.12788825 -0.054602220  0.08255770  0.06060004  0.06262375
##              gmat_tpc       s_avg       f_avg       quarter     work_yrs
## age      -0.169903066  0.14970402 -0.01744806 -4.967221e-02  0.858298096
## sex      -0.008090213  0.12711514  0.09166389 -1.335332e-01 -0.011296374
## gmat_tot  0.847799647  0.11311702  0.10442409 -9.223903e-02 -0.182354339
## gmat_qpc  0.651377538 -0.02984873  0.07370455  3.636638e-02 -0.236608270
## gmat_vpc  0.666216035  0.20445365  0.07592225 -1.746074e-01 -0.066390490
## gmat_tpc  1.000000000  0.11736245  0.07973210 -8.303535e-02 -0.173361859
## s_avg     0.117362449  1.00000000  0.55062139 -7.621166e-01  0.129292714
## f_avg     0.079732099  0.55062139  1.00000000 -4.475064e-01 -0.039056921
## quarter  -0.083035351 -0.76211664 -0.44750637  1.000000e+00 -0.086026406
## work_yrs -0.173361859  0.12929271 -0.03905692 -8.602641e-02  1.000000000
## frstlang -0.103362747 -0.13631308 -0.03705695  9.949226e-02 -0.027866747
## salary    0.004930901  0.14583606  0.02944303 -1.643699e-01  0.009023407
## satis     0.092934266 -0.03268664  0.01089273 -1.267198e-05 -0.109255286
##              frstlang       salary         satis
## age       0.056926486 -0.062573547 -1.278882e-01
## sex       0.001536205  0.068858628 -5.460222e-02
## gmat_tot -0.135034017 -0.054971880  8.255770e-02
## gmat_qpc  0.138927742 -0.044032933  6.060004e-02
## gmat_vpc -0.389804653 -0.006139340  6.262375e-02
## gmat_tpc -0.103362747  0.004930901  9.293427e-02
## s_avg    -0.136313080  0.145836062 -3.268664e-02
## f_avg    -0.037056954  0.029443027  1.089273e-02
## quarter   0.099492259 -0.164369865 -1.267198e-05
## work_yrs -0.027866747  0.009023407 -1.092553e-01
## frstlang  1.000000000 -0.086592096  7.932264e-02
## salary   -0.086592096  1.000000000 -3.352171e-01
## satis     0.079322637 -0.335217114  1.000000e+00
# Keep only the rows whose salary is an actual figure: 0, 998 and 999 are
# excluded, and rows with a missing salary are dropped as well.
gotjob <- data[
  !is.na(data$salary) & !(data$salary %in% c(0, 998, 999)),
]
# Cross-tabulate sex by first language within that subset and run a
# chi-squared test of independence on the resulting table.
mytable <- xtabs(formula = ~ sex + frstlang, data = gotjob)
chisq.test(mytable)
## Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  mytable
## X-squared = 0.11264, df = 1, p-value = 0.7372
# Same subset, now cross-tabulating sex by satisfaction rating; `mytable` is
# overwritten and tested for independence again.
mytable <- xtabs(formula = ~ sex + satis, data = gotjob)
chisq.test(mytable)
## Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  mytable
## X-squared = 7.3413, df = 4, p-value = 0.1189
# Two-sample t-test comparing the mean of `sex` between the two `frstlang`
# groups of `gotjob`.
# NOTE(review): sex is a 1/2 code, so its group mean is really a proportion;
# confirm that sex (rather than e.g. salary) is the intended response here.
t.test(formula = sex ~ frstlang, data = gotjob)
## 
##  Welch Two Sample t-test
## 
## data:  sex by frstlang
## t = -0.66028, df = 6.6552, p-value = 0.5313
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.6323891  0.3585796
## sample estimates:
## mean in group 1 mean in group 2 
##        1.291667        1.428571
# Ordinary least-squares fit of salary on every other column, restricted to
# the `gotjob` rows, followed by the coefficient table and fit statistics.
k <- lm(
  formula = salary ~ age + sex + gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc +
    s_avg + f_avg + quarter + work_yrs + frstlang + satis,
  data = gotjob
)
summary(k)
## 
## Call:
## lm(formula = salary ~ age + sex + gmat_tot + gmat_qpc + gmat_vpc + 
##     gmat_tpc + s_avg + f_avg + quarter + work_yrs + frstlang + 
##     satis, data = gotjob)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -26489  -7983   -373   5923  70602 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 78005.66   52981.93   1.472   0.1444  
## age          1750.65    1130.92   1.548   0.1251  
## sex         -3584.07    3595.85  -0.997   0.3216  
## gmat_tot       16.19     178.85   0.090   0.9281  
## gmat_qpc      796.55     496.78   1.603   0.1123  
## gmat_vpc      546.31     501.97   1.088   0.2794  
## gmat_tpc    -1457.09     714.94  -2.038   0.0445 *
## s_avg        -931.53    8240.31  -0.113   0.9102  
## f_avg       -2222.82    3894.57  -0.571   0.5696  
## quarter     -2336.56    2721.89  -0.858   0.3929  
## work_yrs      749.66    1135.90   0.660   0.5110  
## frstlang     7719.42    7373.27   1.047   0.2979  
## satis       -1086.54    2157.76  -0.504   0.6158  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15430 on 90 degrees of freedom
## Multiple R-squared:  0.3422, Adjusted R-squared:  0.2545 
## F-statistic: 3.902 on 12 and 90 DF,  p-value: 8.086e-05
# Rows whose salary is exactly 0 (rows with a missing salary are dropped).
# A salary equal to 0 can never be 998 or 999, so no further conditions are
# needed to select the same rows.
notgotjob <- data[!is.na(data$salary) & data$salary == 0, ]
# Cross-tabulate sex by first language within that subset and run a
# chi-squared test of independence on the resulting table.
mytable <- xtabs(formula = ~ sex + frstlang, data = notgotjob)
chisq.test(mytable)
## Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  mytable
## X-squared = 0.21376, df = 1, p-value = 0.6438
# Same zero-salary subset, now cross-tabulating sex by satisfaction rating;
# `mytable` is overwritten and tested for independence again.
mytable <- xtabs(formula = ~ sex + satis, data = notgotjob)
chisq.test(mytable)
## Warning in chisq.test(mytable): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  mytable
## X-squared = 7.096, df = 3, p-value = 0.0689