# Load the MBA starting-salaries survey from the working directory.
# The file name is passed directly: paste() around a single string was a no-op.
mba <- read.csv("MBA Starting Salaries Data.csv")
# Open the spreadsheet viewer only in an interactive session; View() has no
# use (and may fail) when the script is knitted or run in batch mode.
if (interactive()) {
  View(mba)
}

Understanding the data set:

Summarising

# Per-column minimum, quartiles, median, mean and maximum of the survey data.
summary(object = mba)
##       age             sex           gmat_tot        gmat_qpc    
##  Min.   :22.00   Min.   :1.000   Min.   :450.0   Min.   :28.00  
##  1st Qu.:25.00   1st Qu.:1.000   1st Qu.:580.0   1st Qu.:72.00  
##  Median :27.00   Median :1.000   Median :620.0   Median :83.00  
##  Mean   :27.36   Mean   :1.248   Mean   :619.5   Mean   :80.64  
##  3rd Qu.:29.00   3rd Qu.:1.000   3rd Qu.:660.0   3rd Qu.:93.00  
##  Max.   :48.00   Max.   :2.000   Max.   :790.0   Max.   :99.00  
##     gmat_vpc        gmat_tpc        s_avg           f_avg      
##  Min.   :16.00   Min.   : 0.0   Min.   :2.000   Min.   :0.000  
##  1st Qu.:71.00   1st Qu.:78.0   1st Qu.:2.708   1st Qu.:2.750  
##  Median :81.00   Median :87.0   Median :3.000   Median :3.000  
##  Mean   :78.32   Mean   :84.2   Mean   :3.025   Mean   :3.062  
##  3rd Qu.:91.00   3rd Qu.:94.0   3rd Qu.:3.300   3rd Qu.:3.250  
##  Max.   :99.00   Max.   :99.0   Max.   :4.000   Max.   :4.000  
##     quarter         work_yrs         frstlang         salary      
##  Min.   :1.000   Min.   : 0.000   Min.   :1.000   Min.   :     0  
##  1st Qu.:1.250   1st Qu.: 2.000   1st Qu.:1.000   1st Qu.:     0  
##  Median :2.000   Median : 3.000   Median :1.000   Median :   999  
##  Mean   :2.478   Mean   : 3.872   Mean   :1.117   Mean   : 39026  
##  3rd Qu.:3.000   3rd Qu.: 4.000   3rd Qu.:1.000   3rd Qu.: 97000  
##  Max.   :4.000   Max.   :22.000   Max.   :2.000   Max.   :220000  
##      satis      
##  Min.   :  1.0  
##  1st Qu.:  5.0  
##  Median :  6.0  
##  Mean   :172.2  
##  3rd Qu.:  7.0  
##  Max.   :998.0

Describing

library(psych)
# psych::describe(): n, mean, sd, median, trimmed mean, mad, min, max, range,
# skew, kurtosis and standard error for every column.
describe(x = mba)
##          vars   n     mean       sd median  trimmed     mad min    max
## age         1 274    27.36     3.71     27    26.76    2.97  22     48
## sex         2 274     1.25     0.43      1     1.19    0.00   1      2
## gmat_tot    3 274   619.45    57.54    620   618.86   59.30 450    790
## gmat_qpc    4 274    80.64    14.87     83    82.31   14.83  28     99
## gmat_vpc    5 274    78.32    16.86     81    80.33   14.83  16     99
## gmat_tpc    6 274    84.20    14.02     87    86.12   11.86   0     99
## s_avg       7 274     3.03     0.38      3     3.03    0.44   2      4
## f_avg       8 274     3.06     0.53      3     3.09    0.37   0      4
## quarter     9 274     2.48     1.11      2     2.47    1.48   1      4
## work_yrs   10 274     3.87     3.23      3     3.29    1.48   0     22
## frstlang   11 274     1.12     0.32      1     1.02    0.00   1      2
## salary     12 274 39025.69 50951.56    999 33607.86 1481.12   0 220000
## satis      13 274   172.18   371.61      6    91.50    1.48   1    998
##           range  skew kurtosis      se
## age          26  2.16     6.45    0.22
## sex           1  1.16    -0.66    0.03
## gmat_tot    340 -0.01     0.06    3.48
## gmat_qpc     71 -0.92     0.30    0.90
## gmat_vpc     83 -1.04     0.74    1.02
## gmat_tpc     99 -2.28     9.02    0.85
## s_avg         2 -0.06    -0.38    0.02
## f_avg         4 -2.08    10.85    0.03
## quarter       3  0.02    -1.35    0.07
## work_yrs     22  2.78     9.80    0.20
## frstlang      1  2.37     3.65    0.02
## salary   220000  0.70    -1.05 3078.10
## satis       997  1.77     1.13   22.45

Understanding the structure

# Compact listing of each column's storage type and first few values.
str(object = mba)
## 'data.frame':    274 obs. of  13 variables:
##  $ age     : int  23 24 24 24 24 24 25 25 25 25 ...
##  $ sex     : int  2 1 1 1 2 1 1 2 1 1 ...
##  $ gmat_tot: int  620 610 670 570 710 640 610 650 630 680 ...
##  $ gmat_qpc: int  77 90 99 56 93 82 89 88 79 99 ...
##  $ gmat_vpc: int  87 71 78 81 98 89 74 89 91 81 ...
##  $ gmat_tpc: int  87 87 95 75 98 91 87 92 89 96 ...
##  $ s_avg   : num  3.4 3.5 3.3 3.3 3.6 3.9 3.4 3.3 3.3 3.45 ...
##  $ f_avg   : num  3 4 3.25 2.67 3.75 3.75 3.5 3.75 3.25 3.67 ...
##  $ quarter : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ work_yrs: int  2 2 2 1 2 2 2 2 2 2 ...
##  $ frstlang: int  1 1 1 1 1 1 1 1 2 1 ...
##  $ salary  : int  0 0 0 0 999 0 0 0 999 998 ...
##  $ satis   : int  7 6 6 7 5 6 5 6 4 998 ...

Some plots to understand the variables independently

SEX of the individual

# attach() puts the columns of mba on the search path. Several later chunks
# refer to columns by bare name (age, salary, ...), so it is kept here.
attach(mba)
# sex is a two-level code, so show the count per level as a bar chart rather
# than a histogram over a numeric axis.
# Labels assume 1 = Male, 2 = Female (code 1 is the majority group) --
# TODO confirm against the data dictionary.
barplot(table(mba$sex), col = "light green", names.arg = c("Male", "Female"))

Most of the students were Male.

Age distribution

# Box plot of student ages.
boxplot(
  age,
  main = "Age Distribution of the students",
  ylab = "Age",
  col = "light green"
)

Most of the people are young.

Satisfaction for the program

# Bar chart of satisfaction ratings. The value 998 is dropped first;
# presumably it is a non-response code rather than a rating -- TODO confirm.
barplot(table(subset(satis, satis != 998)), col = "light green")

Average salaries

# Histogram of the salary column exactly as recorded.
# NOTE(review): this still includes the 0 / 998 / 999 entries -- confirm that
# is intended for a salary distribution.
hist(
  salary,
  col = "light green",
  main = "Salary distribution"
)

Quartile rankings

# Histogram of the quartile-ranking codes (1 to 4).
hist(
  quarter,
  main = "Histogram of Quartile Ranking",
  col = "light green"
)

working years for different individuals

# Histogram of years of work experience.
hist(x = work_yrs, col = "light green")

Relation between different variables

Sex vs Salaries

# Horizontal box plots of salary for each sex code.
# The formula orders the groups by code (1, then 2). Code 1 is the majority
# group that the write-up calls male, so the labels must read Males, Females;
# the previous order attached each label to the wrong box.
# (Coding 1 = male, 2 = female is inferred -- TODO confirm with the data dictionary.)
# NOTE: the paragraph below was written against the old labels, so its
# male/female reading should be re-checked against the corrected plot.
# las = 2 draws axis labels perpendicular to the axes.
boxplot(
  salary ~ sex,
  data = mba,
  horizontal = TRUE,
  las = 2,
  col = c("light blue", "pink"),
  names = c("Males", "Females"),
  main = "Plot of Sex and Salary",
  xlab = "Salary",
  ylab = "Sex"
)

Clearly we can see that the average and the 1st quartile salaries are almost the same for both men and women, but the peak salaries for females are much higher than those for males.

Plot of GMAT score and Salary

# Scatter plot of salary against total GMAT score.
plot(
  x = gmat_tot,
  y = salary,
  xlab = "GMAT score",
  ylab = "Salary",
  main = "Salary vs Total score in GMAT"
)

Plot of gmat percentile and salary

# Scatter plot of salary against overall GMAT percentile (default labels).
plot(x = gmat_tpc, y = salary)

Plot of work experience and salary

# One salary box per distinct value of work_yrs.
boxplot(
  salary ~ work_yrs,
  col = "peachpuff",
  xlab = "Work Experience(in years)",
  ylab = "Salary",
  main = "Work experience vs Salary"
)

Plot for Quartile Ranking and salary

# One salary box per quartile-ranking code.
boxplot(
  salary ~ quarter,
  col = "peachpuff",
  xlab = "Quartile Rankings",
  ylab = "Salary",
  main = "Quartile Ranking vs Salary"
)

The plot shows that a high quartile ranking will get you a better salary.

Plot of Age and Salary

# Scatter plot of salary (y) against age (x) via the formula interface.
plot(
  salary ~ age,
  main = "Salary vs Age"
)

Scatterplot of the data

library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Pairwise scatter plots of six columns of mba, with histograms requested for
# the diagonal panels.
# NOTE(review): newer car releases appear to expect
# diagonal = list(method = "histogram") -- confirm against the installed version.
scatterplotMatrix(
  formula = ~ age + gmat_tot + s_avg + f_avg + work_yrs + frstlang,
  data = mba,
  diagonal = "histogram",
  cex = 1
)

Corrgram for the Data

library(corrgram)
# Correlogram of every column: pie panels above the diagonal, shaded panels
# below it, variable names on the diagonal.
corrgram(
  mba,
  upper.panel = panel.pie,
  lower.panel = panel.shade,
  text.panel = panel.txt
)

Variance-Covariance Matrix

Variance Matrix

# var() on a whole data frame yields the full variance-covariance matrix
# (variances on the diagonal); the recorded result matches cov(mba).
var(x = mba)
##                    age           sex      gmat_tot      gmat_qpc
## age       1.376904e+01 -4.513248e-02 -3.115879e+01 -1.192655e+01
## sex      -4.513248e-02  1.872677e-01 -1.328841e+00 -1.053769e+00
## gmat_tot -3.115879e+01 -1.328841e+00  3.310688e+03  6.200233e+02
## gmat_qpc -1.192655e+01 -1.053769e+00  6.200233e+02  2.210731e+02
## gmat_vpc -2.763643e+00  5.463758e-01  7.260006e+02  3.814826e+01
## gmat_tpc -8.839978e+00 -4.908960e-02  6.839911e+02  1.357997e+02
## s_avg     2.116874e-01  2.096227e-02  2.480257e+00 -1.691233e-01
## f_avg    -3.399348e-02  2.082698e-02  3.154688e+00  5.753854e-01
## quarter  -2.045935e-01 -6.414267e-02 -5.891153e+00  6.001979e-01
## work_yrs  1.029494e+01 -1.580172e-02 -3.391634e+01 -1.137186e+01
## frstlang  6.796610e-02  2.138980e-04 -2.499933e+00  6.646346e-01
## salary   -1.183042e+04  1.518264e+03 -1.611600e+05 -3.335823e+04
## satis    -1.763499e+02 -8.780808e+00  1.765263e+03  3.348371e+02
##               gmat_vpc     gmat_tpc         s_avg        f_avg
## age         -2.7636427   -8.8399775    0.21168739  -0.03399348
## sex          0.5463758   -0.0490896    0.02096227   0.02082698
## gmat_tot   726.0006417  683.9910698    2.48025721   3.15468838
## gmat_qpc    38.1482581  135.7996845   -0.16912329   0.57538542
## gmat_vpc   284.2481217  157.4932488    1.31357023   0.67207000
## gmat_tpc   157.4932488  196.6057057    0.62710008   0.58698618
## s_avg        1.3135702    0.6271001    0.14521760   0.11016898
## f_avg        0.6720700    0.5869862    0.11016898   0.27567237
## quarter     -3.2676666   -1.2923719   -0.32237213  -0.26080880
## work_yrs    -3.6181653   -7.8575172    0.15926392  -0.06628700
## frstlang    -2.1145691   -0.4663244   -0.01671372  -0.00626026
## salary   -5273.8523836 3522.7500067 2831.60098580 787.65597177
## satis      392.3562739  484.2466779   -4.62884495   2.12532927
##                quarter      work_yrs      frstlang        salary
## age      -2.045935e-01   10.29493864  6.796610e-02 -1.183042e+04
## sex      -6.414267e-02   -0.01580172  2.138980e-04  1.518264e+03
## gmat_tot -5.891153e+00  -33.91633914 -2.499933e+00 -1.611600e+05
## gmat_qpc  6.001979e-01  -11.37186171  6.646346e-01 -3.335823e+04
## gmat_vpc -3.267667e+00   -3.61816529 -2.114569e+00 -5.273852e+03
## gmat_tpc -1.292372e+00   -7.85751718 -4.663244e-01  3.522750e+03
## s_avg    -3.223721e-01    0.15926392 -1.671372e-02  2.831601e+03
## f_avg    -2.608088e-01   -0.06628700 -6.260260e-03  7.876560e+02
## quarter   1.232119e+00   -0.30866822  3.553381e-02 -9.296214e+03
## work_yrs -3.086682e-01   10.44882490 -2.898318e-02  1.486147e+03
## frstlang  3.553381e-02   -0.02898318  1.035266e-01 -1.419586e+03
## salary   -9.296214e+03 1486.14704152 -1.419586e+03  2.596062e+09
## satis    -5.227133e-03 -131.24080907  9.484532e+00 -6.347115e+06
##                  satis
## age      -1.763499e+02
## sex      -8.780808e+00
## gmat_tot  1.765263e+03
## gmat_qpc  3.348371e+02
## gmat_vpc  3.923563e+02
## gmat_tpc  4.842467e+02
## s_avg    -4.628845e+00
## f_avg     2.125329e+00
## quarter  -5.227133e-03
## work_yrs -1.312408e+02
## frstlang  9.484532e+00
## salary   -6.347115e+06
## satis     1.380974e+05

Covariance Matrix

# Pairwise covariances between all columns of mba.
cov(x = mba)
##                    age           sex      gmat_tot      gmat_qpc
## age       1.376904e+01 -4.513248e-02 -3.115879e+01 -1.192655e+01
## sex      -4.513248e-02  1.872677e-01 -1.328841e+00 -1.053769e+00
## gmat_tot -3.115879e+01 -1.328841e+00  3.310688e+03  6.200233e+02
## gmat_qpc -1.192655e+01 -1.053769e+00  6.200233e+02  2.210731e+02
## gmat_vpc -2.763643e+00  5.463758e-01  7.260006e+02  3.814826e+01
## gmat_tpc -8.839978e+00 -4.908960e-02  6.839911e+02  1.357997e+02
## s_avg     2.116874e-01  2.096227e-02  2.480257e+00 -1.691233e-01
## f_avg    -3.399348e-02  2.082698e-02  3.154688e+00  5.753854e-01
## quarter  -2.045935e-01 -6.414267e-02 -5.891153e+00  6.001979e-01
## work_yrs  1.029494e+01 -1.580172e-02 -3.391634e+01 -1.137186e+01
## frstlang  6.796610e-02  2.138980e-04 -2.499933e+00  6.646346e-01
## salary   -1.183042e+04  1.518264e+03 -1.611600e+05 -3.335823e+04
## satis    -1.763499e+02 -8.780808e+00  1.765263e+03  3.348371e+02
##               gmat_vpc     gmat_tpc         s_avg        f_avg
## age         -2.7636427   -8.8399775    0.21168739  -0.03399348
## sex          0.5463758   -0.0490896    0.02096227   0.02082698
## gmat_tot   726.0006417  683.9910698    2.48025721   3.15468838
## gmat_qpc    38.1482581  135.7996845   -0.16912329   0.57538542
## gmat_vpc   284.2481217  157.4932488    1.31357023   0.67207000
## gmat_tpc   157.4932488  196.6057057    0.62710008   0.58698618
## s_avg        1.3135702    0.6271001    0.14521760   0.11016898
## f_avg        0.6720700    0.5869862    0.11016898   0.27567237
## quarter     -3.2676666   -1.2923719   -0.32237213  -0.26080880
## work_yrs    -3.6181653   -7.8575172    0.15926392  -0.06628700
## frstlang    -2.1145691   -0.4663244   -0.01671372  -0.00626026
## salary   -5273.8523836 3522.7500067 2831.60098580 787.65597177
## satis      392.3562739  484.2466779   -4.62884495   2.12532927
##                quarter      work_yrs      frstlang        salary
## age      -2.045935e-01   10.29493864  6.796610e-02 -1.183042e+04
## sex      -6.414267e-02   -0.01580172  2.138980e-04  1.518264e+03
## gmat_tot -5.891153e+00  -33.91633914 -2.499933e+00 -1.611600e+05
## gmat_qpc  6.001979e-01  -11.37186171  6.646346e-01 -3.335823e+04
## gmat_vpc -3.267667e+00   -3.61816529 -2.114569e+00 -5.273852e+03
## gmat_tpc -1.292372e+00   -7.85751718 -4.663244e-01  3.522750e+03
## s_avg    -3.223721e-01    0.15926392 -1.671372e-02  2.831601e+03
## f_avg    -2.608088e-01   -0.06628700 -6.260260e-03  7.876560e+02
## quarter   1.232119e+00   -0.30866822  3.553381e-02 -9.296214e+03
## work_yrs -3.086682e-01   10.44882490 -2.898318e-02  1.486147e+03
## frstlang  3.553381e-02   -0.02898318  1.035266e-01 -1.419586e+03
## salary   -9.296214e+03 1486.14704152 -1.419586e+03  2.596062e+09
## satis    -5.227133e-03 -131.24080907  9.484532e+00 -6.347115e+06
##                  satis
## age      -1.763499e+02
## sex      -8.780808e+00
## gmat_tot  1.765263e+03
## gmat_qpc  3.348371e+02
## gmat_vpc  3.923563e+02
## gmat_tpc  4.842467e+02
## s_avg    -4.628845e+00
## f_avg     2.125329e+00
## quarter  -5.227133e-03
## work_yrs -1.312408e+02
## frstlang  9.484532e+00
## salary   -6.347115e+06
## satis     1.380974e+05

Analysing the set of people who actually got the job

# Students who reported an actual salary. 998 and 999 are the codes for
# "did not answer" and "did not disclose salary" respectively; the > 1000
# threshold drops those rows along with the salary == 0 rows.
# subset() evaluates the condition on mba's own salary column instead of the
# copy placed on the search path by attach().
selected <- subset(mba, salary > 1000)
library(car)
# some() prints a random sample of rows, so the rows shown vary between runs.
some(selected)
##     age sex gmat_tot gmat_qpc gmat_vpc gmat_tpc s_avg f_avg quarter
## 46   23   2      650       93       81       93   3.4  3.00       1
## 53   30   1      600       60       91       83   3.3  3.25       1
## 55   30   1      620       60       96       87   3.5  3.00       1
## 67   30   2      670       87       95       95   3.3  3.25       1
## 120  24   1      560       52       81       72   3.2  3.25       2
## 193  28   1      580       72       71       78   2.8  3.00       3
## 194  24   2      670       83       98       96   2.9  3.25       3
## 199  29   1      710       93       98       99   2.9  3.25       3
## 207  32   1      660       83       95       94   2.9  3.50       3
## 256  24   2      560       55       78       71   3.5  3.25       4
##     work_yrs frstlang salary satis
## 46         2        1 100000     7
## 53         5        1 105000     6
## 55         8        1 106000     7
## 67         8        1 120000     6
## 120        2        1  96000     7
## 193        3        1  97000     6
## 194        2        1  98000     7
## 199        7        1  98000     5
## 207        2        2 107300     7
## 256        2        1  64000     7

Effect of gender on starting salary

# Mean salary of placed students for each sex code.
mytable1 <- aggregate(salary ~ sex, data = selected, FUN = mean)
print(mytable1)
##   sex    salary
## 1   1 104970.97
## 2   2  98524.39

It can easily be seen that the average salary of males is much larger than that of females.

Effect of first language on salary

# Mean salary of placed students for each first-language code.
mytable2 <- aggregate(salary ~ frstlang, data = selected, FUN = mean)
print(mytable2)
##   frstlang   salary
## 1        1 101748.6
## 2        2 120614.3

It can be clearly seen that English-speaking and non-English-speaking students have comparable average salaries.

Effect of prior work experience on salary

# Mean salary of placed students for each number of years worked.
mytable3 <- aggregate(salary ~ work_yrs, data = selected, FUN = mean)
print(mytable3)
##    work_yrs    salary
## 1         0  95000.00
## 2         1 103532.00
## 3         2  97673.68
## 4         3 101652.86
## 5         4 105454.55
## 6         5 103142.86
## 7         6 105928.57
## 8         7  98000.00
## 9         8 105025.00
## 10       10 118000.00
## 11       15 183000.00
## 12       16 108500.00

There is almost no difference in salary between experienced and non-experienced people.

GMAT performance affecting the salary

# Mean salary of placed students for each total GMAT score.
mytable4 <- aggregate(salary ~ gmat_tot, data = selected, FUN = mean)
print(mytable4)
##    gmat_tot   salary
## 1       500 158250.0
## 2       520  78256.0
## 3       530  99500.0
## 4       540 104000.0
## 5       550 112236.7
## 6       560  94000.0
## 7       570 103857.1
## 8       580  99875.0
## 9       590  97000.0
## 10      600 107666.7
## 11      610  96200.0
## 12      620 104108.3
## 13      630 105812.5
## 14      640 110000.0
## 15      650 101285.7
## 16      660  92480.0
## 17      670 100642.9
## 18      680 102166.7
## 19      700 122333.3
## 20      710 101250.0
## 21      720  85000.0

The GMAT score has no effect on the salary.

Quartile Ranking determining the salary

# Mean salary of placed students for each quartile-ranking code.
mytable5 <- aggregate(salary ~ quarter, data = selected, FUN = mean)
print(mytable5)
##   quarter   salary
## 1       1 106328.6
## 2       2 103612.0
## 3       3  98319.0
## 4       4 102142.6

Quartile Ranking has no effect on salary as well.

Chi-square tests

Test for sex and salary

# Chi-square test of independence between sex and salary level among placed
# students. chisq.test() needs a contingency table of counts, so salary is
# split at its median and cross-tabulated with sex. The earlier call passed
# mytable1 (sex codes next to group-mean salaries), which is not a table of
# counts, so its statistic had no interpretation.
# NOTE: the recorded output and the conclusion below predate this change;
# re-run the chunk to refresh them.
with(selected, chisq.test(table(sex, salary > median(salary))))
## Warning in chisq.test(mytable1): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  mytable1
## X-squared = 0.0030128, df = 1, p-value = 0.9562

As the p-value > 0.05, the test suggests that sex is independent of salary.

Test for first language and salary

# Chi-square test of independence between first language and salary level
# among placed students, on a contingency table of counts (salary split at its
# median). The earlier call passed mytable2 (language codes next to group-mean
# salaries), which is not a table of counts.
# If a cell count is small chisq.test() warns that the approximation may be
# poor; fisher.test() on the same table is the usual alternative.
# NOTE: the recorded output and the conclusion below predate this change;
# re-run the chunk to refresh them.
with(selected, chisq.test(table(frstlang, salary > median(salary))))
## Warning in chisq.test(mytable2): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  mytable2
## X-squared = 1.0827e-22, df = 1, p-value = 1

As the p-value > 0.05, the test suggests that first language is independent of salary.

Test for work experience and salary

# Chi-square test of independence between years of work experience and salary
# level among placed students, on a contingency table of counts (salary split
# at its median). The earlier call passed mytable3 (years next to group-mean
# salaries), which is not a table of counts.
# Several work_yrs levels may hold very few students, in which case
# chisq.test() warns that the approximation may be poor.
# NOTE: the recorded output and the conclusion below predate this change;
# re-run the chunk to refresh them.
with(selected, chisq.test(table(work_yrs, salary > median(salary))))
## 
##  Pearson's Chi-squared test
## 
## data:  mytable3
## X-squared = 33.445, df = 11, p-value = 0.0004455

As the p-value < 0.05, the test suggests that salary depends on the work experience a person has.

T-Tests

Test for GMAT performance with salary

# Test whether salary is linearly related to total GMAT score among placed
# students; cor.test() reports the t statistic of the Pearson correlation.
# The earlier call, t.test(mytable4), ran a one-sample test of the pooled GMAT
# scores and mean salaries against zero, which says nothing about how the two
# are related.
# NOTE: the recorded output and the conclusion below predate this change;
# re-run the chunk to refresh them.
cor.test(~ salary + gmat_tot, data = selected)
## 
##  One Sample t-test
## 
## data:  mytable4
## t = 6.343, df = 41, p-value = 1.406e-07
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  35520.98 68705.25
## sample estimates:
## mean of x 
##  52113.12

As the p-value < 0.05, the test suggests that salary depends on a person's total GMAT score.

Test for Quartile Ranking and salary

# Two-sample t-test comparing the salaries of placed students with quartile
# codes 1-2 against those with codes 3-4.
# The earlier call, t.test(mytable5), ran a one-sample test of the pooled
# quartile codes and mean salaries against zero, which does not compare
# salaries between quartiles.
# NOTE: the recorded output and the conclusion below predate this change;
# re-run the chunk to refresh them.
with(selected, t.test(salary[quarter <= 2], salary[quarter > 2]))
## 
##  One Sample t-test
## 
## data:  mytable5
## t = 2.6438, df = 7, p-value = 0.03324
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##   5416.909 97186.142
## sample estimates:
## mean of x 
##  51301.53

As the p-value < 0.05, the test suggests that salary depends on quartile ranking.

Analysing the set of people who did not get the job

# Rows whose recorded salary is exactly 0, followed by a random sample of them.
notselected <- mba[which(mba$salary == 0), ]
some(notselected)
##     age sex gmat_tot gmat_qpc gmat_vpc gmat_tpc s_avg f_avg quarter
## 1    23   2      620       77       87       87  3.40  3.00       1
## 7    25   1      610       89       74       87  3.40  3.50       1
## 24   28   2      540       75       50       65  3.60  4.00       1
## 29   32   1      640       79       91       91  3.60  3.75       1
## 33   42   2      650       75       98       93  3.38  3.00       1
## 34   48   1      590       84       62       81  3.80  4.00       1
## 92   27   1      720       99       95       99  3.10  3.25       2
## 150  25   1      550       72       58       69  2.90  3.00       3
## 183  34   1      610       79       81       86  2.80  3.00       3
## 236  28   1      710       94       98       99  3.40  3.75       4
##     work_yrs frstlang salary satis
## 1          2        1      0     7
## 7          2        1      0     5
## 24         5        1      0     5
## 29         7        1      0     6
## 33        13        1      0     5
## 34        22        1      0     6
## 92         5        1      0     5
## 150        3        1      0     6
## 183       11        1      0     6
## 236        6        1      0     6

Effect of gender on salary

# Number of zero-salary students per sex code.
mytable1 <- table(sex = notselected$sex)
print(mytable1)
## sex
##  1  2 
## 67 23

More males than females were not selected.

Effect of First Language

# Number of zero-salary students per first-language code.
mytable2 <- table(frstlang = notselected$frstlang)
print(mytable2)
## frstlang
##  1  2 
## 82  8

A large number of people, exactly 82 of them, knew English and did not get selected.

Effect of Work Experience

# Number of zero-salary students per number of years worked.
mytable3 <- table(work_yrs = notselected$work_yrs)
print(mytable3)
## work_yrs
##  0  1  2  3  4  5  6  7  8  9 10 11 12 13 16 18 22 
##  1 12 22 14  9 12  2  5  2  1  1  2  2  1  1  1  2

People with less work experience and without selection are large in number.

Effect of GMAT Score

# Number of zero-salary students per total GMAT score.
mytable4 <- table(gmat_tot = notselected$gmat_tot)
print(mytable4)
## gmat_tot
## 450 480 510 530 540 550 560 570 580 590 600 610 620 630 640 650 660 670 
##   1   1   2   3   3   4   8   7   4   3   3   9   4   5   6   5   3   4 
## 680 700 710 720 730 740 750 760 
##   3   2   4   2   1   1   1   1

People not getting selected vary vastly in their GMAT scores.

Effect of Quartile Ranking

# Number of zero-salary students per quartile-ranking code.
mytable5 <- table(quarter = notselected$quarter)
print(mytable5)
## quarter
##  1  2  3  4 
## 18 27 23 22

People with a higher quartile are not selected, but the difference is not too large.

So we conclude that people didn't get a job because of a number of factors, namely work experience, course performance, language and gender (only minutely dependent).

Regression models

Model for salary dependance on GMAT Scores

# Linear model of salary on the four GMAT measures, fitted to every row of mba.
# NOTE(review): data = mba still contains the 0 / 998 / 999 salary entries --
# confirm that fitting on them is intended.
fit1 <- lm(
  formula = salary ~ gmat_qpc + gmat_tot + gmat_tpc + gmat_vpc,
  data = mba
)
print(summary(fit1))
## 
## Call:
## lm(formula = salary ~ gmat_qpc + gmat_tot + gmat_tpc + gmat_vpc, 
##     data = mba)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -48199 -41195 -33034  56735 182897 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 141539.0    59303.9   2.387   0.0177 *
## gmat_qpc       465.7      615.2   0.757   0.4497  
## gmat_tot      -369.7      222.7  -1.660   0.0980 .
## gmat_tpc       523.2      443.0   1.181   0.2386  
## gmat_vpc       573.4      563.0   1.018   0.3094  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 50900 on 269 degrees of freedom
## Multiple R-squared:  0.01651,    Adjusted R-squared:  0.001889 
## F-statistic: 1.129 on 4 and 269 DF,  p-value: 0.343

The above model is not a good model, as the Multiple R-squared is only 1.65% and the Adjusted R-squared is only 0.19%. Looking at the data, we can say that salary is independent of the marks that you get in your GMAT exam.

Model for physical and social traits

# Linear model of salary on age, sex code and first-language code, fitted to
# every row of mba.
# NOTE(review): data = mba still contains the 0 / 998 / 999 salary entries --
# confirm that fitting on them is intended.
fit2 <- lm(
  formula = salary ~ age + sex + frstlang,
  data = mba
)
print(summary(fit2))
## 
## Call:
## lm(formula = salary ~ age + sex + frstlang, data = mba)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -49884 -39412 -34465  53647 196395 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  64895.5    26504.0   2.449    0.015 *
## age           -767.9      830.9  -0.924    0.356  
## sex           7937.5     7113.0   1.116    0.265  
## frstlang    -13224.5     9578.4  -1.381    0.169  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 50840 on 270 degrees of freedom
## Multiple R-squared:  0.01537,    Adjusted R-squared:  0.004433 
## F-statistic: 1.405 on 3 and 270 DF,  p-value: 0.2416

This model is also not a good model, as the Multiple R-squared is only 1.53% and the Adjusted R-squared is only 0.44%. Looking at the data, we can say that salary is independent of the physical and social traits of a person.

Model for Marks obtained in course and work experience

# Linear model of salary on work experience, the two course averages and the
# quartile ranking, fitted to every row of mba.
# NOTE(review): data = mba still contains the 0 / 998 / 999 salary entries --
# confirm that fitting on them is intended.
fit3 <- lm(
  formula = salary ~ work_yrs + s_avg + f_avg + quarter,
  data = mba
)
print(summary(fit3))
## 
## Call:
## lm(formula = salary ~ work_yrs + s_avg + f_avg + quarter, data = mba)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -66801 -40287 -29000  54493 197266 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  41852.2    46698.6   0.896    0.371
## work_yrs      -274.3      961.9  -0.285    0.776
## s_avg        12329.4    13418.1   0.919    0.359
## f_avg        -7861.6     7042.9  -1.116    0.265
## quarter      -6051.9     4257.7  -1.421    0.156
## 
## Residual standard error: 50490 on 269 degrees of freedom
## Multiple R-squared:  0.03258,    Adjusted R-squared:  0.01819 
## F-statistic: 2.265 on 4 and 269 DF,  p-value: 0.06256

This model is also not a good model, as the Multiple R-squared is only 3.25% and the Adjusted R-squared is only 1.81%. Looking at the data, we can say that salary is independent of marks obtained in the course and of work experience.

We get to know that age is the only column which was coming out to be of some significance. Other than that, the other columns had no significant effect on salary.