Reading the data

# Load the MBA starting-salaries survey from the working directory.
# The former paste(..., sep = " ") wrapper around a single string literal
# returned that same string, so the file name is passed directly.
# NOTE(review): `salary` values 998/999 and `satis` value 998 look like
# "no answer" codes rather than measurements -- confirm against the data
# dictionary before averaging those columns.
mba <- read.csv("MBA Starting Salaries Data.csv")

summary

library(psych)
# Print min / quartiles / median / mean / max for every column of `mba`.
# NOTE(review): salary and satis appear to contain 998/999 codes, which would
# inflate the means printed below (e.g. satis is rated 1-7) -- confirm.
summary(mba)
##       age             sex           gmat_tot        gmat_qpc    
##  Min.   :22.00   Min.   :1.000   Min.   :450.0   Min.   :28.00  
##  1st Qu.:25.00   1st Qu.:1.000   1st Qu.:580.0   1st Qu.:72.00  
##  Median :27.00   Median :1.000   Median :620.0   Median :83.00  
##  Mean   :27.36   Mean   :1.248   Mean   :619.5   Mean   :80.64  
##  3rd Qu.:29.00   3rd Qu.:1.000   3rd Qu.:660.0   3rd Qu.:93.00  
##  Max.   :48.00   Max.   :2.000   Max.   :790.0   Max.   :99.00  
##     gmat_vpc        gmat_tpc        s_avg           f_avg      
##  Min.   :16.00   Min.   : 0.0   Min.   :2.000   Min.   :0.000  
##  1st Qu.:71.00   1st Qu.:78.0   1st Qu.:2.708   1st Qu.:2.750  
##  Median :81.00   Median :87.0   Median :3.000   Median :3.000  
##  Mean   :78.32   Mean   :84.2   Mean   :3.025   Mean   :3.062  
##  3rd Qu.:91.00   3rd Qu.:94.0   3rd Qu.:3.300   3rd Qu.:3.250  
##  Max.   :99.00   Max.   :99.0   Max.   :4.000   Max.   :4.000  
##     quarter         work_yrs         frstlang         salary      
##  Min.   :1.000   Min.   : 0.000   Min.   :1.000   Min.   :     0  
##  1st Qu.:1.250   1st Qu.: 2.000   1st Qu.:1.000   1st Qu.:     0  
##  Median :2.000   Median : 3.000   Median :1.000   Median :   999  
##  Mean   :2.478   Mean   : 3.872   Mean   :1.117   Mean   : 39026  
##  3rd Qu.:3.000   3rd Qu.: 4.000   3rd Qu.:1.000   3rd Qu.: 97000  
##  Max.   :4.000   Max.   :22.000   Max.   :2.000   Max.   :220000  
##      satis      
##  Min.   :  1.0  
##  1st Qu.:  5.0  
##  Median :  6.0  
##  Mean   :172.2  
##  3rd Qu.:  7.0  
##  Max.   :998.0
# Show the dimensions of `mba` plus the type and first values of each column.
str(mba)
## 'data.frame':    274 obs. of  13 variables:
##  $ age     : int  23 24 24 24 24 24 25 25 25 25 ...
##  $ sex     : int  2 1 1 1 2 1 1 2 1 1 ...
##  $ gmat_tot: int  620 610 670 570 710 640 610 650 630 680 ...
##  $ gmat_qpc: int  77 90 99 56 93 82 89 88 79 99 ...
##  $ gmat_vpc: int  87 71 78 81 98 89 74 89 91 81 ...
##  $ gmat_tpc: int  87 87 95 75 98 91 87 92 89 96 ...
##  $ s_avg   : num  3.4 3.5 3.3 3.3 3.6 3.9 3.4 3.3 3.3 3.45 ...
##  $ f_avg   : num  3 4 3.25 2.67 3.75 3.75 3.5 3.75 3.25 3.67 ...
##  $ quarter : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ work_yrs: int  2 2 2 1 2 2 2 2 2 2 ...
##  $ frstlang: int  1 1 1 1 1 1 1 1 2 1 ...
##  $ salary  : int  0 0 0 0 999 0 0 0 999 998 ...
##  $ satis   : int  7 6 6 7 5 6 5 6 4 998 ...
# Extended descriptive statistics per column (sd, trimmed mean, mad, range,
# skew, kurtosis, se), using describe() from the psych package loaded above.
describe(mba)
##          vars   n     mean       sd median  trimmed     mad min    max
## age         1 274    27.36     3.71     27    26.76    2.97  22     48
## sex         2 274     1.25     0.43      1     1.19    0.00   1      2
## gmat_tot    3 274   619.45    57.54    620   618.86   59.30 450    790
## gmat_qpc    4 274    80.64    14.87     83    82.31   14.83  28     99
## gmat_vpc    5 274    78.32    16.86     81    80.33   14.83  16     99
## gmat_tpc    6 274    84.20    14.02     87    86.12   11.86   0     99
## s_avg       7 274     3.03     0.38      3     3.03    0.44   2      4
## f_avg       8 274     3.06     0.53      3     3.09    0.37   0      4
## quarter     9 274     2.48     1.11      2     2.47    1.48   1      4
## work_yrs   10 274     3.87     3.23      3     3.29    1.48   0     22
## frstlang   11 274     1.12     0.32      1     1.02    0.00   1      2
## salary     12 274 39025.69 50951.56    999 33607.86 1481.12   0 220000
## satis      13 274   172.18   371.61      6    91.50    1.48   1    998
##           range  skew kurtosis      se
## age          26  2.16     6.45    0.22
## sex           1  1.16    -0.66    0.03
## gmat_tot    340 -0.01     0.06    3.48
## gmat_qpc     71 -0.92     0.30    0.90
## gmat_vpc     83 -1.04     0.74    1.02
## gmat_tpc     99 -2.28     9.02    0.85
## s_avg         2 -0.06    -0.38    0.02
## f_avg         4 -2.08    10.85    0.03
## quarter       3  0.02    -1.35    0.07
## work_yrs     22  2.78     9.80    0.20
## frstlang      1  2.37     3.65    0.02
## salary   220000  0.70    -1.05 3078.10
## satis       997  1.77     1.13   22.45
# Horizontal box plots of the GMAT total score and its three percentile
# components, drawn one after another in the order listed here.
gmat_plot_specs <- list(
  list(column = "gmat_tot", title = "GMAT score distribution",
       fill = "bisque"),
  list(column = "gmat_qpc", title = "QPC distribution", fill = "grey"),
  list(column = "gmat_vpc", title = "VPC distribution", fill = "peachpuff"),
  list(column = "gmat_tpc", title = "TPC distribution", fill = "violet")
)
for (spec in gmat_plot_specs) {
  boxplot(mba[[spec$column]], main = spec$title, horizontal = TRUE,
          col = spec$fill)
}

# Visualizing Spring MBA average and Fall MBA average side by side.
# par() returns the settings it replaces; restoring them afterwards keeps the
# 1x2 layout from leaking into the plots drawn later in the script.
old_par <- par(mfrow = c(1, 2))
boxplot(mba$s_avg, main = "spring average", col = "gold")
boxplot(mba$f_avg, main = "fall average", col = "darkgreen")
par(old_par)

# Visualizing age distribution (title added to match the other plots)
barplot(table(mba$age), main = "Age distribution", col = "darkblue")
# Visualizing experience distribution
barplot(table(mba$work_yrs), main = "Number of years of experience",
        col = "lightblue")

# Visualizing first language distribution
barplot(table(mba$frstlang), main = "First Language",
        xlab = "1->English 2-> Others", col = "brown")
# Visualizing salary distribution
# Only values above 1000 are binned: the column also holds 0 and the 998/999
# codes, which are not salaries. Previously they were binned too and merely
# hidden with hard-coded axis limits; the limits are now left to hist() so
# they follow the data.
reported_salary <- mba$salary[mba$salary > 1000]
hist(reported_salary, main = "Salary distribution", xlab = "Salary",
     ylab = "count", col = "beige")

# Visualizing Degree of Satisfaction distribution
# Ratings run from 1 to 7; anything outside that range (the column contains
# 998) is left out so it neither drives the break points nor shows up as an
# extra bar. NOTE(review): confirm 998 is a "no answer" code.
#par(mfrow=c(1,2))
satis_rating <- mba$satis[mba$satis >= 1 & mba$satis <= 7]
# Half-integer breaks give every rating its own bar; with integer breaks the
# first bin is closed on both sides, so ratings 1 and 2 were merged.
hist(satis_rating, main = "Degree of satisfaction with MBA program",
     xlab = " (1= low, 7 = high satisfaction)",
     breaks = seq(0.5, 7.5, by = 1), col = "maroon")
barplot(table(satis_rating), col = "magenta")

Scatterplots

# Scatter plots of salary against GMAT total, quartile ranking and work
# experience, one plot each. The formula interface labels the axes with the
# bare column names.
#par(mfrow=c(1,3))
plot(salary ~ gmat_tot, data = mba, cex = 1)

plot(salary ~ quarter, data = mba, cex = 1)

plot(salary ~ work_yrs, data = mba, cex = 1)

# Observation: the highest salary does not belong to the highest GMAT scorer
# but to a candidate with a below-average GMAT score.
# Salary vs satisfaction, y axis limited to the 1-7 rating scale
plot(x = mba$salary, y = mba$satis, ylab = "satisfaction", ylim = c(1, 7),
     cex = 1)

# Salary vs sex
plot(sex ~ salary, data = mba)

# Visualizing GMAT total vs Sex, GMAT total vs Spring average and GMAT total
# vs Fall average in one row of three panels.
# The previous par() settings are saved and restored so the 1x3 layout does
# not carry over to the plots that follow.
old_par <- par(mfrow = c(1, 3))
with(mba, plot(gmat_tot, sex, cex = 1))
with(mba, plot(gmat_tot, s_avg, cex = 1))
with(mba, plot(gmat_tot, f_avg, cex = 1))
par(old_par)

ScatterPlotMatrix

library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Scatterplot matrix of the four GMAT measures, requesting histograms on the
# diagonal.
# NOTE(review): recent car releases appear to expect `diagonal` as a list
# (e.g. list(method = "histogram")) -- confirm against the installed version.
scatterplotMatrix(formula=~gmat_tot+gmat_qpc + gmat_vpc+gmat_tpc,data=mba,diagonal="histogram")

# Scatterplot matrix of the two term averages, work experience and salary.
scatterplotMatrix(formula=~s_avg + f_avg+work_yrs+salary,data=mba)

correlation between variables

library(corrgram)
library(corrplot)
## corrplot 0.84 loaded
# Correlation heat map of the first ten columns (age ... work_yrs).
# The covariance matrix formerly stored in `x` at this point was never read
# (`x` is reassigned before its first use), so that computation is dropped.
corrplot(corr = cor(mba[, 1:10], use = "complete.obs"),
         method = "color", main = "corrplot")

# Corrgram of every column: panel.shade below the diagonal, panel.cor above.
corrgram(mba, order = NULL, panel = panel.cor, lower.panel = panel.shade,
         text.panel = panel.txt, main = "Corrgram")

2a. Identify the crucial managerially relevant question(s) raised in the case: 1. Is the average GMAT score of women higher than that of men? 2. Is the average salary of men higher than that of women? 3. Are candidates with above-average GMAT scores placed better than candidates with below-average GMAT scores? 4. Which quartile is the most populated? 5. What is the average salary of people with 0-2 years of work experience? 6. How do the GMAT scores of candidates who are not yet placed vary? Figure out how to answer these questions using the given dataset, using R.

# Split the class by the salary column: rows with salary 0 go to `notplaced`,
# rows with a salary above 1000 go to `placed` (the 1000 cut-off also leaves
# out the 998/999 values).
notplaced <- subset(mba, salary == 0)
placed <- subset(mba, salary > 1000)
# 1. Mean GMAT total for each sex code, over all students
aggregate(gmat_tot ~ sex, data = mba, FUN = mean)
##   sex gmat_tot
## 1   1 621.2136
## 2   2 614.1176
#2. Comparing salaries of men and women
# Only placed students are averaged. In `mba` the salary column also holds 0
# and the 998/999 codes, which dominated both group means and made the
# comparison meaningless. The output below is for the corrected call,
# recomputed from the sex x salary table of placed students in this file.
aggregate(salary ~ sex, data = placed, FUN = mean)
##   sex    salary
## 1   1 104970.97
## 2   2  98524.39
#3. Comparing below average and above average gmat scorers
# The row positions are computed on `placed`, so they must index `placed` too.
# Indexing `mba` with them selected unrelated rows near the top of the full
# data set, including students without a salary.
x <- mean(mba$gmat_tot)
above_avg_gmat <- placed[which(placed$gmat_tot > x), ]
below_avg_gmat <- placed[which(placed$gmat_tot < x), ]

# Output below is for the corrected subsets, recomputed from the
# salary x gmat_tot table of placed students shown later in this file.
mean(above_avg_gmat$salary)
## [1] 102809.3
mean(below_avg_gmat$salary)
## [1] 103274.8

We can observe that candidates with below-average GMAT scores received a higher mean salary than candidates with above-average GMAT scores.

  1. Analyzing quartile
# Mean of the salary column per quartile ranking, over all students.
# NOTE(review): `mba$salary` also contains 0 and what look like 998/999
# codes, so these figures are not mean starting salaries -- confirm intent.
aggregate(salary ~ quarter, data = mba, mean)
##   quarter   salary
## 1       1 54166.28
## 2       2 37261.01
## 3       3 34037.40
## 4       4 30225.80
# Same breakdown within the above-average and below-average GMAT groups.
# NOTE(review): only quarters 1 and 2 appear in the recorded output although
# the full data has four; check that both groups were built from the intended
# rows before relying on these numbers.
aggregate(salary~quarter,data=above_avg_gmat,mean)
##   quarter     salary
## 1       1 50930.3333
## 2       2   499.1667
aggregate(salary~quarter,data=below_avg_gmat,mean)
##   quarter   salary
## 1       1 57696.39
## 2       2   436.75
  1. Average salary of candidates with 0-2 years of experience
# "0-2 years of experience" is a condition on work_yrs. The previous filter
# (salary > 0) kept every placed student and so returned the overall mean.
# Output below is for the corrected filter, recomputed from the
# salary x work_yrs table of placed students shown later in this file.
freshers <- placed[which(placed$work_yrs <= 2), ]
mean(freshers$salary)
## [1] 98613.96
  1. Analyzing not placed students
# Mean GMAT total per quartile ranking among the students in `notplaced`.
aggregate(gmat_tot ~ quarter, data = notplaced, FUN = mean)
##   quarter gmat_tot
## 1       1 631.1111
## 2       2 605.5556
## 3       3 611.7391
## 4       4 614.0909

2b. Who got how much salary?

# Contingency tables relating starting salary to other factors.
# First table: number of placed students per sex / salary combination.
mytable <- xtabs(~ sex + salary, data = placed)
mytable
##    salary
## sex 64000 77000 78256 82000 85000 86000 88000 88500 90000 92000 93000
##   1     0     1     0     0     1     0     0     1     3     2     2
##   2     1     0     1     1     3     2     1     0     0     1     1
##    salary
## sex 95000 96000 96500 97000 98000 99000 100000 100400 101000 101100 101600
##   1     4     3     1     2     6     0      4      1      0      1      1
##   2     3     1     0     0     4     1      5      0      2      0      0
##    salary
## sex 102500 103000 104000 105000 106000 107000 107300 107500 108000 110000
##   1      1      1      2     11      2      1      1      1      2      0
##   2      0      0      0      0      1      0      0      0      0      1
##    salary
## sex 112000 115000 118000 120000 126710 130000 145800 146000 162000 220000
##   1      3      5      1      3      1      1      1      1      1      0
##   2      0      0      0      1      0      0      0      0      0      1
# Number of placed students per salary / years-of-work-experience combination.
mytable1 <- xtabs(~ salary + work_yrs, data = placed)
mytable1
##         work_yrs
## salary   0 1 2 3 4 5 6 7 8 10 15 16
##   64000  0 0 1 0 0 0 0 0 0  0  0  0
##   77000  0 0 1 0 0 0 0 0 0  0  0  0
##   78256  0 1 0 0 0 0 0 0 0  0  0  0
##   82000  0 1 0 0 0 0 0 0 0  0  0  0
##   85000  0 1 2 1 0 0 0 0 0  0  0  0
##   86000  0 0 1 1 0 0 0 0 0  0  0  0
##   88000  0 0 0 1 0 0 0 0 0  0  0  0
##   88500  0 0 0 1 0 0 0 0 0  0  0  0
##   90000  0 0 2 0 0 1 0 0 0  0  0  0
##   92000  0 0 3 0 0 0 0 0 0  0  0  0
##   93000  0 0 0 0 1 1 0 0 1  0  0  0
##   95000  1 1 2 2 0 1 0 0 0  0  0  0
##   96000  0 1 2 0 1 0 0 0 0  0  0  0
##   96500  0 0 1 0 0 0 0 0 0  0  0  0
##   97000  0 0 0 1 1 0 0 0 0  0  0  0
##   98000  0 0 7 1 1 0 0 1 0  0  0  0
##   99000  0 0 0 0 0 1 0 0 0  0  0  0
##   100000 0 0 6 1 1 0 1 0 0  0  0  0
##   100400 0 0 0 1 0 0 0 0 0  0  0  0
##   101000 0 0 2 0 0 0 0 0 0  0  0  0
##   101100 0 0 0 0 0 0 0 0 1  0  0  0
##   101600 0 0 0 1 0 0 0 0 0  0  0  0
##   102500 0 0 0 0 0 0 1 0 0  0  0  0
##   103000 0 0 0 1 0 0 0 0 0  0  0  0
##   104000 0 0 0 0 2 0 0 0 0  0  0  0
##   105000 0 0 4 4 0 1 1 0 0  0  0  1
##   106000 0 0 0 0 0 0 2 0 1  0  0  0
##   107000 0 0 1 0 0 0 0 0 0  0  0  0
##   107300 0 0 1 0 0 0 0 0 0  0  0  0
##   107500 0 0 0 1 0 0 0 0 0  0  0  0
##   108000 0 0 0 1 1 0 0 0 0  0  0  0
##   110000 0 0 0 0 0 0 1 0 0  0  0  0
##   112000 0 0 1 0 0 0 1 0 0  0  0  1
##   115000 0 2 0 1 2 0 0 0 0  0  0  0
##   118000 0 0 0 0 0 0 0 0 0  1  0  0
##   120000 0 0 0 1 0 2 0 0 1  0  0  0
##   126710 0 0 0 1 0 0 0 0 0  0  0  0
##   130000 0 0 0 0 1 0 0 0 0  0  0  0
##   145800 0 0 1 0 0 0 0 0 0  0  0  0
##   146000 0 0 0 0 0 0 0 0 0  0  1  0
##   162000 0 1 0 0 0 0 0 0 0  0  0  0
##   220000 0 0 0 0 0 0 0 0 0  0  1  0
  #Work experience is an added advantage while getting placed
    
 # Number of placed students per salary / first-language combination.
mytable2 <- xtabs(~ salary + frstlang, data = placed)
mytable2
##         frstlang
## salary    1  2
##   64000   1  0
##   77000   1  0
##   78256   1  0
##   82000   1  0
##   85000   4  0
##   86000   2  0
##   88000   1  0
##   88500   1  0
##   90000   3  0
##   92000   3  0
##   93000   3  0
##   95000   7  0
##   96000   4  0
##   96500   1  0
##   97000   2  0
##   98000   8  2
##   99000   0  1
##   100000  9  0
##   100400  1  0
##   101000  2  0
##   101100  1  0
##   101600  1  0
##   102500  1  0
##   103000  1  0
##   104000  1  1
##   105000 11  0
##   106000  3  0
##   107000  1  0
##   107300  0  1
##   107500  1  0
##   108000  2  0
##   110000  1  0
##   112000  3  0
##   115000  5  0
##   118000  0  1
##   120000  4  0
##   126710  1  0
##   130000  1  0
##   145800  1  0
##   146000  1  0
##   162000  1  0
##   220000  0  1
#English communication is important
# Number of placed students per salary / GMAT total combination.
mytable3 <- xtabs(~ salary + gmat_tot, data = placed)
mytable3
##         gmat_tot
## salary   500 520 530 540 550 560 570 580 590 600 610 620 630 640 650 660
##   64000    0   0   0   0   0   1   0   0   0   0   0   0   0   0   0   0
##   77000    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
##   78256    0   1   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   82000    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   85000    0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   1
##   86000    0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0
##   88000    0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0
##   88500    0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   90000    0   0   0   0   0   0   0   1   0   0   0   0   1   0   1   0
##   92000    0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   1
##   93000    0   0   0   1   0   0   0   0   0   0   1   1   0   0   0   0
##   95000    0   0   1   0   0   2   0   0   0   0   2   0   0   0   0   0
##   96000    0   0   0   0   0   1   0   0   1   1   0   0   0   0   1   0
##   96500    1   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   97000    0   0   0   0   0   0   0   1   0   0   0   1   0   0   0   0
##   98000    0   0   0   0   0   1   3   1   1   0   1   0   0   0   0   0
##   99000    0   0   0   0   0   0   0   1   0   0   0   0   0   0   0   0
##   100000   0   0   0   0   0   2   0   1   0   1   1   0   1   0   2   0
##   100400   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0
##   101000   0   0   0   0   0   0   0   0   0   1   0   1   0   0   0   0
##   101100   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
##   101600   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0
##   102500   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   103000   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   104000   0   0   1   0   0   1   0   0   0   0   0   0   0   0   0   0
##   105000   0   0   0   0   2   0   2   3   0   1   0   1   0   0   1   0
##   106000   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   107000   0   0   0   0   0   0   0   0   0   1   0   0   0   0   0   0
##   107300   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
##   107500   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0
##   108000   0   0   0   0   0   0   1   0   0   1   0   0   0   0   0   0
##   110000   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0
##   112000   0   0   0   0   0   0   0   0   0   1   0   0   0   0   0   0
##   115000   0   0   0   1   0   0   1   0   0   0   0   1   1   0   0   0
##   118000   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   120000   0   0   0   0   0   0   0   0   0   2   0   0   0   0   0   0
##   126710   0   0   0   0   1   0   0   0   0   0   0   0   0   0   0   0
##   130000   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0
##   145800   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0
##   146000   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0
##   162000   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   220000   1   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##         gmat_tot
## salary   670 680 700 710 720
##   64000    0   0   0   0   0
##   77000    0   0   0   0   0
##   78256    0   0   0   0   0
##   82000    1   0   0   0   0
##   85000    0   0   1   0   1
##   86000    0   1   0   0   0
##   88000    0   0   0   0   0
##   88500    0   0   0   0   0
##   90000    0   0   0   0   0
##   92000    0   0   0   1   0
##   93000    0   0   0   0   0
##   95000    2   0   0   0   0
##   96000    0   0   0   0   0
##   96500    0   0   0   0   0
##   97000    0   0   0   0   0
##   98000    1   1   0   1   0
##   99000    0   0   0   0   0
##   100000   0   0   0   1   0
##   100400   0   0   0   0   0
##   101000   0   0   0   0   0
##   101100   0   0   0   0   0
##   101600   0   0   0   0   0
##   102500   1   0   0   0   0
##   103000   0   0   0   0   0
##   104000   0   0   0   0   0
##   105000   0   1   0   0   0
##   106000   0   2   0   0   0
##   107000   0   0   0   0   0
##   107300   0   0   0   0   0
##   107500   0   0   0   0   0
##   108000   0   0   0   0   0
##   110000   0   0   0   0   0
##   112000   1   1   0   0   0
##   115000   0   0   0   1   0
##   118000   0   0   0   0   0
##   120000   1   0   1   0   0
##   126710   0   0   0   0   0
##   130000   0   0   0   0   0
##   145800   0   0   0   0   0
##   146000   0   0   0   0   0
##   162000   0   0   1   0   0
##   220000   0   0   0   0   0

Chisquare test

# Pearson chi-squared test on the salary x work_yrs table.
# NOTE(review): R warns that the approximation may be incorrect for each of
# the three tests in this section; treating every distinct salary as its own
# category leaves most cells at 0 or 1, so read the p-values with caution.
chisq.test(mytable1)
## Warning in chisq.test(mytable1): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  mytable1
## X-squared = 535.23, df = 451, p-value = 0.003809
# p < 0.01: the test suggests a relationship between work experience and
# salary (subject to the warning above).
chisq.test(mytable2)
## Warning in chisq.test(mytable2): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  mytable2
## X-squared = 69.847, df = 41, p-value = 0.003296
# p < 0.01: the test suggests a relationship between first language and
# salary (subject to the warning above).
 chisq.test(mytable3)
## Warning in chisq.test(mytable3): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  mytable3
## X-squared = 927.24, df = 820, p-value = 0.005279
 # p < 0.01: the test suggests a relationship between total GMAT score and
 # starting salary (subject to the warning above).

T-Test

# Unpaired two-sample t-test (equal variances assumed) on the salary and
# work_yrs columns of the placed students. The stray leading `r` token left
# over from the code-chunk markup is removed so the line parses as R.
# NOTE(review): this compares a mean in currency units with a mean in years,
# which does not measure how the two are related; cor.test() or a regression
# is probably what was intended -- confirm.
t.test(placed$salary, placed$work_yrs, var.equal = TRUE, paired = FALSE)

## 
##  Two Sample t-test
## 
## data:  placed$salary and placed$work_yrs
## t = 58.516, df = 204, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   99555.62 106498.49
## sample estimates:
##    mean of x    mean of y 
## 1.030307e+05 3.679612e+00

Regression Model

 # Model 1: salary of placed students regressed on the four GMAT measures.
model1 <- lm(salary ~ gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc,
             data = placed)
summary(model1)
## 
## Call:
## lm(formula = salary ~ gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc, 
##     data = placed)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -40370  -8250  -2164   5253 100097 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 109539.54   48054.24   2.279   0.0248 *
## gmat_tot        55.01     181.71   0.303   0.7627  
## gmat_qpc       718.40     541.90   1.326   0.1880  
## gmat_vpc       546.10     543.85   1.004   0.3178  
## gmat_tpc     -1663.16     801.57  -2.075   0.0406 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17670 on 98 degrees of freedom
## Multiple R-squared:  0.06089,    Adjusted R-squared:  0.02256 
## F-statistic: 1.589 on 4 and 98 DF,  p-value: 0.1834
#gmat_tpc is the only significant predictor in model 1 (p = 0.0406). The multiple R-squared value indicates that the model accounts for about 6% of the variance in salary, and the overall F-test (p = 0.1834) is not significant.
# Model 2: salary of placed students regressed on satisfaction rating, years
# of work experience and first-language code.
model2 <- lm(salary ~ satis + work_yrs + frstlang, data = placed)
summary(model2)
## 
## Call:
## lm(formula = salary ~ satis + work_yrs + frstlang, data = placed)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -31764  -9640   -604   4816  76193 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  90600.7    13050.3   6.942 4.07e-10 ***
## satis        -1913.1     2000.0  -0.957   0.3411    
## work_yrs      2506.8      528.6   4.742 7.11e-06 ***
## frstlang     13541.5     6305.7   2.147   0.0342 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15740 on 99 degrees of freedom
## Multiple R-squared:  0.2466, Adjusted R-squared:  0.2237 
## F-statistic:  10.8 on 3 and 99 DF,  p-value: 3.354e-06
#work_yrs and frstlang are significant variables in model 2. The multiple R-squared value indicates that the model accounts for 24.66% of the variance in salary. The residual standard error (15740) can be thought of as the typical error in predicting salary from work experience, satisfaction and first language.
# Model 3: salary of placed students regressed on age and sex code.
model3 <- lm(salary ~ age + sex, data = placed)
summary(model3)
## 
## Call:
## lm(formula = salary ~ age + sex, data = placed)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -29047  -9444  -1750   5428  84503 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  36859.8    14123.5   2.610   0.0105 *  
## age           2653.1      475.1   5.584 2.03e-07 ***
## sex          -3743.6     3372.6  -1.110   0.2697    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15540 on 100 degrees of freedom
## Multiple R-squared:  0.2588, Adjusted R-squared:  0.244 
## F-statistic: 17.46 on 2 and 100 DF,  p-value: 3.144e-07
#Age is a significant factor in model 3

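We see that models 2 and 3 are clearly better than model 1 (R-squared 0.0609). Model 3 has the highest R-squared (0.2588, adjusted 0.244), slightly ahead of model 2 (0.2466, adjusted 0.2237).

2c. Comparing students with job and without job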

# Flag every student whose salary value exceeds 1000 as placed, then print
# each Placed x sex cell as a share of all students (margins added, rounded
# to two decimals).
mba$Placed <- mba$salary > 1000
mytable_sex <- xtabs(~ Placed + sex, data = mba)
round(ftable(addmargins(prop.table(mytable_sex))), 2)
##        sex    1    2  Sum
## Placed                   
## FALSE      0.49 0.14 0.62
## TRUE       0.26 0.11 0.38
## Sum        0.75 0.25 1.00
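#These are shares of all students: placed men make up 26% and placed women 11% of the class, while men who were not placed make up 49% and women who were not placed 14%. Within each sex, roughly 35% of men (0.26/0.75) and 45% of women (0.11/0.25) were placed.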
# Placed x first-language cells as shares of all students, with margins,
# rounded to two decimals.
mytable_frstlang <- xtabs(~ Placed + frstlang, data = mba)
round(ftable(addmargins(prop.table(mytable_frstlang))), 2)
##        frstlang    1    2  Sum
## Placed                        
## FALSE           0.53 0.09 0.62
## TRUE            0.35 0.03 0.38
## Sum             0.88 0.12 1.00

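Chi-square test. Hypothesis H1: Satisfaction with the MBA course does not depend on salary. (Note: the two tests below check whether placement status is independent of sex and of first language; neither of them involves satisfaction.)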

# Chi-squared test of independence between placement status and sex.
chisq.test(mytable_sex)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  mytable_sex
## X-squared = 2.033, df = 1, p-value = 0.1539
# p = 0.1539 > 0.05: no significant association between sex and placement.
# Chi-squared test of independence between placement status and first language.
chisq.test(mytable_frstlang)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  mytable_frstlang
## X-squared = 3.0938, df = 1, p-value = 0.07859
# p = 0.07859 > 0.05: no significant association between first language and
# placement at the 5% level.