This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Load the data set from "StartingSal.csv" in the working directory.
data3 <- read.csv(file = "StartingSal.csv")
library(psych)
## Warning: package 'psych' was built under R version 3.4.3
# Per-column descriptive statistics for every variable in data3.
psych::describe(data3)
## vars n mean sd median trimmed mad min max
## age 1 274 27.36 3.71 27 26.76 2.97 22 48
## sex 2 274 1.25 0.43 1 1.19 0.00 1 2
## gmat_tot 3 274 619.45 57.54 620 618.86 59.30 450 790
## gmat_qpc 4 274 80.64 14.87 83 82.31 14.83 28 99
## gmat_vpc 5 274 78.32 16.86 81 80.33 14.83 16 99
## gmat_tpc 6 274 84.20 14.02 87 86.12 11.86 0 99
## s_avg 7 274 3.03 0.38 3 3.03 0.44 2 4
## f_avg 8 274 3.06 0.53 3 3.09 0.37 0 4
## quarter 9 274 2.48 1.11 2 2.47 1.48 1 4
## work_yrs 10 274 3.87 3.23 3 3.29 1.48 0 22
## frstlang 11 274 1.12 0.32 1 1.02 0.00 1 2
## salary 12 274 39025.69 50951.56 999 33607.86 1481.12 0 220000
## satis 13 274 172.18 371.61 6 91.50 1.48 1 998
## range skew kurtosis se
## age 26 2.16 6.45 0.22
## sex 1 1.16 -0.66 0.03
## gmat_tot 340 -0.01 0.06 3.48
## gmat_qpc 71 -0.92 0.30 0.90
## gmat_vpc 83 -1.04 0.74 1.02
## gmat_tpc 99 -2.28 9.02 0.85
## s_avg 2 -0.06 -0.38 0.02
## f_avg 4 -2.08 10.85 0.03
## quarter 3 0.02 -1.35 0.07
## work_yrs 22 2.78 9.80 0.20
## frstlang 1 2.37 3.65 0.02
## salary 220000 0.70 -1.05 3078.10
## satis 997 1.77 1.13 22.45
This is the summary of the data.
Description of data and its fields
## age sex gmat_tot gmat_qpc
## Min. :22.00 Min. :1.000 Min. :450.0 Min. :28.00
## 1st Qu.:25.00 1st Qu.:1.000 1st Qu.:580.0 1st Qu.:72.00
## Median :27.00 Median :1.000 Median :620.0 Median :83.00
## Mean :27.36 Mean :1.248 Mean :619.5 Mean :80.64
## 3rd Qu.:29.00 3rd Qu.:1.000 3rd Qu.:660.0 3rd Qu.:93.00
## Max. :48.00 Max. :2.000 Max. :790.0 Max. :99.00
## gmat_vpc gmat_tpc s_avg f_avg
## Min. :16.00 Min. : 0.0 Min. :2.000 Min. :0.000
## 1st Qu.:71.00 1st Qu.:78.0 1st Qu.:2.708 1st Qu.:2.750
## Median :81.00 Median :87.0 Median :3.000 Median :3.000
## Mean :78.32 Mean :84.2 Mean :3.025 Mean :3.062
## 3rd Qu.:91.00 3rd Qu.:94.0 3rd Qu.:3.300 3rd Qu.:3.250
## Max. :99.00 Max. :99.0 Max. :4.000 Max. :4.000
## quarter work_yrs frstlang salary
## Min. :1.000 Min. : 0.000 Min. :1.000 Min. : 0
## 1st Qu.:1.250 1st Qu.: 2.000 1st Qu.:1.000 1st Qu.: 0
## Median :2.000 Median : 3.000 Median :1.000 Median : 999
## Mean :2.478 Mean : 3.872 Mean :1.117 Mean : 39026
## 3rd Qu.:3.000 3rd Qu.: 4.000 3rd Qu.:1.000 3rd Qu.: 97000
## Max. :4.000 Max. :22.000 Max. :2.000 Max. :220000
## satis
## Min. : 1.0
## 1st Qu.: 5.0
## Median : 6.0
## Mean :172.2
## 3rd Qu.: 7.0
## Max. :998.0
## 'data.frame': 274 obs. of 13 variables:
## $ age : int 23 24 24 24 24 24 25 25 25 25 ...
## $ sex : int 2 1 1 1 2 1 1 2 1 1 ...
## $ gmat_tot: int 620 610 670 570 710 640 610 650 630 680 ...
## $ gmat_qpc: int 77 90 99 56 93 82 89 88 79 99 ...
## $ gmat_vpc: int 87 71 78 81 98 89 74 89 91 81 ...
## $ gmat_tpc: int 87 87 95 75 98 91 87 92 89 96 ...
## $ s_avg : num 3.4 3.5 3.3 3.3 3.6 3.9 3.4 3.3 3.3 3.45 ...
## $ f_avg : num 3 4 3.25 2.67 3.75 3.75 3.5 3.75 3.25 3.67 ...
## $ quarter : int 1 1 1 1 1 1 1 1 1 1 ...
## $ work_yrs: int 2 2 2 1 2 2 2 2 2 2 ...
## $ frstlang: int 1 1 1 1 1 1 1 1 2 1 ...
## $ salary : int 0 0 0 0 999 0 0 0 999 998 ...
## $ satis : int 7 6 6 7 5 6 5 6 4 998 ...
# Histograms of individual columns of data3, one plot per variable.
hist(data3$age, col = "green", breaks = 8,
     main = "Age Distribution", xlab = "Age")
# NOTE(review): data3$salary still contains the values 0, 998 and 999 that
# are filtered out later when data4 is built - confirm that plotting them
# here is intended.
hist(data3$salary, col = "red", breaks = 30,
     main = "Salary wise distribution", xlab = "Salary")
hist(data3$gmat_tot, col = "blue",
     main = "GMAT Score", xlab = "Score in GMAT")
# The axis label previously read "Genwise wise distribution" (typo).
hist(data3$sex, col = "green",
     main = "Gender", xlab = "Gender wise distribution")
hist(data3$work_yrs, col = "red",
     main = "Work Experience Distribution", xlab = "Work Experience")
hist(data3$s_avg, col = "brown",
     main = "Spring score of students", xlab = "Spring Average")
# Keep only the rows whose satisfaction score is at most 7, then plot them.
newd <- subset(data3, satis <= 7)
hist(newd$satis, breaks = 5, col = "green",
     main = "Satisfaction with MBA Program", xlab = "Satisfaction")
# Distribution of the quarter column over the full data set.
hist(data3$quarter, breaks = 4, col = "blue",
     main = "Quartile Ranking", xlab = "Ranking")
# Keep only rows with a usable salary figure: the values 0, 998 and 999
# are excluded.
data4 <- data3[which(data3$salary != 0 &
                     data3$salary != 998 &
                     data3$salary != 999), ]
# Horizontal box plots of salary, grouped by sex and by first language.
# With horizontal = TRUE the groups sit on the vertical axis (ylab) and
# salary on the horizontal axis (xlab).
# The first title previously said "Salary vs Age" although the grouping
# variable is sex.
boxplot(data4$salary ~ data4$sex, main = "Plot of Salary vs Sex",
        xlab = "Salary", ylab = "Sex", horizontal = TRUE)
boxplot(data4$salary ~ data4$frstlang,
        main = "Plot of Salary vs first language",
        xlab = "Salary", ylab = "FIRST LANGUAGE", horizontal = TRUE)
library(car)
## Warning: package 'car' was built under R version 3.4.3
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Rebuild the filtered data set (rows with salary 0, 998 or 999 removed).
data4 <- data3[which(data3$salary != 0 &
                     data3$salary != 998 &
                     data3$salary != 999), ]
# Scatterplots of salary (vertical axis) against one variable at a time.
scatterplot(data4$salary ~ data4$age, xlab = "Age", ylab = "Salary")
# quarter is labelled "Quartile Ranking" in the histogram of the same
# column; the label here previously read "Percentile".
scatterplot(data4$salary ~ data4$quarter,
            xlab = "Quartile Ranking", ylab = "Salary")
scatterplot(data4$salary ~ data4$work_yrs,
            xlab = "Work Experience", ylab = "Salary")
scatterplot(data4$salary ~ data4$gmat_tot,
            xlab = "GMat Total", ylab = "Salary")
scatterplot(data4$salary ~ data4$f_avg,
            xlab = "Fall Average", ylab = "Salary")
scatterplot(data4$salary ~ data4$s_avg,
            xlab = "Spring Average", ylab = "Salary")
# The labels were previously xlab = "Age", ylab = "Satisfaction", which did
# not match the plotted variables (salary on y, satis on x).
scatterplot(data4$salary ~ data4$satis,
            xlab = "Satisfaction", ylab = "Salary")
library(corrgram)
## Warning: package 'corrgram' was built under R version 3.4.3
# Correlation overview of all data4 columns: shaded cells below the
# diagonal, pies above it, variable names on the diagonal; order = TRUE
# lets corrgram reorder the variables.
# The title previously read "Corrogram" (typo).
corrgram(data4, order = TRUE,
         text.panel = panel.txt,
         lower.panel = panel.shade,
         upper.panel = panel.pie,
         main = "Corrgram")
# Pairwise correlation matrix of data4, rounded to two decimals.
round(cor(data4), 2)
## age sex gmat_tot gmat_qpc gmat_vpc gmat_tpc s_avg f_avg
## age 1.00 -0.14 -0.08 -0.17 0.02 -0.10 0.16 -0.22
## sex -0.14 1.00 -0.02 -0.15 0.05 -0.05 0.08 0.17
## gmat_tot -0.08 -0.02 1.00 0.67 0.78 0.97 0.17 0.12
## gmat_qpc -0.17 -0.15 0.67 1.00 0.09 0.66 0.02 0.10
## gmat_vpc 0.02 0.05 0.78 0.09 1.00 0.78 0.16 0.02
## gmat_tpc -0.10 -0.05 0.97 0.66 0.78 1.00 0.14 0.07
## s_avg 0.16 0.08 0.17 0.02 0.16 0.14 1.00 0.45
## f_avg -0.22 0.17 0.12 0.10 0.02 0.07 0.45 1.00
## quarter -0.13 -0.02 -0.11 0.01 -0.13 -0.10 -0.84 -0.43
## work_yrs 0.88 -0.09 -0.12 -0.18 -0.03 -0.13 0.16 -0.22
## frstlang 0.35 0.08 -0.13 0.01 -0.22 -0.16 -0.14 -0.05
## salary 0.50 -0.17 -0.09 0.01 -0.14 -0.13 0.10 -0.11
## satis 0.11 -0.09 0.06 0.00 0.15 0.12 -0.14 -0.12
## quarter work_yrs frstlang salary satis
## age -0.13 0.88 0.35 0.50 0.11
## sex -0.02 -0.09 0.08 -0.17 -0.09
## gmat_tot -0.11 -0.12 -0.13 -0.09 0.06
## gmat_qpc 0.01 -0.18 0.01 0.01 0.00
## gmat_vpc -0.13 -0.03 -0.22 -0.14 0.15
## gmat_tpc -0.10 -0.13 -0.16 -0.13 0.12
## s_avg -0.84 0.16 -0.14 0.10 -0.14
## f_avg -0.43 -0.22 -0.05 -0.11 -0.12
## quarter 1.00 -0.13 0.11 -0.13 0.23
## work_yrs -0.13 1.00 0.20 0.45 0.06
## frstlang 0.11 0.20 1.00 0.27 0.09
## salary -0.13 0.45 0.27 1.00 -0.04
## satis 0.23 0.06 0.09 -0.04 1.00
# Correlation of salary with every column of data4 (returned as one row).
cor(x = data4$salary, y = data4)
## age sex gmat_tot gmat_qpc gmat_vpc gmat_tpc
## [1,] 0.4996428 -0.1662887 -0.09067141 0.0141413 -0.1374323 -0.1320178
## s_avg f_avg quarter work_yrs frstlang salary satis
## [1,] 0.1017317 -0.106039 -0.1284853 0.4546663 0.2670195 1 -0.0400506
# Count of students for each salary value, split by sex.
table1 <- xtabs(formula = ~ salary + sex, data = data4)
print(table1)
## sex
## salary 1 2
## 64000 0 1
## 77000 1 0
## 78256 0 1
## 82000 0 1
## 85000 1 3
## 86000 0 2
## 88000 0 1
## 88500 1 0
## 90000 3 0
## 92000 2 1
## 93000 2 1
## 95000 4 3
## 96000 3 1
## 96500 1 0
## 97000 2 0
## 98000 6 4
## 99000 0 1
## 100000 4 5
## 100400 1 0
## 101000 0 2
## 101100 1 0
## 101600 1 0
## 102500 1 0
## 103000 1 0
## 104000 2 0
## 105000 11 0
## 106000 2 1
## 107000 1 0
## 107300 1 0
## 107500 1 0
## 108000 2 0
## 110000 0 1
## 112000 3 0
## 115000 5 0
## 118000 1 0
## 120000 3 1
## 126710 1 0
## 130000 1 0
## 145800 1 0
## 146000 1 0
## 162000 1 0
## 220000 0 1
# Count of students for each salary value, split by first language.
table2 <- xtabs(formula = ~ salary + frstlang, data = data4)
print(table2)
## frstlang
## salary 1 2
## 64000 1 0
## 77000 1 0
## 78256 1 0
## 82000 1 0
## 85000 4 0
## 86000 2 0
## 88000 1 0
## 88500 1 0
## 90000 3 0
## 92000 3 0
## 93000 3 0
## 95000 7 0
## 96000 4 0
## 96500 1 0
## 97000 2 0
## 98000 8 2
## 99000 0 1
## 100000 9 0
## 100400 1 0
## 101000 2 0
## 101100 1 0
## 101600 1 0
## 102500 1 0
## 103000 1 0
## 104000 1 1
## 105000 11 0
## 106000 3 0
## 107000 1 0
## 107300 0 1
## 107500 1 0
## 108000 2 0
## 110000 1 0
## 112000 3 0
## 115000 5 0
## 118000 0 1
## 120000 4 0
## 126710 1 0
## 130000 1 0
## 145800 1 0
## 146000 1 0
## 162000 1 0
## 220000 0 1
# Count of students for each salary value, split by years of work
# experience.
table3 <- xtabs(formula = ~ salary + work_yrs, data = data4)
print(table3)
## work_yrs
## salary 0 1 2 3 4 5 6 7 8 10 15 16
## 64000 0 0 1 0 0 0 0 0 0 0 0 0
## 77000 0 0 1 0 0 0 0 0 0 0 0 0
## 78256 0 1 0 0 0 0 0 0 0 0 0 0
## 82000 0 1 0 0 0 0 0 0 0 0 0 0
## 85000 0 1 2 1 0 0 0 0 0 0 0 0
## 86000 0 0 1 1 0 0 0 0 0 0 0 0
## 88000 0 0 0 1 0 0 0 0 0 0 0 0
## 88500 0 0 0 1 0 0 0 0 0 0 0 0
## 90000 0 0 2 0 0 1 0 0 0 0 0 0
## 92000 0 0 3 0 0 0 0 0 0 0 0 0
## 93000 0 0 0 0 1 1 0 0 1 0 0 0
## 95000 1 1 2 2 0 1 0 0 0 0 0 0
## 96000 0 1 2 0 1 0 0 0 0 0 0 0
## 96500 0 0 1 0 0 0 0 0 0 0 0 0
## 97000 0 0 0 1 1 0 0 0 0 0 0 0
## 98000 0 0 7 1 1 0 0 1 0 0 0 0
## 99000 0 0 0 0 0 1 0 0 0 0 0 0
## 100000 0 0 6 1 1 0 1 0 0 0 0 0
## 100400 0 0 0 1 0 0 0 0 0 0 0 0
## 101000 0 0 2 0 0 0 0 0 0 0 0 0
## 101100 0 0 0 0 0 0 0 0 1 0 0 0
## 101600 0 0 0 1 0 0 0 0 0 0 0 0
## 102500 0 0 0 0 0 0 1 0 0 0 0 0
## 103000 0 0 0 1 0 0 0 0 0 0 0 0
## 104000 0 0 0 0 2 0 0 0 0 0 0 0
## 105000 0 0 4 4 0 1 1 0 0 0 0 1
## 106000 0 0 0 0 0 0 2 0 1 0 0 0
## 107000 0 0 1 0 0 0 0 0 0 0 0 0
## 107300 0 0 1 0 0 0 0 0 0 0 0 0
## 107500 0 0 0 1 0 0 0 0 0 0 0 0
## 108000 0 0 0 1 1 0 0 0 0 0 0 0
## 110000 0 0 0 0 0 0 1 0 0 0 0 0
## 112000 0 0 1 0 0 0 1 0 0 0 0 1
## 115000 0 2 0 1 2 0 0 0 0 0 0 0
## 118000 0 0 0 0 0 0 0 0 0 1 0 0
## 120000 0 0 0 1 0 2 0 0 1 0 0 0
## 126710 0 0 0 1 0 0 0 0 0 0 0 0
## 130000 0 0 0 0 1 0 0 0 0 0 0 0
## 145800 0 0 1 0 0 0 0 0 0 0 0 0
## 146000 0 0 0 0 0 0 0 0 0 0 1 0
## 162000 0 1 0 0 0 0 0 0 0 0 0 0
## 220000 0 0 0 0 0 0 0 0 0 0 1 0
# Two-sample t-test of mean salary between the two sex groups, assuming
# equal variances in both groups (var.equal = TRUE).
t.test(salary ~ sex, data = data4, var.equal = TRUE)
##
## Two Sample t-test
##
## data: salary by sex
## t = 1.6948, df = 101, p-value = 0.0932
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1099.123 13992.293
## sample estimates:
## mean in group 1 mean in group 2
## 104970.97 98524.39
# Pearson chi-squared test on table2 (salary by first language).
# NOTE(review): R warns that the chi-squared approximation may be
# incorrect here; table2 consists mostly of cells with counts of 0 or 1,
# so treat the p-value with caution.
chisq.test(table2)
## Warning in chisq.test(table2): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: table2
## X-squared = 69.847, df = 41, p-value = 0.003296
# Pearson chi-squared test on table3 (salary by years of work experience).
# NOTE(review): R warns that the chi-squared approximation may be
# incorrect here; table3 consists mostly of cells with counts of 0 or 1,
# so treat the p-value with caution.
chisq.test(table3)
## Warning in chisq.test(table3): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: table3
## X-squared = 535.23, df = 451, p-value = 0.003809
# Test whether the correlation between salary and GMAT total is non-zero.
cor.test(x = data4$salary, y = data4$gmat_tot)
##
## Pearson's product-moment correlation
##
## data: data4$salary and data4$gmat_tot
## t = -0.91501, df = 101, p-value = 0.3624
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2792952 0.1046903
## sample estimates:
## cor
## -0.09067141
# Test whether the correlation between salary and s_avg is non-zero.
cor.test(x = data4$salary, y = data4$s_avg)
##
## Pearson's product-moment correlation
##
## data: data4$salary and data4$s_avg
## t = 1.0277, df = 101, p-value = 0.3065
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.09363639 0.28955576
## sample estimates:
## cor
## 0.1017317
# Test whether the correlation between salary and satis is non-zero.
cor.test(x = data4$salary, y = data4$satis)
##
## Pearson's product-moment correlation
##
## data: data4$salary and data4$satis
## t = -0.40283, df = 101, p-value = 0.6879
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2317788 0.1546729
## sample estimates:
## cor
## -0.0400506
# Test whether the correlation between salary and age is non-zero.
cor.test(x = data4$salary, y = data4$age)
##
## Pearson's product-moment correlation
##
## data: data4$salary and data4$age
## t = 5.7968, df = 101, p-value = 7.748e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3388862 0.6320523
## sample estimates:
## cor
## 0.4996428
# Pearson chi-squared test on the cross-tabulation of gmat_tot and gmat_tpc.
# NOTE(review): both columns are treated as categorical here, and R warns
# that the chi-squared approximation may be incorrect.
chisq.test(data4$gmat_tot,data4$gmat_tpc)
## Warning in chisq.test(data4$gmat_tot, data4$gmat_tpc): Chi-squared
## approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: data4$gmat_tot and data4$gmat_tpc
## X-squared = 1651.9, df = 600, p-value < 2.2e-16
# Pearson chi-squared test on the cross-tabulation of salary and quarter.
# NOTE(review): salary is treated as categorical here, and R warns that
# the chi-squared approximation may be incorrect.
chisq.test(data4$salary,data4$quarter)
## Warning in chisq.test(data4$salary, data4$quarter): Chi-squared
## approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: data4$salary and data4$quarter
## X-squared = 129.85, df = 123, p-value = 0.3186
# Starting model: salary regressed on nine predictors.
m1 <- lm(
  formula = salary ~ sex + age + work_yrs + gmat_tot + frstlang +
    s_avg + f_avg + quarter + satis,
  data = data4
)
print(summary(m1))
##
## Call:
## lm(formula = salary ~ sex + age + work_yrs + gmat_tot + frstlang +
## s_avg + f_avg + quarter + satis, data = data4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24034 -8529 -1589 5875 80478
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 78541.32 41033.00 1.914 0.0587 .
## sex -4956.56 3544.15 -1.399 0.1653
## age 1637.25 1129.35 1.450 0.1505
## work_yrs 792.84 1150.00 0.689 0.4923
## gmat_tot -11.17 32.05 -0.349 0.7282
## frstlang 11069.51 7150.49 1.548 0.1250
## s_avg -779.00 8077.43 -0.096 0.9234
## f_avg -958.56 3869.78 -0.248 0.8049
## quarter -1633.49 2657.39 -0.615 0.5403
## satis -1987.24 2084.85 -0.953 0.3430
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15720 on 93 degrees of freedom
## Multiple R-squared: 0.2942, Adjusted R-squared: 0.2259
## F-statistic: 4.308 on 9 and 93 DF, p-value: 0.0001058
# Eight-predictor model: the nine-predictor formula without s_avg.
m2 <- lm(
  formula = salary ~ sex + age + work_yrs + gmat_tot + frstlang +
    f_avg + quarter + satis,
  data = data4
)
print(summary(m2))
##
## Call:
## lm(formula = salary ~ sex + age + work_yrs + gmat_tot + frstlang +
## f_avg + quarter + satis, data = data4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24686 -8560 -1526 5832 80612
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 76486.27 34880.98 2.193 0.0308 *
## sex -5003.41 3492.15 -1.433 0.1552
## age 1627.90 1119.24 1.454 0.1491
## work_yrs 789.59 1143.43 0.691 0.4916
## gmat_tot -11.61 31.56 -0.368 0.7138
## frstlang 11174.82 7029.28 1.590 0.1152
## f_avg -1035.59 3766.44 -0.275 0.7840
## quarter -1434.10 1660.78 -0.864 0.3901
## satis -2001.75 2068.42 -0.968 0.3356
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15640 on 94 degrees of freedom
## Multiple R-squared: 0.2942, Adjusted R-squared: 0.2341
## F-statistic: 4.897 on 8 and 94 DF, p-value: 4.506e-05
# Seven-predictor model: f_avg is no longer included.
m3 <- lm(
  formula = salary ~ sex + age + work_yrs + gmat_tot + frstlang +
    quarter + satis,
  data = data4
)
print(summary(m3))
##
## Call:
## lm(formula = salary ~ sex + age + work_yrs + gmat_tot + frstlang +
## quarter + satis, data = data4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25054 -8678 -1597 5943 80180
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 72694.79 31883.25 2.280 0.0248 *
## sex -5128.21 3445.64 -1.488 0.1400
## age 1657.58 1108.59 1.495 0.1382
## work_yrs 807.86 1135.93 0.711 0.4787
## gmat_tot -12.18 31.34 -0.388 0.6985
## frstlang 10999.97 6966.32 1.579 0.1177
## quarter -1220.16 1460.07 -0.836 0.4054
## satis -2011.88 2058.01 -0.978 0.3308
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15560 on 95 degrees of freedom
## Multiple R-squared: 0.2936, Adjusted R-squared: 0.2415
## F-statistic: 5.64 on 7 and 95 DF, p-value: 1.834e-05
# Six-predictor model: gmat_tot is no longer included.
m4 <- lm(
  formula = salary ~ sex + age + work_yrs + frstlang + quarter + satis,
  data = data4
)
print(summary(m4))
##
## Call:
## lm(formula = salary ~ sex + age + work_yrs + frstlang + quarter +
## satis, data = data4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24386 -8915 -1714 6283 80982
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 65982.8 26677.0 2.473 0.0151 *
## sex -5128.3 3430.4 -1.495 0.1382
## age 1617.6 1098.9 1.472 0.1443
## work_yrs 870.2 1119.5 0.777 0.4389
## frstlang 11345.3 6878.7 1.649 0.1023
## quarter -1150.8 1442.7 -0.798 0.4270
## satis -2092.2 2038.5 -1.026 0.3073
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15490 on 96 degrees of freedom
## Multiple R-squared: 0.2925, Adjusted R-squared: 0.2482
## F-statistic: 6.614 on 6 and 96 DF, p-value: 7.096e-06
# Five-predictor model: work_yrs is no longer included.
m5 <- lm(
  formula = salary ~ sex + age + frstlang + quarter + satis,
  data = data4
)
print(summary(m5))
##
## Call:
## lm(formula = salary ~ sex + age + frstlang + quarter + satis,
## data = data4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24008 -9061 -1996 6363 81963
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 50594.0 17844.4 2.835 0.00557 **
## sex -4840.5 3403.4 -1.422 0.15815
## age 2369.6 520.1 4.556 1.52e-05 ***
## frstlang 9942.1 6624.1 1.501 0.13663
## quarter -1126.3 1439.4 -0.783 0.43582
## satis -2173.4 2031.7 -1.070 0.28739
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15460 on 97 degrees of freedom
## Multiple R-squared: 0.288, Adjusted R-squared: 0.2513
## F-statistic: 7.848 on 5 and 97 DF, p-value: 3.074e-06
# Four-predictor model: sex, age, frstlang and satis (quarter is no longer
# included).
# Stored as m6 rather than m1 so that the nine-predictor model kept in m1
# is not overwritten and the m1..m8 numbering stays consistent.
m6 <- lm(salary ~ sex + age + frstlang + satis, data = data4)
summary(m6)
##
## Call:
## lm(formula = salary ~ sex + age + frstlang + satis, data = data4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25463 -9177 -1636 5686 79645
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 48730.8 17649.8 2.761 0.00688 **
## sex -4720.6 3393.2 -1.391 0.16732
## age 2452.8 508.1 4.827 5.1e-06 ***
## frstlang 9105.5 6524.3 1.396 0.16598
## satis -2542.7 1972.2 -1.289 0.20034
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15430 on 98 degrees of freedom
## Multiple R-squared: 0.2835, Adjusted R-squared: 0.2543
## F-statistic: 9.695 on 4 and 98 DF, p-value: 1.197e-06
# Three-predictor model: sex, age and frstlang.
m7 <- lm(
  formula = salary ~ sex + age + frstlang,
  data = data4
)
print(summary(m7))
##
## Call:
## lm(formula = salary ~ sex + age + frstlang, data = data4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28723 -9214 -1296 5524 80180
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 35035.9 14142.6 2.477 0.0149 *
## sex -4343.6 3391.8 -1.281 0.2033
## age 2409.7 508.7 4.737 7.26e-06 ***
## frstlang 8541.4 6531.3 1.308 0.1940
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15480 on 99 degrees of freedom
## Multiple R-squared: 0.2714, Adjusted R-squared: 0.2493
## F-statistic: 12.29 on 3 and 99 DF, p-value: 6.687e-07
# Two-predictor model: age and frstlang only.
m8 <- lm(
  formula = salary ~ age + frstlang,
  data = data4
)
print(summary(m8))
##
## Call:
## lm(formula = salary ~ age + frstlang, data = data4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -31507 -8412 -2035 4493 76632
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 27421.3 12872.8 2.130 0.0356 *
## age 2528.2 501.8 5.038 2.09e-06 ***
## frstlang 7409.9 6492.0 1.141 0.2564
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15530 on 100 degrees of freedom
## Multiple R-squared: 0.2593, Adjusted R-squared: 0.2445
## F-statistic: 17.5 on 2 and 100 DF, p-value: 3.036e-07
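So the backward-elimination sequence ends with a model that retains Age and First Language as predictors of salary. Of these, only Age is statistically significant (p = 2.09e-06); First Language is not (p = 0.2564).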
# Rows whose salary is recorded as 0 (used below as the "no job" group).
# salary is an integer column, so compare against the number 0 rather than
# the string "0".
nojob <- data3[which(data3$salary == 0), ]
# chisq.test(x, y) needs two vectors of equal length, so data4 is cut down
# to as many rows as nojob has (previously hard-coded as 1:90).
# Note that this overwrites data4 for everything that follows.
data4 <- data4[seq_len(nrow(nojob)), ]
# NOTE(review): this cross-tabulates row i of data4 against row i of nojob,
# i.e. it pairs up different students by file position. A test of whether
# age is associated with getting a job would normally cross-tabulate a
# job / no-job indicator against age - TODO confirm the intended analysis.
chisq.test(data4$age, nojob$age)
## Warning in chisq.test(data4$age, nojob$age): Chi-squared approximation may
## be incorrect
##
## Pearson's Chi-squared test
##
## data: data4$age and nojob$age
## X-squared = 229.27, df = 252, p-value = 0.8449
# Chi-squared test on the element-by-element cross-tabulation of sex in
# data4 against sex in nojob.
# NOTE(review): the two vectors come from different students paired by
# row position - confirm this is the intended comparison.
chisq.test(data4$sex,nojob$sex)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: data4$sex and nojob$sex
## X-squared = 0.11711, df = 1, p-value = 0.7322
# Chi-squared test on the element-by-element cross-tabulation of work_yrs
# in data4 against work_yrs in nojob.
# NOTE(review): the two vectors come from different students paired by
# row position - confirm this is the intended comparison.
chisq.test(data4$work_yrs,nojob$work_yrs)
## Warning in chisq.test(data4$work_yrs, nojob$work_yrs): Chi-squared
## approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: data4$work_yrs and nojob$work_yrs
## X-squared = 117.66, df = 176, p-value = 0.9998
# Chi-squared test on the element-by-element cross-tabulation of quarter
# in data4 against quarter in nojob.
# NOTE(review): the two vectors come from different students paired by
# row position. The file appears to be ordered by quarter (the first rows
# printed by str() are all quarter 1), so the very small p-value may
# reflect that ordering rather than a link between quarter and getting a
# job - TODO confirm.
chisq.test(data4$quarter,nojob$quarter)
## Warning in chisq.test(data4$quarter, nojob$quarter): Chi-squared
## approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: data4$quarter and nojob$quarter
## X-squared = 110.98, df = 9, p-value < 2.2e-16
# Chi-squared test on the element-by-element cross-tabulation of gmat_tot
# in data4 against gmat_tot in nojob.
# NOTE(review): the two vectors come from different students paired by
# row position - confirm this is the intended comparison.
chisq.test(data4$gmat_tot,nojob$gmat_tot)
## Warning in chisq.test(data4$gmat_tot, nojob$gmat_tot): Chi-squared
## approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: data4$gmat_tot and nojob$gmat_tot
## X-squared = 496.72, df = 500, p-value = 0.533
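So, of the factors tested, only Quartile shows a significant association with getting a job (p < 2.2e-16); age (p = 0.8449), sex (p = 0.7322), work experience (p = 0.9998) and GMAT total (p = 0.533) do not.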