# Load the MBA starting-salaries data from the working directory.
# The original wrapped the file name in paste(..., sep = ""), which does
# nothing for a single string, so the literal is passed directly.
MBAstartingsalaries <- read.csv("MBAStartingsalariesData.csv")

# View() opens a data viewer, which is only useful with a user present;
# skip it when the script runs non-interactively (Rscript, knitr).
if (interactive()) {
  View(MBAstartingsalaries)
}

# attach() was removed: the statements visible in this script address columns
# through `MBAstartingsalaries$...`, `job.df$...` or a `data =` argument, and
# attached copies would not follow later subsetting of the data frame.
# psych supplies describe(), which prints per-column n, mean, sd, median,
# trimmed mean, mad, min, max, range, skew, kurtosis and se.
library(psych)
## Warning: package 'psych' was built under R version 3.4.3
# NOTE(review): in the recorded output salary has median 999 and satis has
# max 998; these look like placeholder codes rather than real values (the
# script later excludes salary 0/998/999) -- confirm before reading the means.
describe(MBAstartingsalaries)
## vars n mean sd median trimmed mad min max
## age 1 274 27.36 3.71 27 26.76 2.97 22 48
## sex 2 274 1.25 0.43 1 1.19 0.00 1 2
## gmat_tot 3 274 619.45 57.54 620 618.86 59.30 450 790
## gmat_qpc 4 274 80.64 14.87 83 82.31 14.83 28 99
## gmat_vpc 5 274 78.32 16.86 81 80.33 14.83 16 99
## gmat_tpc 6 274 84.20 14.02 87 86.12 11.86 0 99
## s_avg 7 274 3.03 0.38 3 3.03 0.44 2 4
## f_avg 8 274 3.06 0.53 3 3.09 0.37 0 4
## quarter 9 274 2.48 1.11 2 2.47 1.48 1 4
## work_yrs 10 274 3.87 3.23 3 3.29 1.48 0 22
## frstlang 11 274 1.12 0.32 1 1.02 0.00 1 2
## salary 12 274 39025.69 50951.56 999 33607.86 1481.12 0 220000
## satis 13 274 172.18 371.61 6 91.50 1.48 1 998
## range skew kurtosis se
## age 26 2.16 6.45 0.22
## sex 1 1.16 -0.66 0.03
## gmat_tot 340 -0.01 0.06 3.48
## gmat_qpc 71 -0.92 0.30 0.90
## gmat_vpc 83 -1.04 0.74 1.02
## gmat_tpc 99 -2.28 9.02 0.85
## s_avg 2 -0.06 -0.38 0.02
## f_avg 4 -2.08 10.85 0.03
## quarter 3 0.02 -1.35 0.07
## work_yrs 22 2.78 9.80 0.20
## frstlang 1 2.37 3.65 0.02
## salary 220000 0.70 -1.05 3078.10
## satis 997 1.77 1.13 22.45
# Min, quartiles, median, mean and max for every column of the full data set.
summary(MBAstartingsalaries)
## age sex gmat_tot gmat_qpc
## Min. :22.00 Min. :1.000 Min. :450.0 Min. :28.00
## 1st Qu.:25.00 1st Qu.:1.000 1st Qu.:580.0 1st Qu.:72.00
## Median :27.00 Median :1.000 Median :620.0 Median :83.00
## Mean :27.36 Mean :1.248 Mean :619.5 Mean :80.64
## 3rd Qu.:29.00 3rd Qu.:1.000 3rd Qu.:660.0 3rd Qu.:93.00
## Max. :48.00 Max. :2.000 Max. :790.0 Max. :99.00
## gmat_vpc gmat_tpc s_avg f_avg
## Min. :16.00 Min. : 0.0 Min. :2.000 Min. :0.000
## 1st Qu.:71.00 1st Qu.:78.0 1st Qu.:2.708 1st Qu.:2.750
## Median :81.00 Median :87.0 Median :3.000 Median :3.000
## Mean :78.32 Mean :84.2 Mean :3.025 Mean :3.062
## 3rd Qu.:91.00 3rd Qu.:94.0 3rd Qu.:3.300 3rd Qu.:3.250
## Max. :99.00 Max. :99.0 Max. :4.000 Max. :4.000
## quarter work_yrs frstlang salary
## Min. :1.000 Min. : 0.000 Min. :1.000 Min. : 0
## 1st Qu.:1.250 1st Qu.: 2.000 1st Qu.:1.000 1st Qu.: 0
## Median :2.000 Median : 3.000 Median :1.000 Median : 999
## Mean :2.478 Mean : 3.872 Mean :1.117 Mean : 39026
## 3rd Qu.:3.000 3rd Qu.: 4.000 3rd Qu.:1.000 3rd Qu.: 97000
## Max. :4.000 Max. :22.000 Max. :2.000 Max. :220000
## satis
## Min. : 1.0
## 1st Qu.: 5.0
## Median : 6.0
## Mean :172.2
## 3rd Qu.: 7.0
## Max. :998.0
# Boxplots of individual variables
# Boxplots of the individual variables, in the same four groups as before.
# Passing a column subset of the data frame (instead of separate vectors)
# makes boxplot() label each box with its column name; with bare vectors the
# boxes were only numbered 1, 2, 3, ...
boxplot(MBAstartingsalaries[c("age", "s_avg", "f_avg")])
boxplot(MBAstartingsalaries[c("gmat_tot", "gmat_qpc", "gmat_vpc",
                              "gmat_tpc")])
boxplot(MBAstartingsalaries[c("quarter", "work_yrs", "frstlang")])
boxplot(MBAstartingsalaries[c("salary", "satis")])
# Scatterplots to look for correlation between different variables
library(car)
## Warning: package 'car' was built under R version 3.4.3
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# GMAT total percentile against each subject percentile.
# The original call passed gmat_vpc as a second positional argument. With
# `data =` supplied by name, that value is bound to the next formal of
# scatterplot()'s formula method (`subset` in car's documented signature), so
# it was used to pick rows instead of being plotted. scatterplot() takes one
# x variable per formula, so each subject percentile now gets its own plot.
# The axis labels were also swapped relative to the formula: the response
# (gmat_tpc) is drawn on the y axis.
# NOTE(review): gmat_qpc / gmat_vpc are assumed to be the quantitative and
# verbal percentiles -- confirm against the data dictionary.
scatterplot(gmat_tpc ~ gmat_qpc, data = MBAstartingsalaries,
            spread = FALSE, smoother.args = list(lty = 2), pch = 19,
            main = "GMAT total percentile vs quantitative percentile",
            xlab = "Quantitative percentile (gmat_qpc)",
            ylab = "GMAT total percentile")
scatterplot(gmat_tpc ~ gmat_vpc, data = MBAstartingsalaries,
            spread = FALSE, smoother.args = list(lty = 2), pch = 19,
            main = "GMAT total percentile vs verbal percentile",
            xlab = "Verbal percentile (gmat_vpc)",
            ylab = "GMAT total percentile")
# Relation between GMAT total percentile and starting salary.
# The title had been copied from the previous plot and named the wrong
# variables. Column names are resolved through `data =` instead of being
# repeated as `MBAstartingsalaries$...` inside the formula.
# NOTE(review): salary here still includes the 0/998/999 values that the
# script excludes later when building job.df -- presumably non-salary codes;
# confirm whether this plot should use only the filtered rows.
scatterplot(gmat_tpc ~ salary, data = MBAstartingsalaries,
            spread = FALSE, smoother.args = list(lty = 2), pch = 19,
            main = "Scatter plot of GMAT total percentile vs starting salary",
            xlab = "Starting salaries", ylab = "GMAT total percentiles")
# Relation between quartile ranking and starting salary.
# The title had been copied from the first scatterplot and named the wrong
# variables. Column names are resolved through `data =` instead of being
# repeated as `MBAstartingsalaries$...` inside the formula.
# NOTE(review): salary here still includes the 0/998/999 values that the
# script excludes later when building job.df -- presumably non-salary codes;
# confirm whether this plot should use only the filtered rows.
scatterplot(quarter ~ salary, data = MBAstartingsalaries,
            spread = FALSE, smoother.args = list(lty = 2), pch = 19,
            main = "Scatter plot of quartile ranking vs starting salary",
            xlab = "Starting salaries", ylab = "Quartile ranking")
# Corrgram analysis of MBA starting salary
library(corrgram)
## Warning: package 'corrgram' was built under R version 3.4.3
# Correlogram over every column of the full data set: variables are
# reordered (order = TRUE), cells below the diagonal are shaded, cells above
# it are drawn as pies, and panel.txt renders the text panels.
corrgram(
  x = MBAstartingsalaries,
  main = "MBA starting salary analysis Corrgram",
  order = TRUE,
  text.panel = panel.txt,
  upper.panel = panel.pie,
  lower.panel = panel.shade
)
# Covariance matrix of nine selected columns. x and y hold the same columns,
# so cov(x, y) is the covariance of that column set with itself.
y <- MBAstartingsalaries[, c("age", "gmat_tot", "gmat_qpc", "gmat_vpc",
                             "gmat_tpc", "s_avg", "f_avg", "work_yrs",
                             "salary")]
x <- y
cov(x, y)
## age gmat_tot gmat_qpc gmat_vpc
## age 1.376904e+01 -3.115879e+01 -1.192655e+01 -2.763643
## gmat_tot -3.115879e+01 3.310688e+03 6.200233e+02 726.000642
## gmat_qpc -1.192655e+01 6.200233e+02 2.210731e+02 38.148258
## gmat_vpc -2.763643e+00 7.260006e+02 3.814826e+01 284.248122
## gmat_tpc -8.839978e+00 6.839911e+02 1.357997e+02 157.493249
## s_avg 2.116874e-01 2.480257e+00 -1.691233e-01 1.313570
## f_avg -3.399348e-02 3.154688e+00 5.753854e-01 0.672070
## work_yrs 1.029494e+01 -3.391634e+01 -1.137186e+01 -3.618165
## salary -1.183042e+04 -1.611600e+05 -3.335823e+04 -5273.852384
## gmat_tpc s_avg f_avg work_yrs salary
## age -8.8399775 0.2116874 -0.03399348 10.2949386 -1.183042e+04
## gmat_tot 683.9910698 2.4802572 3.15468838 -33.9163391 -1.611600e+05
## gmat_qpc 135.7996845 -0.1691233 0.57538542 -11.3718617 -3.335823e+04
## gmat_vpc 157.4932488 1.3135702 0.67207000 -3.6181653 -5.273852e+03
## gmat_tpc 196.6057057 0.6271001 0.58698618 -7.8575172 3.522750e+03
## s_avg 0.6271001 0.1452176 0.11016898 0.1592639 2.831601e+03
## f_avg 0.5869862 0.1101690 0.27567237 -0.0662870 7.876560e+02
## work_yrs -7.8575172 0.1592639 -0.06628700 10.4488249 1.486147e+03
## salary 3522.7500067 2831.6009858 787.65597177 1486.1470415 2.596062e+09
# Keep only the rows with a usable salary. The values 0, 998 and 999 are
# excluded (presumably "no job" / "no answer" markers -- TODO confirm against
# the data dictionary).
# The original compared the numeric column against the strings "998", "999"
# and "0", which pushes every salary through number-to-text conversion;
# the comparison is now numeric. The explicit !is.na() keeps the behaviour of
# the old which() wrapper, which dropped rows whose salary is missing.
job.df <- MBAstartingsalaries[
  !is.na(MBAstartingsalaries$salary) &
    !(MBAstartingsalaries$salary %in% c(0, 998, 999)),
]
job.df
## age sex gmat_tot gmat_qpc gmat_vpc gmat_tpc s_avg f_avg quarter
## 35 22 2 660 90 92 94 3.50 3.75 1
## 36 27 2 700 94 98 98 3.30 3.25 1
## 37 25 2 680 87 96 96 3.50 2.67 1
## 38 25 2 650 82 91 93 3.40 3.25 1
## 39 27 1 710 96 96 98 3.30 3.50 1
## 40 28 2 620 52 98 87 3.40 3.75 1
## 41 24 1 670 84 96 95 3.30 3.25 1
## 42 25 2 560 52 81 72 3.30 3.50 1
## 43 25 2 530 50 62 61 3.60 3.67 1
## 44 25 1 650 79 93 93 3.30 3.50 1
## 45 26 2 590 56 89 81 3.30 3.25 1
## 46 23 2 650 93 81 93 3.40 3.00 1
## 47 24 1 560 81 50 71 3.40 3.67 1
## 48 27 1 610 72 84 86 3.30 3.50 1
## 49 25 1 650 95 84 93 3.30 3.00 1
## 50 25 1 550 74 50 68 3.50 3.50 1
## 51 26 1 570 68 74 75 3.80 3.50 1
## 52 26 1 580 79 71 78 3.45 3.50 1
## 53 30 1 600 60 91 83 3.30 3.25 1
## 54 31 1 570 72 71 75 3.60 3.50 1
## 55 30 1 620 60 96 87 3.50 3.00 1
## 56 30 2 680 96 87 96 3.70 3.60 1
## 57 27 1 630 93 75 91 3.30 3.25 1
## 58 25 1 600 82 74 83 3.50 3.25 1
## 59 28 2 640 89 81 91 3.60 3.50 1
## 60 39 1 600 72 81 83 3.60 3.50 1
## 61 27 1 570 95 33 75 3.70 4.00 1
## 62 27 1 710 95 98 98 3.60 3.50 1
## 63 33 1 620 72 89 87 3.50 3.50 1
## 64 27 1 600 67 84 83 3.50 3.00 1
## 65 28 1 700 95 95 98 3.80 4.00 1
## 66 30 1 600 77 81 84 3.50 3.25 1
## 67 30 2 670 87 95 95 3.30 3.25 1
## 68 40 1 630 71 95 91 4.00 0.00 1
## 69 25 1 700 98 93 98 3.60 3.75 1
## 115 26 2 670 87 95 95 3.10 3.33 2
## 116 25 2 620 89 74 87 3.10 3.50 2
## 117 31 1 540 60 62 65 3.10 3.00 2
## 118 25 1 670 95 89 95 3.20 3.50 2
## 119 25 1 610 87 71 86 3.27 3.25 2
## 120 24 1 560 52 81 72 3.20 3.25 2
## 121 24 1 500 78 30 52 3.00 2.75 2
## 122 23 1 590 72 81 81 3.20 3.25 2
## 123 24 1 570 82 58 75 3.20 3.25 2
## 124 26 2 570 93 37 75 3.00 2.75 2
## 125 28 2 580 83 58 79 3.10 3.00 2
## 126 24 2 580 72 71 78 3.00 3.25 2
## 127 31 1 560 68 67 72 3.09 3.00 2
## 128 25 2 620 89 74 87 3.10 3.50 2
## 129 27 1 620 97 63 88 3.20 3.00 2
## 130 28 1 560 75 58 72 3.20 3.25 2
## 131 26 1 680 84 96 96 3.20 3.25 2
## 132 27 1 620 81 87 89 3.00 3.00 2
## 133 34 1 550 72 58 69 3.00 3.00 2
## 134 26 1 600 84 67 83 3.09 3.50 2
## 135 29 1 670 91 93 95 3.10 3.00 2
## 136 24 1 620 84 81 87 3.00 3.25 2
## 137 27 1 630 72 95 89 3.20 3.00 2
## 138 26 1 650 89 87 93 3.20 3.25 2
## 139 24 1 620 88 74 87 3.10 3.00 2
## 186 23 2 520 43 67 58 2.90 2.75 3
## 187 27 1 620 87 74 87 2.70 2.75 3
## 188 25 1 580 78 67 80 2.90 3.25 3
## 189 25 1 630 75 93 89 2.70 2.50 3
## 190 25 1 610 89 74 87 2.70 2.75 3
## 191 29 2 560 64 71 72 2.90 3.00 3
## 192 27 1 620 79 87 88 2.90 2.75 3
## 193 28 1 580 72 71 78 2.80 3.00 3
## 194 24 2 670 83 98 96 2.90 3.25 3
## 195 25 2 560 39 91 72 2.90 3.00 3
## 196 25 2 580 72 71 78 2.80 3.25 3
## 197 27 1 680 97 90 97 2.90 2.75 3
## 198 28 1 610 89 67 86 2.70 3.00 3
## 199 29 1 710 93 98 99 2.90 3.25 3
## 200 24 1 710 99 92 99 2.90 3.00 3
## 201 25 2 630 84 87 89 2.80 2.75 3
## 202 24 2 600 89 67 85 2.80 3.00 3
## 203 29 1 660 91 90 95 2.80 3.00 3
## 204 30 1 670 83 97 96 2.80 2.75 3
## 205 24 1 580 89 54 78 2.91 2.83 3
## 206 29 1 680 79 99 96 2.90 3.00 3
## 207 32 1 660 83 95 94 2.90 3.50 3
## 208 28 1 570 56 84 75 2.90 3.00 3
## 209 24 1 680 96 87 97 2.80 2.75 3
## 256 24 2 560 55 78 71 3.50 3.25 4
## 257 23 1 660 81 98 95 2.50 3.00 4
## 258 25 2 720 96 98 99 3.50 3.60 4
## 259 26 1 620 78 87 89 2.40 2.00 4
## 260 26 2 630 85 81 90 2.90 3.25 4
## 261 27 1 650 89 89 93 2.40 2.25 4
## 262 25 1 660 99 71 95 3.40 3.25 4
## 263 25 1 610 83 81 86 2.40 2.75 4
## 264 26 1 600 87 62 83 2.50 2.50 4
## 265 24 1 570 75 62 75 2.30 2.50 4
## 266 24 2 600 77 78 84 2.60 3.00 4
## 267 26 2 650 91 84 93 2.60 3.00 4
## 268 29 1 630 72 95 89 2.60 2.50 4
## 269 26 1 630 96 71 91 2.60 2.75 4
## 270 31 1 530 75 45 62 2.40 2.75 4
## 271 23 1 580 64 81 78 2.20 2.00 4
## 272 25 1 540 79 45 65 2.60 2.50 4
## 273 26 1 550 72 58 69 2.60 2.75 4
## 274 40 2 500 60 45 51 2.50 2.75 4
## work_yrs frstlang salary satis
## 35 1 1 85000 5
## 36 2 1 85000 6
## 37 2 1 86000 5
## 38 3 1 88000 7
## 39 2 1 92000 6
## 40 5 1 93000 5
## 41 0 1 95000 4
## 42 1 1 95000 5
## 43 3 1 95000 3
## 44 1 1 96000 7
## 45 4 1 96000 5
## 46 2 1 100000 7
## 47 2 1 100000 6
## 48 6 1 100000 6
## 49 2 1 105000 7
## 50 3 1 105000 6
## 51 3 1 105000 6
## 52 2 1 105000 5
## 53 5 1 105000 6
## 54 6 1 105000 6
## 55 8 1 106000 7
## 56 6 1 106000 6
## 57 3 1 107500 5
## 58 3 1 108000 6
## 59 6 1 110000 5
## 60 16 1 112000 7
## 61 4 1 115000 5
## 62 1 1 115000 5
## 63 10 2 118000 7
## 64 3 1 120000 5
## 65 5 1 120000 5
## 66 5 1 120000 6
## 67 8 1 120000 6
## 68 15 1 146000 6
## 69 1 1 162000 5
## 115 1 1 82000 7
## 116 2 1 92000 5
## 117 8 1 93000 6
## 118 2 1 95000 6
## 119 3 1 95000 6
## 120 2 1 96000 7
## 121 2 1 96500 6
## 122 2 1 98000 6
## 123 2 1 98000 6
## 124 3 2 98000 5
## 125 5 2 99000 6
## 126 2 1 100000 5
## 127 4 1 100000 6
## 128 2 1 101000 5
## 129 3 1 103000 6
## 130 4 1 104000 5
## 131 3 1 105000 6
## 132 3 1 105000 5
## 133 16 1 105000 5
## 134 2 1 107000 5
## 135 6 1 112000 6
## 136 1 1 115000 6
## 137 4 1 115000 6
## 138 4 1 130000 7
## 139 2 1 145800 6
## 186 1 1 78256 5
## 187 3 1 88500 6
## 188 2 1 90000 7
## 189 2 1 90000 5
## 190 4 1 93000 6
## 191 5 1 95000 7
## 192 4 1 97000 7
## 193 3 1 97000 6
## 194 2 1 98000 7
## 195 2 1 98000 7
## 196 2 1 98000 6
## 197 2 2 98000 6
## 198 4 1 98000 7
## 199 7 1 98000 5
## 200 3 1 100000 6
## 201 2 1 100000 6
## 202 2 1 101000 6
## 203 8 1 101100 6
## 204 6 1 102500 5
## 205 2 1 105000 5
## 206 6 1 106000 6
## 207 2 2 107300 7
## 208 4 1 108000 6
## 209 2 1 112000 6
## 256 2 1 64000 7
## 257 2 1 77000 6
## 258 3 1 85000 6
## 259 2 1 85000 6
## 260 3 1 86000 5
## 261 5 1 90000 5
## 262 2 1 92000 7
## 263 2 1 95000 7
## 264 2 1 96000 6
## 265 2 1 98000 6
## 266 2 1 100000 6
## 267 2 1 100000 7
## 268 3 1 100400 7
## 269 3 1 101600 6
## 270 4 2 104000 6
## 271 2 1 105000 6
## 272 3 1 115000 5
## 273 3 1 126710 6
## 274 15 2 220000 6
# Contingency table of salary by sex for the rows in job.df.
# NOTE(review): salary is tabulated as a category, so every distinct salary
# value gets its own row and most cells hold 0 or 1 -- consider binning
# salary before cross-tabulating.
tab1 <-xtabs(~salary+sex,data=job.df)
tab1
## sex
## salary 1 2
## 64000 0 1
## 77000 1 0
## 78256 0 1
## 82000 0 1
## 85000 1 3
## 86000 0 2
## 88000 0 1
## 88500 1 0
## 90000 3 0
## 92000 2 1
## 93000 2 1
## 95000 4 3
## 96000 3 1
## 96500 1 0
## 97000 2 0
## 98000 6 4
## 99000 0 1
## 100000 4 5
## 100400 1 0
## 101000 0 2
## 101100 1 0
## 101600 1 0
## 102500 1 0
## 103000 1 0
## 104000 2 0
## 105000 11 0
## 106000 2 1
## 107000 1 0
## 107300 1 0
## 107500 1 0
## 108000 2 0
## 110000 0 1
## 112000 3 0
## 115000 5 0
## 118000 1 0
## 120000 3 1
## 126710 1 0
## 130000 1 0
## 145800 1 0
## 146000 1 0
## 162000 1 0
## 220000 0 1
# Contingency table of salary by GMAT total score for the rows in job.df.
# NOTE(review): both variables are tabulated as categories, one row/column
# per distinct value, so the table is almost entirely zeros -- consider
# binning before cross-tabulating.
tab2<-xtabs(~salary+gmat_tot,data=job.df)
tab2
## gmat_tot
## salary 500 520 530 540 550 560 570 580 590 600 610 620 630 640 650 660
## 64000 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
## 77000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 78256 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 82000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 85000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1
## 86000 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 88000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## 88500 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 90000 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0
## 92000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1
## 93000 0 0 0 1 0 0 0 0 0 0 1 1 0 0 0 0
## 95000 0 0 1 0 0 2 0 0 0 0 2 0 0 0 0 0
## 96000 0 0 0 0 0 1 0 0 1 1 0 0 0 0 1 0
## 96500 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 97000 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0
## 98000 0 0 0 0 0 1 3 1 1 0 1 0 0 0 0 0
## 99000 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## 100000 0 0 0 0 0 2 0 1 0 1 1 0 1 0 2 0
## 100400 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 101000 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0
## 101100 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 101600 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 102500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 103000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 104000 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0
## 105000 0 0 0 0 2 0 2 3 0 1 0 1 0 0 1 0
## 106000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 107000 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## 107300 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 107500 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 108000 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0
## 110000 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## 112000 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
## 115000 0 0 0 1 0 0 1 0 0 0 0 1 1 0 0 0
## 118000 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 120000 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0
## 126710 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## 130000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## 145800 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
## 146000 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 162000 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 220000 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## gmat_tot
## salary 670 680 700 710 720
## 64000 0 0 0 0 0
## 77000 0 0 0 0 0
## 78256 0 0 0 0 0
## 82000 1 0 0 0 0
## 85000 0 0 1 0 1
## 86000 0 1 0 0 0
## 88000 0 0 0 0 0
## 88500 0 0 0 0 0
## 90000 0 0 0 0 0
## 92000 0 0 0 1 0
## 93000 0 0 0 0 0
## 95000 2 0 0 0 0
## 96000 0 0 0 0 0
## 96500 0 0 0 0 0
## 97000 0 0 0 0 0
## 98000 1 1 0 1 0
## 99000 0 0 0 0 0
## 100000 0 0 0 1 0
## 100400 0 0 0 0 0
## 101000 0 0 0 0 0
## 101100 0 0 0 0 0
## 101600 0 0 0 0 0
## 102500 1 0 0 0 0
## 103000 0 0 0 0 0
## 104000 0 0 0 0 0
## 105000 0 1 0 0 0
## 106000 0 2 0 0 0
## 107000 0 0 0 0 0
## 107300 0 0 0 0 0
## 107500 0 0 0 0 0
## 108000 0 0 0 0 0
## 110000 0 0 0 0 0
## 112000 1 1 0 0 0
## 115000 0 0 0 1 0
## 118000 0 0 0 0 0
## 120000 1 0 1 0 0
## 126710 0 0 0 0 0
## 130000 0 0 0 0 0
## 145800 0 0 0 0 0
## 146000 0 0 0 0 0
## 162000 0 0 1 0 0
## 220000 0 0 0 0 0
# Contingency table of salary by satis for the rows in job.df.
# NOTE(review): salary is tabulated as a category, one row per distinct
# value, so most cells hold 0 or 1 -- consider binning salary first.
tab3<-xtabs(~salary+satis,data=job.df)
tab3
## satis
## salary 3 4 5 6 7
## 64000 0 0 0 0 1
## 77000 0 0 0 1 0
## 78256 0 0 1 0 0
## 82000 0 0 0 0 1
## 85000 0 0 1 3 0
## 86000 0 0 2 0 0
## 88000 0 0 0 0 1
## 88500 0 0 0 1 0
## 90000 0 0 2 0 1
## 92000 0 0 1 1 1
## 93000 0 0 1 2 0
## 95000 1 1 1 2 2
## 96000 0 0 1 1 2
## 96500 0 0 0 1 0
## 97000 0 0 0 1 1
## 98000 0 0 2 5 3
## 99000 0 0 0 1 0
## 100000 0 0 1 6 2
## 100400 0 0 0 0 1
## 101000 0 0 1 1 0
## 101100 0 0 0 1 0
## 101600 0 0 0 1 0
## 102500 0 0 1 0 0
## 103000 0 0 0 1 0
## 104000 0 0 1 1 0
## 105000 0 0 4 6 1
## 106000 0 0 0 2 1
## 107000 0 0 1 0 0
## 107300 0 0 0 0 1
## 107500 0 0 1 0 0
## 108000 0 0 0 2 0
## 110000 0 0 1 0 0
## 112000 0 0 0 2 1
## 115000 0 0 3 2 0
## 118000 0 0 0 0 1
## 120000 0 0 2 2 0
## 126710 0 0 0 1 0
## 130000 0 0 0 0 1
## 145800 0 0 0 1 0
## 146000 0 0 0 1 0
## 162000 0 0 1 0 0
## 220000 0 0 0 1 0
# Pearson chi-squared test of independence on the salary-by-sex table.
# NOTE(review): the recorded run warns that the chi-squared approximation may
# be incorrect; chisq.test() offers simulate.p.value = TRUE for a Monte Carlo
# p-value -- confirm whether that (or binning salary) is wanted here.
chisq.test(tab1)
## Warning in chisq.test(tab1): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: tab1
## X-squared = 52.681, df = 41, p-value = 0.1045
# Pearson chi-squared test of independence on the salary-by-gmat_tot table.
# NOTE(review): the recorded run warns that the chi-squared approximation may
# be incorrect, so the reported p-value should be treated with caution;
# chisq.test() offers simulate.p.value = TRUE as an alternative.
chisq.test(tab2)
## Warning in chisq.test(tab2): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: tab2
## X-squared = 927.24, df = 820, p-value = 0.005279
# Pearson chi-squared test of independence on the salary-by-satis table.
# NOTE(review): the recorded run warns that the chi-squared approximation may
# be incorrect, so the reported p-value should be treated with caution;
# chisq.test() offers simulate.p.value = TRUE as an alternative.
chisq.test(tab3)
## Warning in chisq.test(tab3): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: tab3
## X-squared = 109.1, df = 164, p-value = 0.9997
# Linear model of salary on sex and the four GMAT measures, fitted to the
# rows in job.df.
# Columns are named bare in the formula and resolved through `data =`.
# Writing `job.df$salary ~ job.df$sex + ...` made `data =` irrelevant,
# labelled every coefficient "job.df$..." and stops
# predict(fit, newdata = ...) from using the new data. The fitted estimates
# are unchanged; only the term labels in the printed summary lose the
# `job.df$` prefix.
fit <- lm(salary ~ sex + gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc,
          data = job.df)
summary(fit)
##
## Call:
## lm(formula = job.df$salary ~ job.df$sex + job.df$gmat_tot + job.df$gmat_qpc +
## job.df$gmat_vpc + job.df$gmat_tpc, data = job.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -36775 -8676 -1384 4527 103963
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 107444.0 47632.9 2.256 0.0263 *
## job.df$sex -6469.3 3859.2 -1.676 0.0969 .
## job.df$gmat_tot 103.3 182.3 0.567 0.5724
## job.df$gmat_qpc 585.9 542.7 1.080 0.2830
## job.df$gmat_vpc 460.2 541.3 0.850 0.3973
## job.df$gmat_tpc -1685.9 794.4 -2.122 0.0364 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17510 on 97 degrees of freedom
## Multiple R-squared: 0.08733, Adjusted R-squared: 0.04028
## F-statistic: 1.856 on 5 and 97 DF, p-value: 0.1091
# Linear model of salary on quarter, work_yrs, frstlang and satis, fitted to
# the rows in job.df. This reassigns `fit`, replacing any earlier model
# stored under that name.
# Columns are named bare in the formula and resolved through `data =`.
# Writing `job.df$salary ~ job.df$quarter + ...` made `data =` irrelevant,
# labelled every coefficient "job.df$..." and stops
# predict(fit, newdata = ...) from using the new data. The fitted estimates
# are unchanged; only the term labels in the printed summary lose the
# `job.df$` prefix.
fit <- lm(salary ~ quarter + work_yrs + frstlang + satis, data = job.df)
summary(fit)
##
## Call:
## lm(formula = job.df$salary ~ job.df$quarter + job.df$work_yrs +
## job.df$frstlang + job.df$satis, data = job.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30022 -9509 -533 4151 78717
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 90632.0 13061.5 6.939 4.30e-10 ***
## job.df$quarter -1328.4 1456.8 -0.912 0.3641
## job.df$work_yrs 2424.2 536.8 4.516 1.76e-05 ***
## job.df$frstlang 14260.1 6360.1 2.242 0.0272 *
## job.df$satis -1486.5 2055.7 -0.723 0.4713
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15760 on 98 degrees of freedom
## Multiple R-squared: 0.2529, Adjusted R-squared: 0.2224
## F-statistic: 8.293 on 4 and 98 DF, p-value: 8.366e-06
# Two-sample t-test of salary between the two sex codes in job.df.
# var.equal = TRUE requests the pooled-variance test instead of t.test()'s
# default Welch test.
# NOTE(review): the equal-variance assumption is not checked anywhere in the
# visible script -- confirm it holds, or drop var.equal.
t.test(job.df$salary~ job.df$sex, var.equal = TRUE)
##
## Two Sample t-test
##
## data: job.df$salary by job.df$sex
## t = 1.6948, df = 101, p-value = 0.0932
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1099.123 13992.293
## sample estimates:
## mean in group 1 mean in group 2
## 104970.97 98524.39