Reading the raw data into a dataframe
# Load the raw survey data into MBA.df.
# NOTE(review): the absolute Windows path only resolves on the original author's machine.
MBA.df <- read.csv("C:/Users/here_is_sachin/Downloads/MBA Starting Salaries Data.csv")
# attach() places a snapshot of the columns on the search path, so any later
# bare column name (e.g. `age`) resolves to that snapshot of the full data set,
# not to a subset data frame and not to later modifications of MBA.df.
attach(MBA.df)
# Number of rows and columns in the raw data.
dim(MBA.df)
## [1] 274 13
# Per-column summary statistics from psych::describe(); only the first five
# statistics (vars, n, mean, sd, median) are shown.
library(psych)
describe(MBA.df)[, 1:5]
## vars n mean sd median
## age 1 274 27.36 3.71 27
## sex 2 274 1.25 0.43 1
## gmat_tot 3 274 619.45 57.54 620
## gmat_qpc 4 274 80.64 14.87 83
## gmat_vpc 5 274 78.32 16.86 81
## gmat_tpc 6 274 84.20 14.02 87
## s_avg 7 274 3.03 0.38 3
## f_avg 8 274 3.06 0.53 3
## quarter 9 274 2.48 1.11 2
## work_yrs 10 274 3.87 3.23 3
## frstlang 11 274 1.12 0.32 1
## salary 12 274 39025.69 50951.56 999
## satis 13 274 172.18 371.61 6
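- The salary and satis columns contain special codes rather than real values (salary: 0 = not placed, 998 = did not answer the survey, 999 = placed but salary not disclosed), so the summary statistics above are distorted for those columns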
Inspect the datatypes. Convert the data type of some columns
Sex and First Language
# Show the storage type of every column before any recoding.
str(MBA.df)
## 'data.frame': 274 obs. of 13 variables:
## $ age : int 23 24 24 24 24 24 25 25 25 25 ...
## $ sex : int 2 1 1 1 2 1 1 2 1 1 ...
## $ gmat_tot: int 620 610 670 570 710 640 610 650 630 680 ...
## $ gmat_qpc: int 77 90 99 56 93 82 89 88 79 99 ...
## $ gmat_vpc: int 87 71 78 81 98 89 74 89 91 81 ...
## $ gmat_tpc: int 87 87 95 75 98 91 87 92 89 96 ...
## $ s_avg : num 3.4 3.5 3.3 3.3 3.6 3.9 3.4 3.3 3.3 3.45 ...
## $ f_avg : num 3 4 3.25 2.67 3.75 3.75 3.5 3.75 3.25 3.67 ...
## $ quarter : int 1 1 1 1 1 1 1 1 1 1 ...
## $ work_yrs: int 2 2 2 1 2 2 2 2 2 2 ...
## $ frstlang: int 1 1 1 1 1 1 1 1 2 1 ...
## $ salary : int 0 0 0 0 999 0 0 0 999 998 ...
## $ satis : int 7 6 6 7 5 6 5 6 4 998 ...
# Recode the integer-coded categorical columns as labelled factors.
#   sex:      1 = Male,    2 = Female
#   frstlang: 1 = English, 2 = Other
# factor() maps the codes in a single step, so the column never passes through
# a half-converted character state, and any code other than 1 or 2 becomes NA
# instead of silently turning into an extra level.
# `levels` is given in the alphabetical order of the labels so that the
# resulting level order ("Female", "Male" / "English", "Other") is the same as
# when the factor is built from the character labels.
# The is.numeric() guards make the block safe to re-run on recoded data.
if (is.numeric(MBA.df$sex)) {
  MBA.df$sex <- factor(MBA.df$sex,
                       levels = c(2, 1),
                       labels = c("Female", "Male"))
}
if (is.numeric(MBA.df$frstlang)) {
  MBA.df$frstlang <- factor(MBA.df$frstlang,
                            levels = c(1, 2),
                            labels = c("English", "Other"))
}
str(MBA.df)
## 'data.frame': 274 obs. of 13 variables:
## $ age : int 23 24 24 24 24 24 25 25 25 25 ...
## $ sex : Factor w/ 2 levels "Female","Male": 1 2 2 2 1 2 2 1 2 2 ...
## $ gmat_tot: int 620 610 670 570 710 640 610 650 630 680 ...
## $ gmat_qpc: int 77 90 99 56 93 82 89 88 79 99 ...
## $ gmat_vpc: int 87 71 78 81 98 89 74 89 91 81 ...
## $ gmat_tpc: int 87 87 95 75 98 91 87 92 89 96 ...
## $ s_avg : num 3.4 3.5 3.3 3.3 3.6 3.9 3.4 3.3 3.3 3.45 ...
## $ f_avg : num 3 4 3.25 2.67 3.75 3.75 3.5 3.75 3.25 3.67 ...
## $ quarter : int 1 1 1 1 1 1 1 1 1 1 ...
## $ work_yrs: int 2 2 2 1 2 2 2 2 2 2 ...
## $ frstlang: Factor w/ 2 levels "English","Other": 1 1 1 1 1 1 1 1 2 1 ...
## $ salary : int 0 0 0 0 999 0 0 0 999 998 ...
## $ satis : int 7 6 6 7 5 6 5 6 4 998 ...
# Split the respondents according to what the salary column encodes.
# subset() keeps rows where the condition is TRUE and drops rows where it is NA.

# MBAs who were placed and disclosed their salary
placed.df <- subset(MBA.df, salary > 1000)
View(placed.df)

# MBAs who were not placed
notPlaced.df <- subset(MBA.df, salary == 0)
View(notPlaced.df)

# MBAs who were placed but did not disclose their salary
notDisclosedSalary.df <- subset(MBA.df, salary == 999)
View(notDisclosedSalary.df)

# MBAs who did not answer the survey
notAnsweredSurvey.df <- subset(MBA.df, salary == 998)
View(notAnsweredSurvey.df)
# Sanity check on the split: the four subsets are defined by disjoint salary
# conditions, so their row counts should add up to the row count of MBA.df.
c1 <- nrow(placed.df)
c2 <- nrow(notPlaced.df)
c3 <- nrow(notDisclosedSalary.df)
c4 <- nrow(notAnsweredSurvey.df)
c <- c1 + c2 + c3 + c4
c
## [1] 274
# Row count of the original data frame, for comparison with the sum of the subsets.
nrow(MBA.df)
## [1] 274
# Peek at the first rows of the placed-with-disclosed-salary subset.
head(placed.df)
## age sex gmat_tot gmat_qpc gmat_vpc gmat_tpc s_avg f_avg quarter
## 35 22 Female 660 90 92 94 3.5 3.75 1
## 36 27 Female 700 94 98 98 3.3 3.25 1
## 37 25 Female 680 87 96 96 3.5 2.67 1
## 38 25 Female 650 82 91 93 3.4 3.25 1
## 39 27 Male 710 96 96 98 3.3 3.50 1
## 40 28 Female 620 52 98 87 3.4 3.75 1
## work_yrs frstlang salary satis
## 35 1 English 85000 5
## 36 2 English 85000 6
## 37 2 English 86000 5
## 38 3 English 88000 7
## 39 2 English 92000 6
## 40 5 English 93000 5
# Mean starting salary among placed graduates who disclosed it.
avgSalary <- mean(placed.df$salary)
avgSalary
## [1] 103030.7
# Mean imputation: every placed graduate who withheld a salary is assigned the
# average disclosed salary (overwriting the 999 code), and the two placed
# groups are then stacked into one data frame.
notDisclosedSalary.df$salary <- avgSalary
allPlaced.df <- rbind(placed.df, notDisclosedSalary.df)
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
some(allPlaced.df) # car::some(): print a random sample of rows (ten here) from allPlaced.df; the rows differ on every run
## age sex gmat_tot gmat_qpc gmat_vpc gmat_tpc s_avg f_avg quarter
## 36 27 Female 700 94 98 98 3.3 3.25 1
## 47 24 Male 560 81 50 71 3.4 3.67 1
## 64 27 Male 600 67 84 83 3.5 3.00 1
## 120 24 Male 560 52 81 72 3.2 3.25 2
## 133 34 Male 550 72 58 69 3.0 3.00 2
## 198 28 Male 610 89 67 86 2.7 3.00 3
## 207 32 Male 660 83 95 94 2.9 3.50 3
## 256 24 Female 560 55 78 71 3.5 3.25 4
## 78 25 Male 690 87 98 98 3.0 3.00 2
## 91 27 Male 740 99 98 99 3.1 3.50 2
## work_yrs frstlang salary satis
## 36 2 English 85000.0 6
## 47 2 English 100000.0 6
## 64 3 English 120000.0 5
## 120 2 English 96000.0 7
## 133 16 English 105000.0 5
## 198 4 English 98000.0 7
## 207 2 Other 107300.0 7
## 256 2 English 64000.0 7
## 78 3 English 103030.7 5
## 91 2 English 103030.7 4
- Visually inspect 10 randomly selected rows of the dataframe
Summary Statistics of allPlaced.df
# Summary statistics for all placed graduates (disclosed + imputed salaries);
# only the first five statistics are shown.
library(psych)
describe(allPlaced.df)[, 1:5]
## vars n mean sd median
## age 1 138 26.96 3.05 26.0
## sex* 2 138 1.74 0.44 2.0
## gmat_tot 3 138 619.28 53.47 620.0
## gmat_qpc 4 138 81.10 13.59 83.5
## gmat_vpc 5 138 77.99 17.10 81.5
## gmat_tpc 6 138 84.48 13.08 87.0
## s_avg 7 138 3.03 0.38 3.0
## f_avg 8 138 3.06 0.46 3.0
## quarter 9 138 2.43 1.15 2.0
## work_yrs 10 138 3.67 2.75 3.0
## frstlang* 11 138 1.12 0.32 1.0
## salary 12 138 103030.74 15418.25 103030.7
## satis 13 138 5.53 1.11 6.0
# Open allPlaced.df in the data viewer.
View(allPlaced.df)
Review the Distribution of Salary
library(lattice)

# Disclosed salaries only
histogram(
  ~ salary,
  data = placed.df,
  main = "Distribution of Starting Salary",
  xlab = "Starting Salary",
  col = "grey"
)

# Disclosed salaries plus the mean-imputed ones
histogram(
  ~ salary,
  data = allPlaced.df,
  main = "Distribution of Starting Salary",
  xlab = "Starting Salary",
  col = "grey"
)

Comparison of salary with the given variables
# Mean salary, work experience and age by sex, disclosed salaries only.
aggregate(
  cbind(salary, work_yrs, age) ~ sex,
  data = placed.df,
  FUN = mean
)
## sex salary work_yrs age
## 1 Female 98524.39 3.258065 26.06452
## 2 Male 104970.97 3.861111 27.08333
# Same comparison including the graduates whose salary was mean-imputed.
aggregate(
  cbind(salary, work_yrs, age) ~ sex,
  data = allPlaced.df,
  FUN = mean
)
## sex salary work_yrs age
## 1 Female 99150.27 3.277778 26.13889
## 2 Male 104400.32 3.803922 27.24510
Comparison of Salary with Work Experience
# Salary against work experience for graduates with a disclosed salary.
# `horizontal` is a boxplot() argument, not a scatterplot() one; it was only
# being forwarded through `...`, so it is dropped here.
scatterplot(salary ~ work_yrs, data = placed.df,
            main = "Scatterplot of Salary with Work Experience",
            xlab = "Work Experience", ylab = "MBA's Starting Salaries")

# horizontal = TRUE puts salary on the x axis and the work_yrs groups on the
# y axis, which is why xlab carries the salary label.
boxplot(salary ~ work_yrs, data = placed.df,
        main = "Distribution of Salary with Work Experience",
        ylab = "Work Experience", xlab = "MBA's Starting Salaries",
        horizontal = TRUE)

library(lattice)
histogram(~ salary, data = placed.df,
          main = "Frequency of Starting Salary", xlab = "Starting Salary",
          col = "grey")

# Mean disclosed salary for each number of years of work experience.
salaryWorkEx <- aggregate(salary ~ work_yrs, data = placed.df, mean)
salaryWorkEx
## work_yrs salary
## 1 0 95000.00
## 2 1 103532.00
## 3 2 97673.68
## 4 3 101652.86
## 5 4 105454.55
## 6 5 103142.86
## 7 6 105928.57
## 8 7 98000.00
## 9 8 105025.00
## 10 10 118000.00
## 11 15 183000.00
## 12 16 108500.00
Comparison of Salary with GMAT total score
# Salary against total GMAT score, graduates with a disclosed salary.
scatterplot(
  salary ~ gmat_tot,
  data = placed.df,
  xlab = "GMAT Total",
  ylab = "Salary",
  main = "Comparison of Salary with Total GMAT score",
  labels = row.names(placed.df)
)

# Same plot, with points grouped by sex.
scatterplot(
  salary ~ gmat_tot | sex,
  data = placed.df,
  xlab = "GMAT Total",
  ylab = "Salary",
  main = "Comparison of Salary with Total GMAT score",
  labels = row.names(placed.df)
)

# One horizontal box per distinct GMAT total; salary runs along the x axis.
boxplot(
  salary ~ gmat_tot,
  data = placed.df,
  ylab = "GMAT Total",
  xlab = "Salary",
  main = "Comparison of Salary with Total GMAT score",
  horizontal = TRUE,
  labels = row.names(placed.df)
)

colnames(placed.df)
## [1] "age" "sex" "gmat_tot" "gmat_qpc" "gmat_vpc" "gmat_tpc"
## [7] "s_avg" "f_avg" "quarter" "work_yrs" "frstlang" "salary"
## [13] "satis"
library(car)
# scatterplotMatrix() is the current name of this car function (the dotted
# alias scatterplot.matrix is no longer provided by recent car releases) and is
# the name already used for the later scatterplot matrices in this analysis.
scatterplotMatrix(~ salary + gmat_tot + s_avg + f_avg, data = placed.df,
                  main = "Salary versus other variables")

# Same matrix, with points grouped by sex.
scatterplotMatrix(~ salary + gmat_tot + s_avg + f_avg | sex, data = placed.df,
                  main = "Salary versus other variables")

Number of males and females at each age (placed graduates with a disclosed salary)
# Cross-tabulate sex (rows) against age (columns) for the graduates in placed.df.
ageTable <- table(placed.df$sex, placed.df$age)
ageTable
##
## 22 23 24 25 26 27 28 29 30 31 32 33 34 39 40
## Female 1 2 5 10 5 1 3 1 2 0 0 0 0 0 1
## Male 0 3 11 13 9 13 5 5 4 4 1 1 1 1 1
Effect of Sex on Salary
# Mean salary, work experience and age by sex over ALL respondents.
# NOTE(review): MBA.df$salary still contains the codes 0, 998 and 999, so the
# salary means below average those codes together with real salaries; the
# same aggregate on placed.df gives means of actual salaries.
aggregate(
  cbind(salary, work_yrs, age) ~ sex,
  data = MBA.df,
  FUN = mean
)
## sex salary work_yrs age
## 1 Female 45121.07 3.808824 27.17647
## 2 Male 37013.62 3.893204 27.41748
Effect of Age on Salary
# Mean salary and work experience at each age over ALL respondents.
# NOTE(review): the salary means include the 0 / 998 / 999 codes present in
# MBA.df$salary, so they are not average salaries of placed graduates.
aggregate(
  cbind(salary, work_yrs) ~ age,
  data = MBA.df,
  FUN = mean
)
## age salary work_yrs
## 1 22 42500.00 1.000000
## 2 23 57282.00 1.750000
## 3 24 49342.24 1.727273
## 4 25 43395.55 2.264151
## 5 26 35982.07 2.875000
## 6 27 31499.37 3.130435
## 7 28 39809.00 4.666667
## 8 29 28067.95 4.500000
## 9 30 55291.25 5.583333
## 10 31 40599.40 5.800000
## 11 32 13662.25 5.625000
## 12 33 118000.00 10.000000
## 13 34 26250.00 11.500000
## 14 35 0.00 9.333333
## 15 36 0.00 12.500000
## 16 37 0.00 9.000000
## 17 39 56000.00 10.500000
## 18 40 183000.00 15.000000
## 19 42 0.00 13.000000
## 20 43 0.00 19.000000
## 21 48 0.00 22.000000
Effect of Satisfaction level on Salary
# Mean salary and work experience at each satisfaction rating, ALL respondents.
# NOTE(review): both columns carry special codes here (satis == 998 appears as
# its own group, and salary includes 0 / 998 / 999), so the salary means are
# not directly interpretable as salaries.
aggregate(
  cbind(salary, work_yrs) ~ satis,
  data = MBA.df,
  FUN = mean
)
## satis salary work_yrs
## 1 1 999.000 3.000000
## 2 2 999.000 2.000000
## 3 3 19799.200 4.200000
## 4 4 6293.412 2.941176
## 5 5 40476.311 4.243243
## 6 6 54383.536 4.185567
## 7 7 65718.152 3.727273
## 8 998 998.000 3.086957
Effect of Work Experience on MBA Starting Salary
# Salary distribution for each number of years of work experience, all respondents.
# With horizontal = FALSE the work_yrs groups lie along the x axis and salary
# along the y axis, so the axis labels are assigned accordingly.
# NOTE(review): MBA.df$salary still includes the 0 / 998 / 999 codes.
boxplot(salary ~ work_yrs, data = MBA.df,
        main = "Effect of Work Experience on Salary",
        xlab = "Work Experience", ylab = "MBA's Starting Salaries",
        horizontal = FALSE)

Effect of Gender on MBA Starting Salary
# Salary distribution by sex, all respondents.
# The grouping variable (sex) is on the x axis and salary on the y axis, so the
# labels name those two quantities; work experience is not part of this plot.
# NOTE(review): MBA.df$salary still includes the 0 / 998 / 999 codes.
boxplot(salary ~ sex, data = MBA.df,
        main = "Effect of Gender on Salary",
        xlab = "Gender", ylab = "MBA's Starting Salaries",
        horizontal = FALSE)

Distribution of MBA’s Starting Salary
library(lattice)
# Histogram of the salary column over all respondents (axis label typo fixed).
# NOTE(review): MBA.df$salary still includes the 0 / 998 / 999 codes.
histogram(~ salary, data = MBA.df,
          main = "Distribution of MBA's Starting Salary",
          xlab = "MBA's Starting Salary", col = "grey")

Distribution of MBA’s Starting Salary
library(lattice)
# Histogram of the salary column over all respondents (axis label typo fixed).
# NOTE(review): MBA.df$salary still includes the 0 / 998 / 999 codes.
histogram(~ salary, data = MBA.df,
          main = "Distribution of MBA's Starting Salary",
          xlab = "MBA's Starting Salary", col = "grey")

Merge placed.df, notDisclosedSalary.df and notPlaced.df into knownMBA.df
# Stack the three groups whose placement outcome is known; the survey
# non-respondents (notAnsweredSurvey.df) are left out.
knownMBA.df <- rbind(placed.df, notDisclosedSalary.df, notPlaced.df)
View(knownMBA.df)
Create a dummy variable called “GotPlaced”: TRUE (got a job) or FALSE (did not get a job)
# Placement flag: TRUE when the salary value exceeds 1000.
# Rows that came from notDisclosedSalary.df count as placed only because their
# salary holds the imputed average rather than the original 999 code.
knownMBA.df$GotPlaced <- knownMBA.df$salary > 1000
View(knownMBA.df)
# Store the flag as a two-level factor ("FALSE", "TRUE").
knownMBA.df$GotPlaced <- as.factor(knownMBA.df$GotPlaced)
str(knownMBA.df)
## 'data.frame': 228 obs. of 14 variables:
## $ age : int 22 27 25 25 27 28 24 25 25 25 ...
## $ sex : Factor w/ 2 levels "Female","Male": 1 1 1 1 2 1 2 1 1 2 ...
## $ gmat_tot : int 660 700 680 650 710 620 670 560 530 650 ...
## $ gmat_qpc : int 90 94 87 82 96 52 84 52 50 79 ...
## $ gmat_vpc : int 92 98 96 91 96 98 96 81 62 93 ...
## $ gmat_tpc : int 94 98 96 93 98 87 95 72 61 93 ...
## $ s_avg : num 3.5 3.3 3.5 3.4 3.3 3.4 3.3 3.3 3.6 3.3 ...
## $ f_avg : num 3.75 3.25 2.67 3.25 3.5 3.75 3.25 3.5 3.67 3.5 ...
## $ quarter : int 1 1 1 1 1 1 1 1 1 1 ...
## $ work_yrs : int 1 2 2 3 2 5 0 1 3 1 ...
## $ frstlang : Factor w/ 2 levels "English","Other": 1 1 1 1 1 1 1 1 1 1 ...
## $ salary : num 85000 85000 86000 88000 92000 93000 95000 95000 95000 96000 ...
## $ satis : int 5 6 5 7 6 5 4 5 3 7 ...
## $ GotPlaced: Factor w/ 2 levels "FALSE","TRUE": 2 2 2 2 2 2 2 2 2 2 ...
#GotPlaced = factor(year)
#dummies = model.matrix(~year.f)
Create contingency tables, counting allPlaced / notPlaced versus Sex: Male / Female
Number of placed and not-placed candidates
# Count how many known-outcome graduates were placed (TRUE) or not (FALSE).
allplaced <- table(knownMBA.df$GotPlaced == "TRUE")
allplaced
##
## FALSE TRUE
## 90 138
allPlaced / notPlaced versus Sex: Male / Female
# Contingency table: placement status (rows) by sex (columns).
# The formula spells out knownMBA.df$... even though `data` is supplied; this is
# what produces the "knownMBA.df$GotPlaced" / "knownMBA.df$sex" headers in the output.
placedbySex <- xtabs(~ knownMBA.df$GotPlaced + knownMBA.df$sex , data=knownMBA.df)
placedbySex
## knownMBA.df$sex
## knownMBA.df$GotPlaced Female Male
## FALSE 23 67
## TRUE 36 102
# Add row and column totals to the placement-by-sex table.
addmargins(placedbySex)
## knownMBA.df$sex
## knownMBA.df$GotPlaced Female Male Sum
## FALSE 23 67 90
## TRUE 36 102 138
## Sum 59 169 228
Percentage of Male / Female candidates who got Placed
# Margin 2 = column proportions: within each sex, the share placed / not placed.
prop.table(placedbySex, 2)
## knownMBA.df$sex
## knownMBA.df$GotPlaced Female Male
## FALSE 0.3898305 0.3964497
## TRUE 0.6101695 0.6035503
allPlaced / notPlaced versus First Language: English / Other
# Contingency table: placement status (rows) by first language (columns).
# As with the sex table, the explicit knownMBA.df$... terms in the formula are
# what produce the long headers in the printed table.
placedbyLanguage <- xtabs(~ knownMBA.df$GotPlaced + knownMBA.df$frstlang, data=knownMBA.df)
placedbyLanguage
## knownMBA.df$frstlang
## knownMBA.df$GotPlaced English Other
## FALSE 82 8
## TRUE 122 16
# Add row and column totals to the placement-by-language table.
addmargins(placedbyLanguage)
## knownMBA.df$frstlang
## knownMBA.df$GotPlaced English Other Sum
## FALSE 82 8 90
## TRUE 122 16 138
## Sum 204 24 228
Percentage of candidates who got placed, by first language
# Margin 2 = column proportions: within each first-language group, the share placed / not placed.
prop.table(placedbyLanguage, 2)
## knownMBA.df$frstlang
## knownMBA.df$GotPlaced English Other
## FALSE 0.4019608 0.3333333
## TRUE 0.5980392 0.6666667
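H1: The percentage of Females placed is more than Males
Chi-square test of independence between placement status and sex. Note that this test checks for any association between the two variables; it is not a one-sided test of whether the female placement rate is higher than the male rate.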
# Pearson chi-squared test on the 2x2 placement-by-sex table (reported with
# Yates' continuity correction). It tests whether placement and sex are
# associated, not the direction of any difference.
chisq.test(placedbySex)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: placedbySex
## X-squared = 3.5816e-30, df = 1, p-value = 1
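H2: The percentage of people placed whose first language is English is higher than the percentage of people placed whose first language is not English
Chi-square test of independence between placement status and first language (tests for any association, not for the direction stated in H2)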
# Pearson chi-squared test on the 2x2 placement-by-first-language table
# (reported with Yates' continuity correction); tests association only.
chisq.test(placedbyLanguage)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: placedbyLanguage
## X-squared = 0.18479, df = 1, p-value = 0.6673
library(corrplot)
# List the available columns before choosing the numeric ones to correlate.
names(placed.df)
## [1] "age" "sex" "gmat_tot" "gmat_qpc" "gmat_vpc" "gmat_tpc"
## [7] "s_avg" "f_avg" "quarter" "work_yrs" "frstlang" "salary"
## [13] "satis"
# Pairwise correlations between the numeric predictors, computed on the
# graduates with a disclosed salary.
dataColumns <- subset(
  placed.df,
  select = c(age, work_yrs, gmat_tot, gmat_qpc, gmat_vpc, gmat_tpc,
             s_avg, f_avg, quarter, satis)
)
N <- cor(dataColumns)
corrplot(N, method = "circle")

# The same matrix, printed to two decimals.
res <- cor(dataColumns)
round(res, digits = 2)
## age work_yrs gmat_tot gmat_qpc gmat_vpc gmat_tpc s_avg f_avg
## age 1.00 0.88 -0.08 -0.17 0.02 -0.10 0.16 -0.22
## work_yrs 0.88 1.00 -0.12 -0.18 -0.03 -0.13 0.16 -0.22
## gmat_tot -0.08 -0.12 1.00 0.67 0.78 0.97 0.17 0.12
## gmat_qpc -0.17 -0.18 0.67 1.00 0.09 0.66 0.02 0.10
## gmat_vpc 0.02 -0.03 0.78 0.09 1.00 0.78 0.16 0.02
## gmat_tpc -0.10 -0.13 0.97 0.66 0.78 1.00 0.14 0.07
## s_avg 0.16 0.16 0.17 0.02 0.16 0.14 1.00 0.45
## f_avg -0.22 -0.22 0.12 0.10 0.02 0.07 0.45 1.00
## quarter -0.13 -0.13 -0.11 0.01 -0.13 -0.10 -0.84 -0.43
## satis 0.11 0.06 0.06 0.00 0.15 0.12 -0.14 -0.12
## quarter satis
## age -0.13 0.11
## work_yrs -0.13 0.06
## gmat_tot -0.11 0.06
## gmat_qpc 0.01 0.00
## gmat_vpc -0.13 0.15
## gmat_tpc -0.10 0.12
## s_avg -0.84 -0.14
## f_avg -0.43 -0.12
## quarter 1.00 0.23
## satis 0.23 1.00
# MBA PERFORMANCE
# Correlations among the variables that track performance during the MBA
# (s_avg, f_avg, quarter), for graduates with a disclosed salary.
mbaPerformance <- placed.df[c("s_avg", "f_avg", "quarter")]
N <- cor(mbaPerformance)
corrplot(N, method = "circle")

res <- cor(mbaPerformance)
round(res, digits = 2)
## s_avg f_avg quarter
## s_avg 1.00 0.45 -0.84
## f_avg 0.45 1.00 -0.43
## quarter -0.84 -0.43 1.00
# Correlations among the four GMAT columns, graduates with a disclosed salary.
gmat <- placed.df[c("gmat_tot", "gmat_qpc", "gmat_vpc", "gmat_tpc")]
res <- cor(gmat)
round(res, digits = 2)
## gmat_tot gmat_qpc gmat_vpc gmat_tpc
## gmat_tot 1.00 0.67 0.78 0.97
## gmat_qpc 0.67 1.00 0.09 0.66
## gmat_vpc 0.78 0.09 1.00 0.78
## gmat_tpc 0.97 0.66 0.78 1.00
library(corrplot)
M <- cor(gmat)
corrplot(M, method = "circle")

# However, GMAT verbal and quantitative scores are very weakly correlated.
# The columns are taken from MBA.df explicitly instead of through attach()'d
# bare names, so it is visible that this figure is computed over ALL
# respondents (the placed-only value is the gmat_qpc/gmat_vpc entry of M).
cor(MBA.df$gmat_qpc, MBA.df$gmat_vpc)
## [1] 0.1521801
# 1f. MBA PERFORMANCE
# Correlations among the variables that track performance during the MBA
# (s_avg, f_avg, quarter), for graduates with a disclosed salary.
mbaPerformance <- placed.df[c("s_avg", "f_avg", "quarter")]
N <- cor(mbaPerformance)
corrplot(N, method = "circle")

res <- cor(mbaPerformance)
round(res, digits = 2)
## s_avg f_avg quarter
## s_avg 1.00 0.45 -0.84
## f_avg 0.45 1.00 -0.43
## quarter -0.84 -0.43 1.00
# The overall performance quartile (quarter) is highly correlated with the Spring (s_avg) and Fall (f_avg) GPA
# We will include 's_avg' and 'f_avg' in our regression, but exclude 'quarter' from our regression.
# Age and work experience, over ALL respondents. The columns are referenced
# through MBA.df explicitly rather than relying on attach()'d bare names.
cor(MBA.df$age, MBA.df$work_yrs)
## [1] 0.8582981
# CORRELATION MATRIX
# cor() is used here, so the values below are correlations (not covariances)
# between salary and the candidate regressors, disclosed salaries only.
columns <- c("salary", "work_yrs", "gmat_qpc", "gmat_vpc", "s_avg", "f_avg", "satis")
placedVariables <- placed.df[columns]
res <- cor(placedVariables)
round(res, digits = 2)
## salary work_yrs gmat_qpc gmat_vpc s_avg f_avg satis
## salary 1.00 0.45 0.01 -0.14 0.10 -0.11 -0.04
## work_yrs 0.45 1.00 -0.18 -0.03 0.16 -0.22 0.06
## gmat_qpc 0.01 -0.18 1.00 0.09 0.02 0.10 0.00
## gmat_vpc -0.14 -0.03 0.09 1.00 0.16 0.02 0.15
## s_avg 0.10 0.16 0.02 0.16 1.00 0.45 -0.14
## f_avg -0.11 -0.22 0.10 0.02 0.45 1.00 -0.12
## satis -0.04 0.06 0.00 0.15 -0.14 -0.12 1.00
library(corrplot)
# Plot the salary/regressor correlation matrix as circles.
M <- cor(placed.df[columns])
corrplot(M, method = "circle")

SCATTER PLOTS
library(car)
# Salary against the MBA grade averages and satisfaction rating.
scatterplotMatrix(
  ~ salary + s_avg + f_avg + satis,
  data = placed.df,
  main = "Salary versus MBA Performance and MBA Satisfaction"
)

library(car)
# Salary against work experience and the GMAT percentile scores.
scatterplotMatrix(
  ~ salary + work_yrs + gmat_qpc + gmat_vpc,
  data = placed.df,
  main = "Salary versus Work Experience; GMAT Performance"
)

REGRESSION
Formulating multivariate linear regression model to fit salary with respect to the model selection
Dependent Variable: Salary
# Linear model of starting salary on work experience, MBA grade averages,
# GMAT percentiles, sex, first language and satisfaction, fitted only to the
# graduates who disclosed a salary (placed.df).
Model1 <- salary ~ work_yrs + s_avg + f_avg + gmat_qpc + gmat_vpc +
  sex + frstlang + satis
fit1 <- lm(formula = Model1, data = placed.df)
summary(fit1)
##
## Call:
## lm(formula = Model1, data = placed.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29800 -7822 -1742 4869 82341
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 90136.22 21739.22 4.146 7.4e-05 ***
## work_yrs 2331.12 585.99 3.978 0.000137 ***
## s_avg 4659.05 5015.66 0.929 0.355320
## f_avg -1698.83 3834.70 -0.443 0.658773
## gmat_qpc 98.72 121.85 0.810 0.419884
## gmat_vpc -95.80 102.99 -0.930 0.354699
## sexMale 5289.24 3545.91 1.492 0.139140
## frstlangOther 13994.76 6641.66 2.107 0.037770 *
## satis -1671.20 2070.62 -0.807 0.421643
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15740 on 94 degrees of freedom
## Multiple R-squared: 0.285, Adjusted R-squared: 0.2241
## F-statistic: 4.683 on 8 and 94 DF, p-value: 7.574e-05