Reading the raw data into a dataframe

# Load the raw survey data.
# NOTE(review): the absolute path is specific to one machine; adjust it before
# running elsewhere.
MBA.df <- read.csv("C:/Users/here_is_sachin/Downloads/MBA Starting Salaries Data.csv")
# NOTE(review): attach() puts a snapshot of the columns, as they are right now,
# on the search path. Later changes to MBA.df are not reflected in that
# snapshot, so prefer MBA.df$col or a data= argument over bare column names.
attach(MBA.df)
# Number of rows and columns in the raw data.
dim(MBA.df)
## [1] 274  13
library(psych)
# Per-column summary statistics, keeping only the first five columns of the
# describe() result.
describe(MBA.df)[, 1:5]
##          vars   n     mean       sd median
## age         1 274    27.36     3.71     27
## sex         2 274     1.25     0.43      1
## gmat_tot    3 274   619.45    57.54    620
## gmat_qpc    4 274    80.64    14.87     83
## gmat_vpc    5 274    78.32    16.86     81
## gmat_tpc    6 274    84.20    14.02     87
## s_avg       7 274     3.03     0.38      3
## f_avg       8 274     3.06     0.53      3
## quarter     9 274     2.48     1.11      2
## work_yrs   10 274     3.87     3.23      3
## frstlang   11 274     1.12     0.32      1
## salary     12 274 39025.69 50951.56    999
## satis      13 274   172.18   371.61      6

Inspect the datatypes. Convert the data type of some columns

Sex and First Language

# Column types as read from the CSV, before any recoding.
str(MBA.df)
## 'data.frame':    274 obs. of  13 variables:
##  $ age     : int  23 24 24 24 24 24 25 25 25 25 ...
##  $ sex     : int  2 1 1 1 2 1 1 2 1 1 ...
##  $ gmat_tot: int  620 610 670 570 710 640 610 650 630 680 ...
##  $ gmat_qpc: int  77 90 99 56 93 82 89 88 79 99 ...
##  $ gmat_vpc: int  87 71 78 81 98 89 74 89 91 81 ...
##  $ gmat_tpc: int  87 87 95 75 98 91 87 92 89 96 ...
##  $ s_avg   : num  3.4 3.5 3.3 3.3 3.6 3.9 3.4 3.3 3.3 3.45 ...
##  $ f_avg   : num  3 4 3.25 2.67 3.75 3.75 3.5 3.75 3.25 3.67 ...
##  $ quarter : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ work_yrs: int  2 2 2 1 2 2 2 2 2 2 ...
##  $ frstlang: int  1 1 1 1 1 1 1 1 2 1 ...
##  $ salary  : int  0 0 0 0 999 0 0 0 999 998 ...
##  $ satis   : int  7 6 6 7 5 6 5 6 4 998 ...
# Recode the integer-coded categorical columns as labelled factors:
#   sex:      1 = Male,    2 = Female
#   frstlang: 1 = English, 2 = Other
# factor() maps codes to labels in one step, so the column never passes
# through a mixed character state (e.g. "Male" next to "2"). Levels are listed
# in alphabetical label order, which keeps "Female" and "English" as the
# reference levels in later models.
# NOTE(review): a code other than 1 or 2 becomes NA here instead of surviving
# as an extra level.

MBA.df$sex <- factor(
  MBA.df$sex,
  levels = c(2, 1),
  labels = c("Female", "Male")
)

MBA.df$frstlang <- factor(
  MBA.df$frstlang,
  levels = c(1, 2),
  labels = c("English", "Other")
)

# Confirm that both columns are now two-level factors.
str(MBA.df)
## 'data.frame':    274 obs. of  13 variables:
##  $ age     : int  23 24 24 24 24 24 25 25 25 25 ...
##  $ sex     : Factor w/ 2 levels "Female","Male": 1 2 2 2 1 2 2 1 2 2 ...
##  $ gmat_tot: int  620 610 670 570 710 640 610 650 630 680 ...
##  $ gmat_qpc: int  77 90 99 56 93 82 89 88 79 99 ...
##  $ gmat_vpc: int  87 71 78 81 98 89 74 89 91 81 ...
##  $ gmat_tpc: int  87 87 95 75 98 91 87 92 89 96 ...
##  $ s_avg   : num  3.4 3.5 3.3 3.3 3.6 3.9 3.4 3.3 3.3 3.45 ...
##  $ f_avg   : num  3 4 3.25 2.67 3.75 3.75 3.5 3.75 3.25 3.67 ...
##  $ quarter : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ work_yrs: int  2 2 2 1 2 2 2 2 2 2 ...
##  $ frstlang: Factor w/ 2 levels "English","Other": 1 1 1 1 1 1 1 1 2 1 ...
##  $ salary  : int  0 0 0 0 999 0 0 0 999 998 ...
##  $ satis   : int  7 6 6 7 5 6 5 6 4 998 ...
# MBAs who were placed and disclosed their salary
placed.df <- subset(MBA.df, salary > 1000)
View(placed.df)

# MBAs who were not placed
notPlaced.df <- subset(MBA.df, salary == 0)
View(notPlaced.df)

# MBAs who were placed but did not disclose their salary
notDisclosedSalary.df <- subset(MBA.df, salary == 999)
View(notDisclosedSalary.df)

# MBAs who did not answer the survey
notAnsweredSurvey.df <- subset(MBA.df, salary == 998)
View(notAnsweredSurvey.df)

# Sanity check on the split: the row counts of the four subsets should add up
# to the row count of the original data frame.
c1 <- nrow(placed.df)
c2 <- nrow(notPlaced.df)
c3 <- nrow(notDisclosedSalary.df)
c4 <- nrow(notAnsweredSurvey.df)
c <- sum(c1, c2, c3, c4)
c
## [1] 274
nrow(MBA.df)
## [1] 274
# First rows of the placed-with-disclosed-salary subset.
head(placed.df)
##    age    sex gmat_tot gmat_qpc gmat_vpc gmat_tpc s_avg f_avg quarter
## 35  22 Female      660       90       92       94   3.5  3.75       1
## 36  27 Female      700       94       98       98   3.3  3.25       1
## 37  25 Female      680       87       96       96   3.5  2.67       1
## 38  25 Female      650       82       91       93   3.4  3.25       1
## 39  27   Male      710       96       96       98   3.3  3.50       1
## 40  28 Female      620       52       98       87   3.4  3.75       1
##    work_yrs frstlang salary satis
## 35        1  English  85000     5
## 36        2  English  85000     6
## 37        2  English  86000     5
## 38        3  English  88000     7
## 39        2  English  92000     6
## 40        5  English  93000     5
# Mean salary among the graduates who disclosed one.
avgSalary <- mean(placed.df[["salary"]])
avgSalary
## [1] 103030.7
# Mean imputation: every graduate who withheld a salary gets that average.
notDisclosedSalary.df[["salary"]] <- avgSalary


# Disclosed-salary rows first, imputed-salary rows after them.
allPlaced.df <- rbind(placed.df, notDisclosedSalary.df)
library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Show ten randomly selected rows of allPlaced.df.
some(allPlaced.df) # sample any ten randomly selected rows from the dataframe allPlaced.df
##     age    sex gmat_tot gmat_qpc gmat_vpc gmat_tpc s_avg f_avg quarter
## 36   27 Female      700       94       98       98   3.3  3.25       1
## 47   24   Male      560       81       50       71   3.4  3.67       1
## 64   27   Male      600       67       84       83   3.5  3.00       1
## 120  24   Male      560       52       81       72   3.2  3.25       2
## 133  34   Male      550       72       58       69   3.0  3.00       2
## 198  28   Male      610       89       67       86   2.7  3.00       3
## 207  32   Male      660       83       95       94   2.9  3.50       3
## 256  24 Female      560       55       78       71   3.5  3.25       4
## 78   25   Male      690       87       98       98   3.0  3.00       2
## 91   27   Male      740       99       98       99   3.1  3.50       2
##     work_yrs frstlang   salary satis
## 36         2  English  85000.0     6
## 47         2  English 100000.0     6
## 64         3  English 120000.0     5
## 120        2  English  96000.0     7
## 133       16  English 105000.0     5
## 198        4  English  98000.0     7
## 207        2    Other 107300.0     7
## 256        2  English  64000.0     7
## 78         3  English 103030.7     5
## 91         2  English 103030.7     4

Summary Statistics of allPlaced.df

library(psych)
# Per-column summary statistics for all placed graduates, keeping only the
# first five columns of the describe() result.
describe(allPlaced.df)[, 1:5]
##           vars   n      mean       sd   median
## age          1 138     26.96     3.05     26.0
## sex*         2 138      1.74     0.44      2.0
## gmat_tot     3 138    619.28    53.47    620.0
## gmat_qpc     4 138     81.10    13.59     83.5
## gmat_vpc     5 138     77.99    17.10     81.5
## gmat_tpc     6 138     84.48    13.08     87.0
## s_avg        7 138      3.03     0.38      3.0
## f_avg        8 138      3.06     0.46      3.0
## quarter      9 138      2.43     1.15      2.0
## work_yrs    10 138      3.67     2.75      3.0
## frstlang*   11 138      1.12     0.32      1.0
## salary      12 138 103030.74 15418.25 103030.7
## satis       13 138      5.53     1.11      6.0
# Open allPlaced.df in the interactive data viewer.
View(allPlaced.df)

Review the Distribution of Salary

library(lattice)
# Salary histogram for graduates with a disclosed salary.
histogram(
  ~ salary,
  data = placed.df,
  col = "grey",
  xlab = "Starting Salary",
  main = "Distribution of Starting Salary"
)

# Same histogram including the graduates whose salary was imputed.
histogram(
  ~ salary,
  data = allPlaced.df,
  col = "grey",
  xlab = "Starting Salary",
  main = "Distribution of Starting Salary"
)

Comparison of salary with the given variables

# Mean salary, work experience and age by sex: disclosed salaries only.
aggregate(
  cbind(salary, work_yrs, age) ~ sex,
  data = placed.df,
  FUN = mean
)
##      sex    salary work_yrs      age
## 1 Female  98524.39 3.258065 26.06452
## 2   Male 104970.97 3.861111 27.08333
# Same means with the imputed-salary graduates included.
aggregate(
  cbind(salary, work_yrs, age) ~ sex,
  data = allPlaced.df,
  FUN = mean
)
##      sex    salary work_yrs      age
## 1 Female  99150.27 3.277778 26.13889
## 2   Male 104400.32 3.803922 27.24510

Comparison of Salary with Work Experience

# Salary against years of work experience for graduates with a disclosed
# salary. `horizontal` is a boxplot option, not a scatterplot one; passing it
# to scatterplot() only forwards an unknown graphical parameter to plot(), so
# it is omitted here.
scatterplot(salary ~ work_yrs, data = placed.df,
            main = "Scatterplot of Salary with Work Experience",
            xlab = "Work Experience", ylab = "MBA's Starting Salaries")

# Horizontal boxes: salary runs along the x axis and the work-experience
# groups along the y axis, which is why xlab names the salary.
boxplot(salary ~ work_yrs, data = placed.df,
        main = "Distribution of Salary with Work Experience",
        ylab = "Work Experience", xlab = "MBA's Starting Salaries",
        horizontal = TRUE)

library(lattice)
# Salary histogram for graduates with a disclosed salary.
histogram(
  ~ salary,
  data = placed.df,
  col = "grey",
  xlab = "Starting Salary",
  main = "Frequency of Starting Salary"
)

# Mean disclosed salary for each number of years of work experience.
salaryWorkEx <- aggregate(
  salary ~ work_yrs,
  data = placed.df,
  FUN = mean
)
salaryWorkEx
##    work_yrs    salary
## 1         0  95000.00
## 2         1 103532.00
## 3         2  97673.68
## 4         3 101652.86
## 5         4 105454.55
## 6         5 103142.86
## 7         6 105928.57
## 8         7  98000.00
## 9         8 105025.00
## 10       10 118000.00
## 11       15 183000.00
## 12       16 108500.00

Comparison of Salary with GMAT total score

# Salary against total GMAT score for graduates with a disclosed salary.
# The former `labels = row.names(placed.df)` argument is dropped from all three
# calls: no points are identified in these plots, current versions of
# car::scatterplot() take point labels through `id = list(labels = ...)`
# instead, and boxplot() has no `labels` argument at all.
scatterplot(salary ~ gmat_tot, data = placed.df,
            xlab = "GMAT Total", ylab = "Salary",
            main = "Comparison of Salary with Total GMAT score")

# Same plot with a separate point style and fit for each sex.
scatterplot(salary ~ gmat_tot | sex, data = placed.df,
            xlab = "GMAT Total", ylab = "Salary",
            main = "Comparison of Salary with Total GMAT score")

# Horizontal boxes: salary runs along the x axis and the GMAT-total groups
# along the y axis.
boxplot(salary ~ gmat_tot, data = placed.df,
        ylab = "GMAT Total", xlab = "Salary",
        main = "Comparison of Salary with Total GMAT score",
        horizontal = TRUE)

# List the available columns before choosing variables to plot.
colnames(placed.df)
##  [1] "age"      "sex"      "gmat_tot" "gmat_qpc" "gmat_vpc" "gmat_tpc"
##  [7] "s_avg"    "f_avg"    "quarter"  "work_yrs" "frstlang" "salary"  
## [13] "satis"
library(car)
# Pairwise scatterplots of salary, total GMAT score and the two GPA columns.
# scatterplotMatrix() is the supported name in car (scatterplot.matrix is an
# old alias that newer releases no longer provide) and is the name already
# used further down in this file.
scatterplotMatrix(~ salary + gmat_tot + s_avg + f_avg, data = placed.df,
    main = "Salary versus other variables")

# Same matrix with points distinguished by sex.
scatterplotMatrix(~ salary + gmat_tot + s_avg + f_avg | sex, data = placed.df,
    main = "Salary versus other variables")

Number of males and females in the placed dataframe, by age

# Cross-tabulate sex (rows) against age (columns) for graduates with a
# disclosed salary.
ageTable <- table(placed.df[["sex"]], placed.df[["age"]])
ageTable
##         
##          22 23 24 25 26 27 28 29 30 31 32 33 34 39 40
##   Female  1  2  5 10  5  1  3  1  2  0  0  0  0  0  1
##   Male    0  3 11 13  9 13  5  5  4  4  1  1  1  1  1

Effect of Sex on Salary

# Mean salary, work experience and age by sex, restricted to graduates with a
# disclosed salary. In MBA.df the salary column also holds the codes 0 (not
# placed), 998 (survey not answered) and 999 (salary not disclosed), which
# would otherwise be averaged in as if they were salaries.
aggregate(cbind(salary, work_yrs, age) ~ sex, 
                   data = placed.df, mean)
##      sex    salary work_yrs      age
## 1 Female  98524.39 3.258065 26.06452
## 2   Male 104970.97 3.861111 27.08333

Effect of Age on Salary

# Mean salary and work experience by age, over the full dataset.
# NOTE(review): MBA.df$salary still contains the codes 0 (not placed), 998
# (survey not answered) and 999 (salary not disclosed), so these means are not
# average salaries; consider data = placed.df.
aggregate(cbind(salary, work_yrs) ~ age, data = MBA.df, mean)
##    age    salary  work_yrs
## 1   22  42500.00  1.000000
## 2   23  57282.00  1.750000
## 3   24  49342.24  1.727273
## 4   25  43395.55  2.264151
## 5   26  35982.07  2.875000
## 6   27  31499.37  3.130435
## 7   28  39809.00  4.666667
## 8   29  28067.95  4.500000
## 9   30  55291.25  5.583333
## 10  31  40599.40  5.800000
## 11  32  13662.25  5.625000
## 12  33 118000.00 10.000000
## 13  34  26250.00 11.500000
## 14  35      0.00  9.333333
## 15  36      0.00 12.500000
## 16  37      0.00  9.000000
## 17  39  56000.00 10.500000
## 18  40 183000.00 15.000000
## 19  42      0.00 13.000000
## 20  43      0.00 19.000000
## 21  48      0.00 22.000000

Effect of Satisfaction Level on Salary

# Mean salary and work experience by satisfaction score, over the full dataset.
# NOTE(review): MBA.df$salary still contains the codes 0 (not placed), 998
# (survey not answered) and 999 (salary not disclosed), and satis has a 998
# group of its own (presumably the same no-answer code), so these means are
# not average salaries; consider data = placed.df.
aggregate(cbind(salary, work_yrs) ~ satis , data = MBA.df, mean) 
##   satis    salary work_yrs
## 1     1   999.000 3.000000
## 2     2   999.000 2.000000
## 3     3 19799.200 4.200000
## 4     4  6293.412 2.941176
## 5     5 40476.311 4.243243
## 6     6 54383.536 4.185567
## 7     7 65718.152 3.727273
## 8   998   998.000 3.086957

Effect of Work Experience on MBA’s Starting Salary

# Salary distribution for each level of work experience, limited to graduates
# with a disclosed salary: in MBA.df the salary column also carries the codes
# 0, 998 and 999, which are not salaries.
# With vertical boxes the groups (work_yrs) run along the x axis and salary
# along the y axis, so the axis labels are assigned accordingly.
boxplot(salary ~ work_yrs, data = placed.df,
        main = "Effect of Work Experience on Salary",
        xlab = "Work Experience", ylab = "MBA's Starting Salaries",
        horizontal = FALSE)

Effect of Gender on MBA’s Starting Salary

# Salary distribution by gender, limited to graduates with a disclosed salary:
# in MBA.df the salary column also carries the codes 0, 998 and 999, which are
# not salaries.
# With vertical boxes the groups (sex) run along the x axis and salary along
# the y axis; the previous labels were copied from the work-experience plot.
boxplot(salary ~ sex, data = placed.df,
        main = "Effect of Gender on Salary",
        xlab = "Gender", ylab = "MBA's Starting Salaries",
        horizontal = FALSE)

Distribution of MBA’s Starting Salary

library(lattice)
# Histogram of starting salaries for graduates with a disclosed salary. The
# full MBA.df is not used because its salary column also carries the codes 0,
# 998 and 999, which are not salaries. The axis label typo is corrected.
histogram(~ salary, data = placed.df,
          main = "Distribution of MBA's Starting Salary",
          xlab = "MBA's Starting Salary", col = "grey")

Distribution of MBA’s Starting Salary

library(lattice)
# Histogram of starting salaries for graduates with a disclosed salary. The
# full MBA.df is not used because its salary column also carries the codes 0,
# 998 and 999, which are not salaries. The axis label typo is corrected.
histogram(~ salary, data = placed.df,
          main = "Distribution of MBA's Starting Salary",
          xlab = "MBA's Starting Salary", col = "grey")

Merge placed.df, notDisclosedSalary.df and notPlaced.df into knownMBA.df

# Stack every graduate whose placement outcome is known: placed with a
# disclosed salary, placed without a disclosed salary, and not placed.
knownMBA.df <- rbind(
  placed.df,
  notDisclosedSalary.df,
  notPlaced.df
)
View(knownMBA.df)

Create a dummy variable called “GotPlaced” = TRUE (got a job) or FALSE (did not get a job)

# Placement flag: TRUE when the salary value exceeds 1000, FALSE otherwise.
knownMBA.df[["GotPlaced"]] <- knownMBA.df[["salary"]] > 1000
View(knownMBA.df)

# Store the flag as a factor and confirm the resulting structure.
knownMBA.df[["GotPlaced"]] <- factor(knownMBA.df[["GotPlaced"]])
str(knownMBA.df)
## 'data.frame':    228 obs. of  14 variables:
##  $ age      : int  22 27 25 25 27 28 24 25 25 25 ...
##  $ sex      : Factor w/ 2 levels "Female","Male": 1 1 1 1 2 1 2 1 1 2 ...
##  $ gmat_tot : int  660 700 680 650 710 620 670 560 530 650 ...
##  $ gmat_qpc : int  90 94 87 82 96 52 84 52 50 79 ...
##  $ gmat_vpc : int  92 98 96 91 96 98 96 81 62 93 ...
##  $ gmat_tpc : int  94 98 96 93 98 87 95 72 61 93 ...
##  $ s_avg    : num  3.5 3.3 3.5 3.4 3.3 3.4 3.3 3.3 3.6 3.3 ...
##  $ f_avg    : num  3.75 3.25 2.67 3.25 3.5 3.75 3.25 3.5 3.67 3.5 ...
##  $ quarter  : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ work_yrs : int  1 2 2 3 2 5 0 1 3 1 ...
##  $ frstlang : Factor w/ 2 levels "English","Other": 1 1 1 1 1 1 1 1 1 1 ...
##  $ salary   : num  85000 85000 86000 88000 92000 93000 95000 95000 95000 96000 ...
##  $ satis    : int  5 6 5 7 6 5 4 5 3 7 ...
##  $ GotPlaced: Factor w/ 2 levels "FALSE","TRUE": 2 2 2 2 2 2 2 2 2 2 ...
#GotPlaced = factor(year)
#dummies = model.matrix(~year.f)

Create contingency tables, counting allPlaced / notPlaced versus Sex: Male / Female

Number of Placed and Not Placed candidates

# Count the graduates whose placement flag is (or is not) "TRUE".
allplaced <- table(knownMBA.df[["GotPlaced"]] == "TRUE")
allplaced
## 
## FALSE  TRUE 
##    90   138

allPlaced / notPlaced versus Sex: Male / Female

# Contingency table of placement flag (rows) by sex (columns).
# NOTE(review): the formula already spells out knownMBA.df$..., so data= is
# redundant; the fully qualified names also become the printed dimension labels.
placedbySex <- xtabs(~ knownMBA.df$GotPlaced + knownMBA.df$sex , data=knownMBA.df)
placedbySex
##                      knownMBA.df$sex
## knownMBA.df$GotPlaced Female Male
##                 FALSE     23   67
##                 TRUE      36  102
# Same table with row and column totals added.
addmargins(placedbySex)
##                      knownMBA.df$sex
## knownMBA.df$GotPlaced Female Male Sum
##                 FALSE     23   67  90
##                 TRUE      36  102 138
##                 Sum       59  169 228

Percentage of Male / Female candidates who got Placed

# Column proportions (margin 2): share placed / not placed within each sex.
prop.table(placedbySex, 2) 
##                      knownMBA.df$sex
## knownMBA.df$GotPlaced    Female      Male
##                 FALSE 0.3898305 0.3964497
##                 TRUE  0.6101695 0.6035503

allPlaced / notPlaced versus First Language: English / Other

# Contingency table of placement flag (rows) by first language (columns).
# NOTE(review): the formula already spells out knownMBA.df$..., so data= is
# redundant; the fully qualified names also become the printed dimension labels.
placedbyLanguage <- xtabs(~ knownMBA.df$GotPlaced + knownMBA.df$frstlang, data=knownMBA.df)
placedbyLanguage
##                      knownMBA.df$frstlang
## knownMBA.df$GotPlaced English Other
##                 FALSE      82     8
##                 TRUE      122    16
# Same table with row and column totals added.
addmargins(placedbyLanguage)
##                      knownMBA.df$frstlang
## knownMBA.df$GotPlaced English Other Sum
##                 FALSE      82     8  90
##                 TRUE      122    16 138
##                 Sum       204    24 228

Percentage of candidates who got Placed, by First Language

# Column proportions (margin 2): share placed / not placed within each
# first-language group.
prop.table(placedbyLanguage, 2) 
##                      knownMBA.df$frstlang
## knownMBA.df$GotPlaced   English     Other
##                 FALSE 0.4019608 0.3333333
##                 TRUE  0.5980392 0.6666667

H1: The percentage of Females placed is more than Males

Chi-Square Test: is the percentage of females who got placed higher than the percentage of males who got placed?

# Pearson chi-squared test on the 2x2 placement-by-sex table.
# NOTE(review): this tests for any association between the two factors; it is
# not directional, so by itself it cannot support a one-sided hypothesis.
chisq.test(placedbySex)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  placedbySex
## X-squared = 3.5816e-30, df = 1, p-value = 1

H2: The percentage of people placed whose first language is English is higher than the percentage of people placed whose first language is not English

Chi Square Test

# Pearson chi-squared test on the 2x2 placement-by-first-language table.
# NOTE(review): this tests for any association between the two factors; it is
# not directional, so by itself it cannot support a one-sided hypothesis.
chisq.test(placedbyLanguage)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  placedbyLanguage
## X-squared = 0.18479, df = 1, p-value = 0.6673
library(corrplot)

# List the available columns before choosing variables to correlate.
colnames(placed.df)
##  [1] "age"      "sex"      "gmat_tot" "gmat_qpc" "gmat_vpc" "gmat_tpc"
##  [7] "s_avg"    "f_avg"    "quarter"  "work_yrs" "frstlang" "salary"  
## [13] "satis"
# Numeric columns of placed.df to correlate with one another.
dataColumns <- placed.df[, c(
  "age", "work_yrs",
  "gmat_tot", "gmat_qpc", "gmat_vpc", "gmat_tpc",
  "s_avg", "f_avg", "quarter", "satis"
)]

# Pairwise correlations: drawn as a circle plot, then printed to two decimals.
N <- cor(dataColumns)
corrplot(N, method = "circle")

res <- N
round(res, 2)
##            age work_yrs gmat_tot gmat_qpc gmat_vpc gmat_tpc s_avg f_avg
## age       1.00     0.88    -0.08    -0.17     0.02    -0.10  0.16 -0.22
## work_yrs  0.88     1.00    -0.12    -0.18    -0.03    -0.13  0.16 -0.22
## gmat_tot -0.08    -0.12     1.00     0.67     0.78     0.97  0.17  0.12
## gmat_qpc -0.17    -0.18     0.67     1.00     0.09     0.66  0.02  0.10
## gmat_vpc  0.02    -0.03     0.78     0.09     1.00     0.78  0.16  0.02
## gmat_tpc -0.10    -0.13     0.97     0.66     0.78     1.00  0.14  0.07
## s_avg     0.16     0.16     0.17     0.02     0.16     0.14  1.00  0.45
## f_avg    -0.22    -0.22     0.12     0.10     0.02     0.07  0.45  1.00
## quarter  -0.13    -0.13    -0.11     0.01    -0.13    -0.10 -0.84 -0.43
## satis     0.11     0.06     0.06     0.00     0.15     0.12 -0.14 -0.12
##          quarter satis
## age        -0.13  0.11
## work_yrs   -0.13  0.06
## gmat_tot   -0.11  0.06
## gmat_qpc    0.01  0.00
## gmat_vpc   -0.13  0.15
## gmat_tpc   -0.10  0.12
## s_avg      -0.84 -0.14
## f_avg      -0.43 -0.12
## quarter     1.00  0.23
## satis       0.23  1.00
# MBA PERFORMANCE
# Correlations among the variables tracking performance during the MBA
# (s_avg, f_avg and quarter): circle plot, then values to two decimals.
mbaPerformance <- placed.df[c("s_avg", "f_avg", "quarter")]

N <- cor(mbaPerformance)
corrplot(N, method = "circle")

res <- N
round(res, 2)
##         s_avg f_avg quarter
## s_avg    1.00  0.45   -0.84
## f_avg    0.45  1.00   -0.43
## quarter -0.84 -0.43    1.00
# Correlations among the four GMAT columns, printed to two decimals.
gmat <- placed.df[c("gmat_tot", "gmat_qpc", "gmat_vpc", "gmat_tpc")]
res <- cor(gmat)
round(res, 2)
##          gmat_tot gmat_qpc gmat_vpc gmat_tpc
## gmat_tot     1.00     0.67     0.78     0.97
## gmat_qpc     0.67     1.00     0.09     0.66
## gmat_vpc     0.78     0.09     1.00     0.78
## gmat_tpc     0.97     0.66     0.78     1.00
library(corrplot)
# Circle plot of the GMAT correlation matrix.
M <- cor(gmat)
corrplot(
  M,
  method = "circle"
)

# However, GMAT verbal and quantitative percentiles are only weakly correlated.
# Computed on the full dataset, with the columns referenced explicitly rather
# than through bare names resolved on the search path.
cor(MBA.df$gmat_qpc, MBA.df$gmat_vpc)
## [1] 0.1521801
# 1f. MBA PERFORMANCE
# Correlations among the variables tracking performance during the MBA
# (s_avg, f_avg and quarter): circle plot, then values to two decimals.
mbaPerformance <- placed.df[c("s_avg", "f_avg", "quarter")]

N <- cor(mbaPerformance)
corrplot(N, method = "circle")

res <- N
round(res, 2)
##         s_avg f_avg quarter
## s_avg    1.00  0.45   -0.84
## f_avg    0.45  1.00   -0.43
## quarter -0.84 -0.43    1.00
# The overall performance quartile (quarter) is highly correlated with the Spring (s_avg) and Fall (f_avg) GPA
# We will include 's_avg' and 'f_avg' in our regression, but exclude 'quarter' from our regression.
# Correlation between age and years of work experience over the full dataset,
# with the columns referenced explicitly rather than through bare names
# resolved on the search path.
cor(MBA.df$age, MBA.df$work_yrs)
## [1] 0.8582981
# CORRELATION MATRIX
# Pairwise correlations between salary and the candidate predictors, computed
# with cor() on the disclosed-salary graduates and printed to two decimals.
columns <- c(
  "salary", "work_yrs", "gmat_qpc", "gmat_vpc",
  "s_avg", "f_avg", "satis"
)
placedVariables <- placed.df[columns]
res <- cor(placedVariables)
round(res, 2)
##          salary work_yrs gmat_qpc gmat_vpc s_avg f_avg satis
## salary     1.00     0.45     0.01    -0.14  0.10 -0.11 -0.04
## work_yrs   0.45     1.00    -0.18    -0.03  0.16 -0.22  0.06
## gmat_qpc   0.01    -0.18     1.00     0.09  0.02  0.10  0.00
## gmat_vpc  -0.14    -0.03     0.09     1.00  0.16  0.02  0.15
## s_avg      0.10     0.16     0.02     0.16  1.00  0.45 -0.14
## f_avg     -0.11    -0.22     0.10     0.02  0.45  1.00 -0.12
## satis     -0.04     0.06     0.00     0.15 -0.14 -0.12  1.00
library(corrplot)
# Circle plot of the correlations among the selected columns.
M <- cor(placed.df[columns])
corrplot(
  M,
  method = "circle"
)

SCATTER PLOTS

library(car)
# Salary against the two GPA columns and the satisfaction score.
scatterplotMatrix(
  ~ salary + s_avg + f_avg + satis,
  data = placed.df,
  main = "Salary versus MBA Performance and MBA Satisfaction"
)

library(car)
# Salary against work experience and the GMAT quantitative / verbal percentiles.
scatterplotMatrix(
  ~ salary + work_yrs + gmat_qpc + gmat_vpc,
  data = placed.df,
  main = "Salary versus Work Experience; GMAT Performance"
)

REGRESSION

Formulating multivariate linear regression model to fit salary with respect to the model selection

Independent Variables: {work_yrs,s_avg,f_avg,gmat_qpc,gmat_vpc,sex,frstlang,satis}

Dependent Variable: Salary

# Linear model of salary on work experience, the two GPA columns, the GMAT
# quantitative and verbal percentiles, sex, first language and satisfaction.
Model1 <- salary ~
  work_yrs +
  s_avg +
  f_avg +
  gmat_qpc +
  gmat_vpc +
  sex +
  frstlang +
  satis

# Fit on the graduates with a disclosed salary and print the summary.
fit1 <- lm(Model1, data = placed.df)
summary(fit1)
## 
## Call:
## lm(formula = Model1, data = placed.df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -29800  -7822  -1742   4869  82341 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   90136.22   21739.22   4.146  7.4e-05 ***
## work_yrs       2331.12     585.99   3.978 0.000137 ***
## s_avg          4659.05    5015.66   0.929 0.355320    
## f_avg         -1698.83    3834.70  -0.443 0.658773    
## gmat_qpc         98.72     121.85   0.810 0.419884    
## gmat_vpc        -95.80     102.99  -0.930 0.354699    
## sexMale        5289.24    3545.91   1.492 0.139140    
## frstlangOther 13994.76    6641.66   2.107 0.037770 *  
## satis         -1671.20    2070.62  -0.807 0.421643    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15740 on 94 degrees of freedom
## Multiple R-squared:  0.285,  Adjusted R-squared:  0.2241 
## F-statistic: 4.683 on 8 and 94 DF,  p-value: 7.574e-05