Alan Zarychta

GEOG 5023: Quantitative Methods In Geography

Lecture 2 – Intro to R Exercises

Read in data:

# Load the faculty salary data set into a data frame.
# NOTE: the absolute path is machine-specific; adjust it before running elsewhere.
# `TRUE` is spelled out because `T` is an ordinary variable that can be reassigned.
faculty <- read.csv("C:/Users/Alan/Desktop/faculty.csv", sep = ",", header = TRUE)  #read in data

1) What are the column names: how many obs? how many vars?:

colnames(faculty)  # variable (column) names of the data frame
##  [1] "AYSALARY" "R1"       "R2"       "R7"       "PRIOREXP" "YRBG"    
##  [7] "YRRANK"   "TERMDEG"  "YRDG"     "EMINENT"  "FEMALE"
NROW(faculty)  # number of observations (rows)
## [1] 725
NCOL(faculty)  # number of variables (columns)
## [1] 11

2) Is annual salary normally distributed?:

# Quartiles, mean and extremes of annual salary; used to choose the bin range below
summary(faculty[["AYSALARY"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   23800   36800   46700   47800   57600  103000
# Histogram with 1,000-wide bins covering the full observed salary range
hist(
  faculty[["AYSALARY"]],
  breaks = seq(from = 23000, to = 110000, by = 1000),
  main = "Histogram of Faculty Annual Salary",
  xlab = "Annual Salary"
)

plot of chunk unnamed-chunk-4

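not normally distributed; skewed right (long upper tail: the mean, 47800, exceeds the median, 46700, and the maximum lies much farther from the median than the minimum does)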

3) does it appear that male and female faculty members make the same annual salary?:

# Mean annual salary within each level of the FEMALE indicator
aggregate(x = faculty$AYSALARY, by = list(faculty$FEMALE), FUN = mean)
##   Group.1     x
## 1       0 51453
## 2       1 39080
# The same group means, computed one group at a time by logical subsetting
with(faculty, mean(AYSALARY[FEMALE == 1]))  # rows where FEMALE == 1
## [1] 39080
with(faculty, mean(AYSALARY[FEMALE == 0]))  # rows where FEMALE == 0
## [1] 51453

#women appear to make less, consider doing a difference of means test

# Two-sample difference-of-means test: salaries where FEMALE == 1 vs. FEMALE == 0
t.test(
  faculty$AYSALARY[faculty$FEMALE == 1],
  faculty$AYSALARY[faculty$FEMALE == 0],
  var.equal = FALSE  # the default, stated explicitly: Welch test, unequal variances
)
## 
##  Welch Two Sample t-test
## 
## data:  faculty$AYSALARY[faculty$FEMALE == 1] and faculty$AYSALARY[faculty$FEMALE == 0] 
## t = -13.28, df = 523.2, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0 
## 95 percent confidence interval:
##  -14203 -10543 
## sample estimates:
## mean of x mean of y 
##     39080     51453

reject the null hypothesis of no difference in means

women make significantly less than men on average

4) does there appear to be a relationship between salary and number of years of employment:

# Correlation between annual salary and YRBG (cor() defaults to Pearson)
salary.yrs.cor <- cor(faculty$AYSALARY, faculty$YRBG)  #simple correlation
# Scatterplot of salary against YRBG; the title typo ("Scatterpolt") is corrected
plot(faculty$AYSALARY ~ faculty$YRBG, main = "Scatterplot, Salary vs. No. Years Employed", 
    xlab = "No. Yrs Employed", ylab = "Salary", pch = 19)  #scatterplot
# Overlay the least-squares line (salary as a linear function of YRBG)
abline(lm(faculty$AYSALARY ~ faculty$YRBG), col = "red")

plot of chunk unnamed-chunk-8

# Display the salary/YRBG correlation coefficient stored in salary.yrs.cor
print(salary.yrs.cor)
## [1] 0.6166

yes, correlation = 0.62

5) combine R1, R2, R7 into one categorical variable; does one category appear to have higher salaries?:

# Collapse the R1/R2/R7 indicator columns into one numeric code, RANK.
# The column is created explicitly as NA first, so rows with none of the three
# flags set are clearly missing and a re-run never keeps codes left over from
# an earlier assignment (previously the column was created implicitly by
# sub-assigning into a column that did not yet exist).
faculty$RANK <- NA_real_
# Assignment order is unchanged: if a row had several flags set, the last
# assignment below would win.
faculty$RANK[faculty$R1 == 1] <- 3
faculty$RANK[faculty$R2 == 1] <- 2
faculty$RANK[faculty$R7 == 1] <- 1
aggregate(faculty$AYSALARY, list(faculty$RANK), mean)  #average salary by category
##   Group.1     x
## 1       1 28374
## 2       2 48058
## 3       3 61622
# Side-by-side boxplots of annual salary, one box per RANK code
boxplot(
  AYSALARY ~ RANK,
  data = faculty,
  col = rainbow(3),
  main = "Annual Salary by Rank",
  xlab = "1 = Instructor/Lecturer, 2 = Associate Professor, 3 = Full Professor",
  ylab = "Annual Salary ($)"
)

plot of chunk unnamed-chunk-10

full professors have higher salaries