This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.


if (!require("dplyr")) install.packages("dplyr", dependencies = TRUE)
Loading required package: dplyr
Warning: package ‘dplyr’ was built under R version 4.4.2
Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff,
    setequal, union
library(dplyr)

# Load the tab-separated graduate earnings data; the first row holds column names.
data <- read.delim(file = "graduate-earnings.txt", header = TRUE, sep = "\t")

# Map US state postal abbreviations to one of four regions
# (Northeast, Midwest, South, West).
#
# Args:
#   state: character vector (or factor) of two-letter state abbreviations.
#     Any length is accepted, including zero.
#
# Returns:
#   An unnamed character vector the same length as `state` containing
#   "Northeast", "Midwest", "South", or "West". Anything not listed below
#   (missing values, "DC", territories, malformed input) maps to
#   NA_character_ so callers can filter it out with is.na().
state_to_region <- function(state) {
  region_states <- list(
    Northeast = c("CT", "ME", "MA", "NH", "RI", "VT", "NJ", "NY", "PA"),
    Midwest = c(
      "IL", "IN", "IA", "KS", "MI", "MN", "MO", "NE", "ND", "OH", "SD", "WI"
    ),
    South = c(
      "AL", "AR", "DE", "FL", "GA", "KY", "LA", "MD", "MS", "NC", "OK", "SC",
      "TN", "TX", "VA", "WV"
    ),
    West = c(
      "AK", "AZ", "CA", "CO", "HI", "ID", "MT", "NV", "NM", "OR", "UT", "WA",
      "WY"
    )
  )

  # Flatten the lists into two parallel vectors: abbreviation -> region name.
  abbreviations <- unlist(region_states, use.names = FALSE)
  regions <- rep(names(region_states), times = lengths(region_states))

  # match() is vectorized and yields NA for unknown or missing input, so
  # indexing with it returns NA_character_ for anything not recognized.
  regions[match(state, abbreviations)]
}

# Take the text after the first ", " in Location as the state abbreviation
# (NA when Location contains no ", " separator). vapply() guarantees a
# character vector even when the data set is empty.
data$State <- vapply(
  strsplit(data$Location, ", "),
  function(parts) parts[2],
  character(1)
)
# Map each state abbreviation to its region. as.character() keeps the result
# type-stable because state_to_region() may return a plain logical NA.
data$Region <- vapply(
  data$State,
  function(abbr) as.character(state_to_region(abbr)),
  character(1)
)
# Treat Public as a categorical (factor) predictor rather than a number.
data$Public <- as.factor(data$Public)
# Drop rows whose state could not be mapped to a region.
data <- data %>% filter(!is.na(Region))

# Regression model: least-squares fit of Earn on SAT, ACT, Price_with_aid,
# Region, and Public. Region (character) and Public (factor) enter as
# categorical predictors, so lm() estimates one coefficient per
# non-reference level.
model <- lm(Earn ~ SAT + ACT + Price_with_aid + Region + Public, data = data)
# Keep the summary object so its components (e.g. r.squared) can be reused.
model_summary <- summary(model)
print(model_summary)

Call:
lm(formula = Earn ~ SAT + ACT + Price_with_aid + Region + Public, 
    data = data)

Residuals:
     Min       1Q   Median       3Q 
-15792.2  -3340.4   -325.6   2752.9 
     Max 
 22346.2 

Coefficients:
                  Estimate
(Intercept)      1.093e+04
SAT              9.493e+00
ACT              7.333e+02
Price_with_aid   1.304e-01
RegionNortheast  1.989e+03
RegionSouth     -4.279e+02
RegionWest       2.734e+03
Public1          3.858e+03
                Std. Error t value
(Intercept)      1.886e+03   5.795
SAT              4.250e+00   2.234
ACT              1.710e+02   4.289
Price_with_aid   3.851e-02   3.387
RegionNortheast  5.282e+02   3.766
RegionSouth      5.645e+02  -0.758
RegionWest       6.473e+02   4.223
Public1          6.258e+02   6.165
                Pr(>|t|)    
(Intercept)     1.04e-08 ***
SAT             0.025826 *  
ACT             2.05e-05 ***
Price_with_aid  0.000748 ***
RegionNortheast 0.000180 ***
RegionSouth     0.448631    
RegionWest      2.73e-05 ***
Public1         1.20e-09 ***
---
Signif. codes:  
  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’
  0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 5297 on 693 degrees of freedom
Multiple R-squared:  0.3875,    Adjusted R-squared:  0.3813 
F-statistic: 62.63 on 7 and 693 DF,  p-value: < 2.2e-16
# 95% confidence intervals (confint's default level) for every coefficient.
conf_intervals <- confint(model, level = 0.95)
print(conf_intervals)
                        2.5 %
(Intercept)      7.226664e+03
SAT              1.148485e+00
ACT              3.976574e+02
Price_with_aid   5.480345e-02
RegionNortheast  9.522355e+02
RegionSouth     -1.536214e+03
RegionWest       1.462779e+03
Public1          2.629151e+03
                      97.5 %
(Intercept)     1.463376e+04
SAT             1.783721e+01
ACT             1.068977e+03
Price_with_aid  2.060089e-01
RegionNortheast 3.026364e+03
RegionSouth     6.803333e+02
RegionWest      4.004670e+03
Public1         5.086590e+03
# Residuals vs. fitted values diagnostic plot (first panel of plot.lm)
plot(model, which = 1L)


# Share of the variance in Earn explained by the model (R-squared as a percent)
cat(
  "1. The model explains",
  round(100 * model_summary[["r.squared"]], digits = 2),
  "% of the variance in earnings.\n"
)
1. The model explains 38.75 % of the variance in earnings.

#Our research question asked how tuition relates to earnings after graduation. We also wanted to see how standardized test scores, school category (public or private), and the region of the school affect earnings after graduation. Overall, the model explains 38.75% of the variance in earnings, which means it fits okay. It was helpful to see that SAT and ACT scores have significant positive effects on earnings, while tuition after financial aid is not as influential. The next steps will be to explore the differences between regions and between public and private schools in more detail. We could also add more or different factors to better understand the relationship and how much earnings are affected by these factors. The additional factors would also allow our data set to more accurately represent its population.

LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpUaGlzIGlzIGFuIFtSIE1hcmtkb3duXShodHRwOi8vcm1hcmtkb3duLnJzdHVkaW8uY29tKSBOb3RlYm9vay4gV2hlbiB5b3UgZXhlY3V0ZSBjb2RlIHdpdGhpbiB0aGUgbm90ZWJvb2ssIHRoZSByZXN1bHRzIGFwcGVhciBiZW5lYXRoIHRoZSBjb2RlLiANCg0KVHJ5IGV4ZWN1dGluZyB0aGlzIGNodW5rIGJ5IGNsaWNraW5nIHRoZSAqUnVuKiBidXR0b24gd2l0aGluIHRoZSBjaHVuayBvciBieSBwbGFjaW5nIHlvdXIgY3Vyc29yIGluc2lkZSBpdCBhbmQgcHJlc3NpbmcgKkN0cmwrU2hpZnQrRW50ZXIqLiANCg0KYGBge3J9DQoNCmlmICghcmVxdWlyZSgiZHBseXIiKSkgaW5zdGFsbC5wYWNrYWdlcygiZHBseXIiLCBkZXBlbmRlbmNpZXMgPSBUUlVFKQ0KbGlicmFyeShkcGx5cikNCg0KIyByZWFkIGFuIHNlcGFyYXRlIGRhdGEgc2V0DQpkYXRhIDwtIHJlYWQuZGVsaW0oImdyYWR1YXRlLWVhcm5pbmdzLnR4dCIsIHNlcCA9ICJcdCIsIGhlYWRlciA9IFRSVUUpDQoNCiMgVHVybmluZyBzdGF0ZXMgdG8gbm9ydGhlYXN0IG1pZHdlc3Qgc291dGggYW5kIHdlc3QNCnN0YXRlX3RvX3JlZ2lvbiA8LSBmdW5jdGlvbihzdGF0ZSkgew0KICBub3J0aGVhc3QgPC0gYygiQ1QiLCAiTUUiLCAiTUEiLCAiTkgiLCAiUkkiLCAiVlQiLCAiTkoiLCAiTlkiLCAiUEEiKQ0KICBtaWR3ZXN0IDwtIGMoIklMIiwgIklOIiwgIklBIiwgIktTIiwgIk1JIiwgIk1OIiwgIk1PIiwgIk5FIiwgIk5EIiwgIk9IIiwgIlNEIiwgIldJIikNCiAgc291dGggPC0gYygiQUwiLCAiQVIiLCAiREUiLCAiRkwiLCAiR0EiLCAiS1kiLCAiTEEiLCAiTUQiLCAiTVMiLCAiTkMiLCAiT0siLCAiU0MiLCAiVE4iLCAiVFgiLCAiVkEiLCAiV1YiKQ0KICB3ZXN0IDwtIGMoIkFLIiwgIkFaIiwgIkNBIiwgIkNPIiwgIkhJIiwgIklEIiwgIk1UIiwgIk5WIiwgIk5NIiwgIk9SIiwgIlVUIiwgIldBIiwgIldZIikNCiAgDQogIGlmIChzdGF0ZSAlaW4lIG5vcnRoZWFzdCkgcmV0dXJuKCJOb3J0aGVhc3QiKQ0KICBpZiAoc3RhdGUgJWluJSBtaWR3ZXN0KSByZXR1cm4oIk1pZHdlc3QiKQ0KICBpZiAoc3RhdGUgJWluJSBzb3V0aCkgcmV0dXJuKCJTb3V0aCIpDQogIGlmIChzdGF0ZSAlaW4lIHdlc3QpIHJldHVybigiV2VzdCIpDQogIHJldHVybihOQSkgICMgdGFrZSBvdXQgYW55dGhpbmcgbm90IHJlY29nbml6ZWQgYXMgYSBzdGF0ZQ0KfQ0KDQojdGFrZXMgc3RhdGVzIGFicmV2aWF0aW9ucyBhbmQgc3BsaXRzIGludG8gYWJicmV2aWF0aW9uDQpkYXRhJFN0YXRlIDwtIHNhcHBseShzdHJzcGxpdChkYXRhJExvY2F0aW9uLCAiLCAiKSwgZnVuY3Rpb24oeCkgeFsyXSkNCmRhdGEkUmVnaW9uIDwtIHNhcHBseShkYXRhJFN0YXRlLCBzdGF0ZV90b19yZWdpb24pICMgdXNlIHRoZSBhYm92ZSBjb2RlIHRvIHNlcGFyYXRlIHRoZSBzdGF0ZSBhYmJ2IGludG8gcmVnaW9ucw0KZGF0YSRQdWJsaWMg
PC0gYXMuZmFjdG9yKGRhdGEkUHVibGljKQ0KZGF0YSA8LSBkYXRhICU+JSBmaWx0ZXIoIWlzLm5hKFJlZ2lvbikpICMjIHRha2VzIG91dCBlbXB0eSByb3dzDQoNCiNyZWdyZXNzaW9uIG1vZGVsDQptb2RlbCA8LSBsbShFYXJuIH4gU0FUICsgQUNUICsgUHJpY2Vfd2l0aF9haWQgKyBSZWdpb24gKyBQdWJsaWMsIGRhdGEgPSBkYXRhKQ0KbW9kZWxfc3VtbWFyeSA8LSBzdW1tYXJ5KG1vZGVsKQ0KcHJpbnQobW9kZWxfc3VtbWFyeSkNCg0KY29uZl9pbnRlcnZhbHMgPC0gY29uZmludChtb2RlbCkNCnByaW50KGNvbmZfaW50ZXJ2YWxzKQ0KDQojcmVzaWR1YWxzIHZzIGZpdHRlZCB2YWx1ZXMNCnBsb3QobW9kZWwsIHdoaWNoID0gMSkNCg0KIyBWYXJpYW5jZSANCmNhdCgiMS4gVGhlIG1vZGVsIGV4cGxhaW5zIiwgcm91bmQobW9kZWxfc3VtbWFyeSRyLnNxdWFyZWQgKiAxMDAsIDIpLCAiJSBvZiB0aGUgdmFyaWFuY2UgaW4gZWFybmluZ3MuXG4iKQ0KDQpgYGANCiNPdXIgcmVzZWFyY2ggcXVlc3Rpb24gd2FzIHRyeWluZyB0byB1bmRlcnN0YW5kIHRoZSByZWxhdGlvbnNoaXAgYmV0d2VlbiB0dWl0aW9uIGFuZCBlYXJuaW5ncyB1cG9uIGdyYWR1YXRpb24uIFdlIGFsc28gd2FudGVkIHRvIHNlZSBob3cgc3RhbmRhcmRpemVkIHRlc3Qgc2NvcmUsIHNjaG9vbCBjYXRlZ29yeSwgYW5kIHJlZ2lvbiBvZiBzY2hvb2wgYWZmZWN0ZWQgdGhlIGVhcm5pbmdzIGFmdGVyIGdyYWR1YXRpb24uIE92ZXJhbGwsIHdlIGZvdW5kIGEgMzguNzUlIHZhcmlhbmNlIGluIGVhcm5pbmcgd2hpY2ggbWVhbnMgaXQgZml0cyBva2F5LiBUaGlzIHdhcyBoZWxwZnVsIHRvIHNlZSB0aGF0IFNBVCBhbmQgQUNUIHNjb3JlcyBoYXZlIHNpZ25pZmlhbnQgcG9zaXRpdmUgZmZlY3RzIG9uIGVhcmluZ3Mgd2hpbGUgdHVpdGlvbiBhZnRlciBmaW5hbmNpYWwgYWlkIGlzIG5vdCBhcyBpbmZsdWVuY2lhbC5UaGUgbmV4dCBzdGVwcyB3aWxsIGJlIHRvIGV4cGxvcmUgdGhlIGRpZmZlcmVuY2UgYmV0d2VlbiByZWdpb25zIGFuZCBwdWJsaWMvcHJpdmF0ZSBzY2hvb2xzIG1vcmUuIFdlIGNvdWxkIGFsc28gYWRkIG1vcmUvZGlmZmVyZW50IGZhY3RvcnMgdG8gdW5kZXJzdGFuZCB0aGUgcmVsYXRpb25zaGlwIG1vcmUgYW5kIGdhaW4gYSBiZXR0ZXIgdW5kZXJzdGFuZGluZyBvZiBob3cgbXVjaCBlYXJuaW5ncyBhcmUgYWZmZWN0ZWQgYnkgdGhlc2UgZmFjdG9ycy4gVGhlIGFkZGl0aW9uYWwgZmFjdG9ycyB3b3VsZCBhbHNvIGFsbG93IG91ciBkYXRhIHNldCB0byBtb3JlIGFjY3VyYXRlbHkgcmVwcmVzZW50IGl0cyBwb3B1bGF0aW9uLg0KDQoNCg==