# Load the two input tables from CSV. The directory is named once so the
# script can be pointed at another location by editing a single line; the
# resulting paths are the same as the previously hard-coded ones.
data_dir <- "/Users/shenjiayuan/Desktop"
university <- read.csv(file.path(data_dir, "cwurData.csv"))
university2 <- read.csv(file.path(data_dir, "df2.csv"))
library(countrycode)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Add the continent name and the country code, both looked up from the
# country-name column with countrycode().
country_names <- university[["country"]]
university[["continent"]] <- countrycode(
  country_names,
  origin = "country.name",
  destination = "continent"
)
university[["country_code"]] <- countrycode(
  country_names,
  origin = "country.name",
  destination = "iso3c"
)
# Fit a linear model of world_rank on five other columns of `university`
# and print its summary. The formula is kept inline so the "Call:" line of
# the printed summary stays the same.
rank_fit <- lm(
  world_rank ~ patents + quality_of_education + alumni_employment +
    quality_of_faculty + broad_impact,
  data = university
)
summary(rank_fit)
##
## Call:
## lm(formula = world_rank ~ patents + quality_of_education + alumni_employment +
## quality_of_faculty + broad_impact, data = university)
##
## Residuals:
## Min 1Q Median 3Q Max
## -644.12 -25.75 15.48 44.73 110.16
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -96.763507 6.268940 -15.435 < 2e-16 ***
## patents 0.103814 0.007660 13.553 < 2e-16 ***
## quality_of_education 0.170436 0.021906 7.780 1.15e-14 ***
## alumni_employment 0.323065 0.011004 29.358 < 2e-16 ***
## quality_of_faculty -0.102032 0.044334 -2.301 0.0215 *
## broad_impact 0.791272 0.007567 104.568 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 71.07 on 1994 degrees of freedom
## (200 observations deleted due to missingness)
## Multiple R-squared: 0.9396, Adjusted R-squared: 0.9394
## F-statistic: 6200 on 5 and 1994 DF, p-value: < 2.2e-16
# Save the `university` table, including the added continent and
# country_code columns, as df.csv in the current working directory.
utils::write.csv(university, "df.csv")
# Fit the same linear model of world_rank on `university2` and print its
# summary. The formula is kept inline so the "Call:" line of the printed
# summary stays the same.
# NOTE(review): the echoed output for this fit matches the fit on
# `university` exactly -- confirm df2.csv is meant to differ from the
# original data.
rank_fit2 <- lm(
  world_rank ~ patents + quality_of_education + alumni_employment +
    quality_of_faculty + broad_impact,
  data = university2
)
summary(rank_fit2)
##
## Call:
## lm(formula = world_rank ~ patents + quality_of_education + alumni_employment +
## quality_of_faculty + broad_impact, data = university2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -644.12 -25.75 15.48 44.73 110.16
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -96.763507 6.268940 -15.435 < 2e-16 ***
## patents 0.103814 0.007660 13.553 < 2e-16 ***
## quality_of_education 0.170436 0.021906 7.780 1.15e-14 ***
## alumni_employment 0.323065 0.011004 29.358 < 2e-16 ***
## quality_of_faculty -0.102032 0.044334 -2.301 0.0215 *
## broad_impact 0.791272 0.007567 104.568 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 71.07 on 1994 degrees of freedom
## (200 observations deleted due to missingness)
## Multiple R-squared: 0.9396, Adjusted R-squared: 0.9394
## F-statistic: 6200 on 5 and 1994 DF, p-value: < 2.2e-16