# Load the two input data sets from CSV files into data frames.
# NOTE(review): both paths are absolute and specific to one user's machine;
# the script will fail at this step anywhere else unless they are adjusted.
university <- read.csv(file = "/Users/shenjiayuan/Desktop/cwurData.csv")
university2 <- read.csv(file = "/Users/shenjiayuan/Desktop/df2.csv")
library(countrycode)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# Derive two extra columns from the `country` column, which countrycode()
# interprets as country names (origin = "country.name").

# Continent each country belongs to.
university[["continent"]] <- countrycode(
  sourcevar = university[["country"]],
  origin = "country.name",
  destination = "continent"
)

# Country code in countrycode's "iso3c" scheme.
university[["country_code"]] <- countrycode(
  sourcevar = university[["country"]],
  origin = "country.name",
  destination = "iso3c"
)
# Fit a linear model of world_rank on five indicator columns of `university`
# and print its summary (coefficients, residuals, R-squared, F-statistic).
# The recorded output reports observations dropped due to missingness, so
# the fit does not use every row.
summary(
  lm(
    world_rank ~ patents + quality_of_education + alumni_employment +
      quality_of_faculty + broad_impact,
    data = university
  )
)
## 
## Call:
## lm(formula = world_rank ~ patents + quality_of_education + alumni_employment + 
##     quality_of_faculty + broad_impact, data = university)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -644.12  -25.75   15.48   44.73  110.16 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -96.763507   6.268940 -15.435  < 2e-16 ***
## patents                0.103814   0.007660  13.553  < 2e-16 ***
## quality_of_education   0.170436   0.021906   7.780 1.15e-14 ***
## alumni_employment      0.323065   0.011004  29.358  < 2e-16 ***
## quality_of_faculty    -0.102032   0.044334  -2.301   0.0215 *  
## broad_impact           0.791272   0.007567 104.568  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 71.07 on 1994 degrees of freedom
##   (200 observations deleted due to missingness)
## Multiple R-squared:  0.9396, Adjusted R-squared:  0.9394 
## F-statistic:  6200 on 5 and 1994 DF,  p-value: < 2.2e-16
# Export the data frame, including the derived continent and country_code
# columns, to "df.csv" in the current working directory.
# NOTE(review): write.csv() writes row names by default, so the file gets a
# leading unnamed index column; pass row.names = FALSE if that is unwanted.
write.csv(x = university, file = "df.csv")
# Fit the same linear model of world_rank on the five indicator columns,
# this time using `university2`, and print its summary.
# NOTE(review): the recorded output matches the fit on `university` exactly,
# which suggests df2.csv holds the same rows for these columns -- confirm
# whether a different data set was intended here.
summary(
  lm(
    world_rank ~ patents + quality_of_education + alumni_employment +
      quality_of_faculty + broad_impact,
    data = university2
  )
)
## 
## Call:
## lm(formula = world_rank ~ patents + quality_of_education + alumni_employment + 
##     quality_of_faculty + broad_impact, data = university2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -644.12  -25.75   15.48   44.73  110.16 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -96.763507   6.268940 -15.435  < 2e-16 ***
## patents                0.103814   0.007660  13.553  < 2e-16 ***
## quality_of_education   0.170436   0.021906   7.780 1.15e-14 ***
## alumni_employment      0.323065   0.011004  29.358  < 2e-16 ***
## quality_of_faculty    -0.102032   0.044334  -2.301   0.0215 *  
## broad_impact           0.791272   0.007567 104.568  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 71.07 on 1994 degrees of freedom
##   (200 observations deleted due to missingness)
## Multiple R-squared:  0.9396, Adjusted R-squared:  0.9394 
## F-statistic:  6200 on 5 and 1994 DF,  p-value: < 2.2e-16