This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown, see <http://rmarkdown.rstudio.com>.
When you click the **Knit** button, a document is generated that includes both the written content and the output of any R code chunks embedded in the document. You can embed an R code chunk like this:
# Per-column descriptive statistics for the built-in `cars` data set.
summary(object = cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(dslabs)
library(lmtest)
## Cargando paquete requerido: zoo
##
## Adjuntando el paquete: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(corrplot)
## corrplot 0.94 loaded
library(reshape2)
library(readxl)
library(readr)
# Read cwurData.csv into a tibble; readr guesses the column types and reports
# them in the message it prints after loading.
cwurData <- read_csv(
  file = "~/ESTUDIOS 2024/Diplomado de Big data UAO/Rstudio diplomado big data/CLASE 14/cwurData.csv"
)
## Rows: 2200 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): institution, country
## dbl (12): world_rank, national_rank, quality_of_education, alumni_employment...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Open the spreadsheet-style data viewer only in an interactive session:
# View() needs a GUI, so it adds nothing to (and can break) a knitted document.
if (interactive()) {
  View(cwurData)
}
# Scatter plot with one point per row of cwurData: score on the x axis,
# country (a character column, hence a discrete axis) on the y axis.
ggplot(data = cwurData, mapping = aes(x = score, y = country)) +
  geom_point(show.legend = FALSE) +
  labs(x = "score", y = "country")
# Pearson correlation between `year` and `score` over all rows. The summary
# expression is left unnamed, so the result column is labelled with the call.
summarise(cwurData, cor(year, score, method = "pearson"))
## # A tibble: 1 × 1
## `cor(year, score, method = "pearson")`
## <dbl>
## 1 -0.239
# Linear model of score explained by country. `country` is a character column,
# so lm() treats it as categorical and estimates one coefficient per country
# relative to the baseline level.
# The formula uses bare column names that are resolved through `data`. Writing
# cwurData$score ~ cwurData$country instead bypasses `data`, bakes the
# data-frame name into every coefficient label and makes predict() ignore
# `newdata`. The fitted values are the same either way.
modeloh <- lm(score ~ country, data = cwurData)

# NOTE(review): this variable shares its name with base::summary(). Calls such
# as summary(x) keep working because R skips non-function objects when it looks
# up a function, but consider renaming it; the name is kept here in case later
# code refers to it.
summary <- summary(modeloh)
summary
##
## Call:
## lm(formula = cwurData$score ~ cwurData$country, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.294 -2.796 -0.766 0.258 48.165
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 44.67286 2.76982 16.128 <2e-16 ***
## cwurData$countryAustralia 1.15266 2.93221 0.393 0.6943
## cwurData$countryAustria 0.46673 3.14795 0.148 0.8821
## cwurData$countryBelgium 2.33814 3.21824 0.727 0.4676
## cwurData$countryBrazil 0.10825 3.02716 0.036 0.9715
## cwurData$countryBulgaria -0.33786 5.87568 -0.058 0.9542
## cwurData$countryCanada 2.68645 2.90135 0.926 0.3546
## cwurData$countryChile 0.09464 3.79274 0.025 0.9801
## cwurData$countryChina 0.31972 2.82728 0.113 0.9100
## cwurData$countryColombia -0.24036 4.59323 -0.052 0.9583
## cwurData$countryCroatia 0.09714 5.87568 0.017 0.9868
## cwurData$countryCyprus -0.46286 5.87568 -0.079 0.9372
## cwurData$countryCzech Republic 0.08814 3.61141 0.024 0.9805
## cwurData$countryDenmark 3.10131 3.48528 0.890 0.3737
## cwurData$countryEgypt -0.44286 3.79274 -0.117 0.9071
## cwurData$countryEstonia 0.13714 5.87568 0.023 0.9814
## cwurData$countryFinland 0.85864 3.21824 0.267 0.7896
## cwurData$countryFrance 1.69448 2.85738 0.593 0.5532
## cwurData$countryGermany 1.74027 2.85288 0.610 0.5419
## cwurData$countryGreece 0.18143 3.39233 0.053 0.9574
## cwurData$countryHong Kong 1.62214 3.48528 0.465 0.6417
## cwurData$countryHungary -0.06952 3.48528 -0.020 0.9841
## cwurData$countryIceland 0.30714 5.87568 0.052 0.9583
## cwurData$countryIndia 0.04037 3.06664 0.013 0.9895
## cwurData$countryIran -0.40286 3.32090 -0.121 0.9035
## cwurData$countryIreland 0.56214 3.32090 0.169 0.8656
## cwurData$countryIsrael 7.98123 3.18009 2.510 0.0122 *
## cwurData$countryItaly 0.77735 2.86903 0.271 0.7865
## cwurData$countryJapan 2.55670 2.83014 0.903 0.3664
## cwurData$countryLebanon -0.01786 5.87568 -0.003 0.9976
## cwurData$countryLithuania -0.31786 5.87568 -0.054 0.9569
## cwurData$countryMalaysia 0.33548 4.07707 0.082 0.9344
## cwurData$countryMexico 0.42964 4.59323 0.094 0.9255
## cwurData$countryNetherlands 3.28542 3.08606 1.065 0.2872
## cwurData$countryNew Zealand 0.43964 3.48528 0.126 0.8996
## cwurData$countryNorway 1.65714 3.48528 0.475 0.6345
## cwurData$countryPoland -0.17563 3.26427 -0.054 0.9571
## cwurData$countryPortugal 0.42214 3.39233 0.124 0.9010
## cwurData$countryPuerto Rico -0.49786 5.87568 -0.085 0.9325
## cwurData$countryRomania -0.53952 5.05698 -0.107 0.9150
## cwurData$countryRussia 2.70825 3.69310 0.733 0.4634
## cwurData$countrySaudi Arabia -0.16161 3.79274 -0.043 0.9660
## cwurData$countrySerbia -0.25286 5.87568 -0.043 0.9657
## cwurData$countrySingapore 5.48714 4.29099 1.279 0.2011
## cwurData$countrySlovak Republic -0.18286 5.87568 -0.031 0.9752
## cwurData$countrySlovenia -0.05786 4.59323 -0.013 0.9900
## cwurData$countrySouth Africa 1.72614 3.61141 0.478 0.6327
## cwurData$countrySouth Korea 1.52548 2.90135 0.526 0.5991
## cwurData$countrySpain 0.31801 2.88703 0.110 0.9123
## cwurData$countrySweden 3.21714 3.14795 1.022 0.3069
## cwurData$countrySwitzerland 6.53599 3.12049 2.095 0.0363 *
## cwurData$countryTaiwan 0.33953 2.97311 0.114 0.9091
## cwurData$countryThailand 0.44381 4.07707 0.109 0.9133
## cwurData$countryTurkey -0.19186 3.21824 -0.060 0.9525
## cwurData$countryUganda -0.39286 5.87568 -0.067 0.9467
## cwurData$countryUnited Arab Emirates -0.45286 5.87568 -0.077 0.9386
## cwurData$countryUnited Kingdom 4.80180 2.83635 1.693 0.0906 .
## cwurData$countryUruguay -0.41786 5.87568 -0.071 0.9433
## cwurData$countryUSA 7.16700 2.78669 2.572 0.0102 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.328 on 2141 degrees of freedom
## Multiple R-squared: 0.1319, Adjusted R-squared: 0.1084
## F-statistic: 5.608 on 58 and 2141 DF, p-value: < 2.2e-16
# Intercept of the fitted model, extracted as a bare number (name dropped).
intercepto <- coef(modeloh)[["(Intercept)"]]
# Scatter plot of score (x axis) against country (y axis), written as plain
# code so that it is evaluated rather than treated as a backtick-quoted span.
cwurData %>%
  ggplot(aes(x = score, y = country)) +
  geom_point(show.legend = FALSE) +
  xlab("score") +
  ylab("country")