R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Print per-column summary statistics (min, quartiles, mean, max) for the
# `cars` data set; the columns shown in the output are `speed` and `dist`.
summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that adding the `echo = FALSE` parameter to a code chunk prevents the R code that generated the plot from being printed.

library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(dslabs)
library(lmtest)
## Cargando paquete requerido: zoo
## 
## Adjuntando el paquete: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(corrplot)
## corrplot 0.94 loaded
library(reshape2)
library(readxl)
library(readr)
# Read the rankings CSV into a tibble with readr::read_csv(); per the column
# specification printed below it has 2200 rows and 14 columns (2 character,
# 12 double).
# NOTE(review): the path is absolute and points into one user's home
# directory, so the document will not knit on another machine -- consider a
# project-relative path. Left unchanged here because the project layout is
# not visible.
cwurData <- read_csv("~/ESTUDIOS 2024/Diplomado de Big data UAO/Rstudio diplomado big data/CLASE 14/cwurData.csv")
## Rows: 2200 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): institution, country
## dbl (12): world_rank, national_rank, quality_of_education, alumni_employment...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Open the spreadsheet-style data viewer only when running interactively.
# View() is a GUI side effect that contributes nothing to the rendered
# document and can fail in non-interactive (knit / Rscript) sessions.
if (interactive()) {
  View(cwurData)
}
# Scatter plot with one point per row of cwurData: score on the x axis,
# country on the y axis, and explicit axis titles.
ggplot(cwurData, aes(x = score, y = country)) +
  geom_point(show.legend = FALSE) +
  labs(x = "score", y = "country")

# Pearson correlation between `year` and `score`, returned as a 1 x 1 tibble
# whose column is auto-named after the expression.
summarise(cwurData, cor(year, score, method = "pearson"))
## # A tibble: 1 × 1
##   `cor(year, score, method = "pearson")`
##                                    <dbl>
## 1                                 -0.239
# Fit a linear model of `score` on `country` (a character column, so lm()
# expands it into one indicator coefficient per country).
#
# The formula uses bare column names so the variables are looked up in the
# piped data frame (`data = .`). Writing `cwurData$score ~ cwurData$country`
# bypasses `data` entirely: the preceding select() has no effect, every
# coefficient is named `cwurData$country...`, and predict(newdata = ...)
# cannot substitute new data. Coefficients are now named `country<Name>`;
# `(Intercept)` is unchanged.
modeloh <- cwurData %>%
  select(score, country) %>%
  lm(score ~ country, data = .)

# Compute the model summary (coefficient table, residual standard error,
# R-squared, F-statistic) and print it.
# NOTE(review): the variable is named `summary`, which shadows the base
# generic summary() in this environment. Calls like summary(x) still resolve
# to the function, but the name is confusing -- consider renaming after
# confirming nothing later in the document reads `summary`.
summary <- summary(modeloh)
summary
## 
## Call:
## lm(formula = cwurData$score ~ cwurData$country, data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -8.294 -2.796 -0.766  0.258 48.165 
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                          44.67286    2.76982  16.128   <2e-16 ***
## cwurData$countryAustralia             1.15266    2.93221   0.393   0.6943    
## cwurData$countryAustria               0.46673    3.14795   0.148   0.8821    
## cwurData$countryBelgium               2.33814    3.21824   0.727   0.4676    
## cwurData$countryBrazil                0.10825    3.02716   0.036   0.9715    
## cwurData$countryBulgaria             -0.33786    5.87568  -0.058   0.9542    
## cwurData$countryCanada                2.68645    2.90135   0.926   0.3546    
## cwurData$countryChile                 0.09464    3.79274   0.025   0.9801    
## cwurData$countryChina                 0.31972    2.82728   0.113   0.9100    
## cwurData$countryColombia             -0.24036    4.59323  -0.052   0.9583    
## cwurData$countryCroatia               0.09714    5.87568   0.017   0.9868    
## cwurData$countryCyprus               -0.46286    5.87568  -0.079   0.9372    
## cwurData$countryCzech Republic        0.08814    3.61141   0.024   0.9805    
## cwurData$countryDenmark               3.10131    3.48528   0.890   0.3737    
## cwurData$countryEgypt                -0.44286    3.79274  -0.117   0.9071    
## cwurData$countryEstonia               0.13714    5.87568   0.023   0.9814    
## cwurData$countryFinland               0.85864    3.21824   0.267   0.7896    
## cwurData$countryFrance                1.69448    2.85738   0.593   0.5532    
## cwurData$countryGermany               1.74027    2.85288   0.610   0.5419    
## cwurData$countryGreece                0.18143    3.39233   0.053   0.9574    
## cwurData$countryHong Kong             1.62214    3.48528   0.465   0.6417    
## cwurData$countryHungary              -0.06952    3.48528  -0.020   0.9841    
## cwurData$countryIceland               0.30714    5.87568   0.052   0.9583    
## cwurData$countryIndia                 0.04037    3.06664   0.013   0.9895    
## cwurData$countryIran                 -0.40286    3.32090  -0.121   0.9035    
## cwurData$countryIreland               0.56214    3.32090   0.169   0.8656    
## cwurData$countryIsrael                7.98123    3.18009   2.510   0.0122 *  
## cwurData$countryItaly                 0.77735    2.86903   0.271   0.7865    
## cwurData$countryJapan                 2.55670    2.83014   0.903   0.3664    
## cwurData$countryLebanon              -0.01786    5.87568  -0.003   0.9976    
## cwurData$countryLithuania            -0.31786    5.87568  -0.054   0.9569    
## cwurData$countryMalaysia              0.33548    4.07707   0.082   0.9344    
## cwurData$countryMexico                0.42964    4.59323   0.094   0.9255    
## cwurData$countryNetherlands           3.28542    3.08606   1.065   0.2872    
## cwurData$countryNew Zealand           0.43964    3.48528   0.126   0.8996    
## cwurData$countryNorway                1.65714    3.48528   0.475   0.6345    
## cwurData$countryPoland               -0.17563    3.26427  -0.054   0.9571    
## cwurData$countryPortugal              0.42214    3.39233   0.124   0.9010    
## cwurData$countryPuerto Rico          -0.49786    5.87568  -0.085   0.9325    
## cwurData$countryRomania              -0.53952    5.05698  -0.107   0.9150    
## cwurData$countryRussia                2.70825    3.69310   0.733   0.4634    
## cwurData$countrySaudi Arabia         -0.16161    3.79274  -0.043   0.9660    
## cwurData$countrySerbia               -0.25286    5.87568  -0.043   0.9657    
## cwurData$countrySingapore             5.48714    4.29099   1.279   0.2011    
## cwurData$countrySlovak Republic      -0.18286    5.87568  -0.031   0.9752    
## cwurData$countrySlovenia             -0.05786    4.59323  -0.013   0.9900    
## cwurData$countrySouth Africa          1.72614    3.61141   0.478   0.6327    
## cwurData$countrySouth Korea           1.52548    2.90135   0.526   0.5991    
## cwurData$countrySpain                 0.31801    2.88703   0.110   0.9123    
## cwurData$countrySweden                3.21714    3.14795   1.022   0.3069    
## cwurData$countrySwitzerland           6.53599    3.12049   2.095   0.0363 *  
## cwurData$countryTaiwan                0.33953    2.97311   0.114   0.9091    
## cwurData$countryThailand              0.44381    4.07707   0.109   0.9133    
## cwurData$countryTurkey               -0.19186    3.21824  -0.060   0.9525    
## cwurData$countryUganda               -0.39286    5.87568  -0.067   0.9467    
## cwurData$countryUnited Arab Emirates -0.45286    5.87568  -0.077   0.9386    
## cwurData$countryUnited Kingdom        4.80180    2.83635   1.693   0.0906 .  
## cwurData$countryUruguay              -0.41786    5.87568  -0.071   0.9433    
## cwurData$countryUSA                   7.16700    2.78669   2.572   0.0102 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.328 on 2141 degrees of freedom
## Multiple R-squared:  0.1319, Adjusted R-squared:  0.1084 
## F-statistic: 5.608 on 58 and 2141 DF,  p-value: < 2.2e-16
# Extract the fitted intercept as a bare numeric scalar via the coef()
# accessor.
intercepto <- coef(modeloh)[["(Intercept)"]]

# Scatter plot of score (x axis) against country (y axis), one point per row
# of cwurData. Written as plain code with no surrounding lone backticks, so
# the expression is evaluated instead of being read as a quoted name or
# inline code.
cwurData %>%
  ggplot(aes(x = score, y = country)) +
  geom_point(show.legend = FALSE) +
  xlab("score") +
  ylab("country")