# Examen-Parcial.utf8

set.seed(111)

# Examen Parcial - Jesus Zarate

# Question 1 ----
# Scrape the Democracy Index table from Wikipedia and split the countries into
# five clusters using PAM (partitioning around medoids) over Gower distances.
library(htmltab)
library(cluster)
library(factoextra) # cluster visualisation

link <- "https://en.wikipedia.org/wiki/Democracy_Index"
Path <- '//*[@id="mw-content-text"]/div/table[2]'
DemoIndex <- htmltab(doc = link, which = Path)
DemoIndex <- na.omit(DemoIndex)

# The scraped country names arrive with a mis-decoded non-breaking space in
# front (shown as "Â Norway"); strip the stray character and the whitespace.
DemoIndex[[2]] <- trimws(gsub("Â", "", DemoIndex[[2]]), whitespace = "[\\h\\v]")

# Only columns 3:8 (overall score plus the five component scores) are numeric.
# Columns 9:11 (regime type, region, change from last year) are text: passing
# them through as.numeric() wiped them out to all-NA columns, which raised
# "NAs introduced by coercion" and then min()/max() warnings inside daisy().
score_cols <- 3:8
DemoIndex[, score_cols] <- lapply(DemoIndex[, score_cols], as.numeric)
str(DemoIndex)

# Pairwise Gower distances between countries, computed on the scores only.
g.dist <- daisy(DemoIndex[, score_cols], metric = "gower")

# PAM with 5 clusters; the full result object is kept (cluster.only = FALSE)
# and the cluster assignment is stored as a new column.
pam.resultado <- pam(g.dist, 5, cluster.only = FALSE)
DemoIndex$clusterPT <- pam.resultado$cluster

fviz_cluster(
  object = list(data = g.dist, cluster = DemoIndex$clusterPT),
  geom = c("text"),
  ellipse.type = "convex"
)

# View() opens the data viewer and only makes sense in an interactive session.
if (interactive()) {
  View(DemoIndex)
}

# Drop columns 1, 5 and 6 (rank, functioning of government and political
# participation in the scraped table).
DemoIndex[, c(1, 5, 6)] <- NULL



# Question 2 ----
# The file seguridadUSA.xlsx reports several public-safety variables for each
# US state (a lower value means a better situation).
# Which state has the worst murder situation?

library(rio)
Esesinato <- "https://github.com/jesuszaratev/covid/raw/master/archivo.xlsx"
Asesinatos <- import(Esesinato)
names(Asesinatos)

## [1] "estado"      "asesinatos"  "asaltos"     "violaciones" "score"

# Lower is better, so the worst state is the one with the highest value.
# Comparing against the maximum (instead of which.max) reports every state in
# case of a tie, and na.rm = TRUE keeps a missing value from hiding the answer.
peor_asesinatos <- max(Asesinatos$asesinatos, na.rm = TRUE)
Asesinatos$estado[which(Asesinatos$asesinatos == peor_asesinatos)]

# The original notes record Georgia as the state with the worst murder value.

# Question 3 ----
# Use the democracy and happiness scores, together with the urban growth rate,
# as input for the clustering (assuming a higher rate is something negative).
# If asked for 3 clusters ("Bueno", "Intermedio", "Malo"), which cluster would
# Peru fall into?
# NOTE(review): this section only builds the merged input tables; no clustering
# step is present here.

library(htmltab)
library(stringr)

# --- Happiness score (Wikipedia) ---------------------------------------------
link <- "https://en.wikipedia.org/wiki/World_Happiness_Report"
Path <- '//*[@id="mw-content-text"]/div/table'
Happy <- htmltab(doc = link, which = Path)
Happy$`Overall rank` <- NULL
Happy[, c(3:8)] <- NULL # keep only country and overall score
names(Happy) <- c("Pais", "HappyScore")
# Removing "Â" alone leaves the (non-breaking) space in front of the name, so
# the key would never match the clean CIA names in merge(); trim it as well.
Happy$Pais <- trimws(gsub("Â", "", Happy$Pais), whitespace = "[\\h\\v]")
# Scores are scraped as text; make them numeric so they can be clustered.
Happy$HappyScore <- as.numeric(Happy$HappyScore)

# --- Democracy score (Wikipedia) ---------------------------------------------
link <- "https://en.wikipedia.org/wiki/Democracy_Index"
Path <- '//*[@id="mw-content-text"]/div/table[2]'
Democracy <- htmltab(doc = link, which = Path)

Democracy$`Rank >> Rank` <- NULL
Democracy[, c(3:10)] <- NULL # keep only country and overall score
names(Democracy) <- c("Pais", "DemoScore")

Democracy$Pais <- trimws(gsub("Â", "", Democracy$Pais), whitespace = "[\\h\\v]")
Democracy$DemoScore <- as.numeric(Democracy$DemoScore)

# --- Urbanization (CIA World Factbook) ---------------------------------------
link <- "https://www.cia.gov/library/publications/resources/the-world-factbook/fields/349.html"
Path <- '//*[@id="fieldListing"]'
Cia <- htmltab(doc = link, which = Path)

## No encoding supplied: defaulting to UTF-8.

names(Cia) <- c("Pais", "Urban")
# A number (optionally with decimals) that is immediately followed by "%".
PATRON <- "(\\d+\\.*\\d*)(?=\\%)"
COLSUCIA <- Cia$Urban
# Keep the first percentage found in each field.
# NOTE(review): the values this produced (26, 62.1, 73.7, 87.2, ...) look like
# the urban population share rather than the urban growth rate the question
# asks for - confirm which match of the field is actually wanted.
Cia$Urban <- str_extract_all(string = COLSUCIA, pattern = PATRON, simplify = TRUE)[, 1]
str(Cia)

## 'data.frame':    231 obs. of  2 variables:
##  $ Pais : chr  "Afghanistan" "Albania" "Algeria" "American Samoa" ...
##  $ Urban: chr  "26" "62.1" "73.7" "87.2" ...

Cia$Urban <- as.numeric(Cia$Urban)

# --- Merge on country name (inner joins) -------------------------------------
total1 <- merge(Democracy, Happy, by.x = "Pais", by.y = "Pais")
total2 <- merge(total1, Cia, by.x = "Pais", by.y = "Pais")
total3 <- merge(Cia, Democracy, by.x = "Pais", by.y = "Pais")


# Question 4 ----
# Cluster the World Happiness Report countries into five groups with PAM over
# Gower distances and report the group Peru falls into.

library(htmltab)
link <- "https://en.wikipedia.org/wiki/World_Happiness_Report"
Path <- '//*[@id="mw-content-text"]/div/table'
DataHappy <- htmltab(doc = link, which = Path)

DataHappy$`Overall rank` <- NULL
DataHappy <- na.omit(DataHappy)
str(DataHappy)

## 'data.frame':    156 obs. of  8 variables:
##  $ Country or region           : chr  "Â Finland" "Â Denmark" "Â Norway" "Â Iceland" ...
##  $ Score                       : chr  "7.769" "7.600" "7.554" "7.494" ...
##  $ GDP per capita              : chr  "1.340" "1.383" "1.488" "1.380" ...
##  $ Social support              : chr  "1.587" "1.573" "1.582" "1.624" ...
##  $ Healthy life expectancy     : chr  "0.986" "0.996" "1.028" "1.026" ...
##  $ Freedom to make life choices: chr  "0.596" "0.592" "0.603" "0.591" ...
##  $ Generosity                  : chr  "0.153" "0.252" "0.271" "0.354" ...
##  $ Perceptions of corruption   : chr  "0.393" "0.410" "0.341" "0.118" ...

# Every column except the country name is a score scraped as text.
indicator_cols <- 2:8
DataHappy[, indicator_cols] <- lapply(DataHappy[, indicator_cols], as.numeric)

library(cluster)
# Pairwise Gower distances between countries.
g.dist <- daisy(DataHappy[, indicator_cols], metric = "gower")
# PAM with 5 clusters; the assignment is stored as a new column.
pam.resultado <- pam(g.dist, 5, cluster.only = FALSE)
DataHappy$clusterPT <- pam.resultado$cluster

library(factoextra) # cluster visualisation
fviz_cluster(
  object = list(data = g.dist, cluster = DataHappy$clusterPT),
  geom = c("text"),
  ellipse.type = "convex"
)

# Look Peru up in the data instead of reading its cluster off the table by
# hand. A substring match is used because the scraped names carry a stray
# leading character.
es_peru <- grepl("Peru", DataHappy$`Country or region`, fixed = TRUE)
DataHappy[es_peru, c("Country or region", "Score", "clusterPT")]

# Mean happiness score per cluster, to interpret what each group represents.
aggregate(Score ~ clusterPT, data = DataHappy, FUN = mean)

# In the original run Peru fell into cluster 2, which the author interpreted
# as a "happy" group.

# Question 5 ----
# Cluster the US states into five groups from their murder, assault and rape
# values, using PAM over Gower distances.

library(rio)
Esesinato <- "https://github.com/jesuszaratev/covid/raw/master/archivo.xlsx"
Asesinatos <- import(Esesinato)
names(Asesinatos)

## [1] "estado"      "asesinatos"  "asaltos"     "violaciones" "score"

# Keep complete rows only.
Asesinatos <- na.omit(Asesinatos)
str(Asesinatos)

## 'data.frame':    50 obs. of  5 variables:
##  $ estado     : chr  "Alabama" "Alaska" "Arizona" "Arkansas" ...
##  $ asesinatos : num  45 35 29 31 33 28 10 19 48 50 ...
##  $ asaltos    : num  35 43 47 31 44 33 15 36 49 34 ...
##  $ violaciones: num  29 49 43 24 48 47 7 15 44 36 ...
##  $ score      : num  36 47 41 30 44 33 10 23 50 42 ...

library(cluster)
# Gower distances over columns 2:4 (asesinatos, asaltos, violaciones).
g.dist <- daisy(Asesinatos[, 2:4], metric = "gower")
# PAM with 5 clusters, keeping the full result object.
pam.resultado <- pam(g.dist, 5, cluster.only = FALSE)
# Store each state's cluster assignment as a new column.
Asesinatos$clusterPT <- pam.resultado$cluster

library(factoextra) # cluster visualisation
fviz_cluster(
  object = list(data = g.dist, cluster = Asesinatos$clusterPT),
  geom = "text",
  ellipse.type = "convex"
)

#Todo lo he trabajado en un script normal y lo he copiado de un R Notebook
#Olvidé copiar todas las preguntas