Oscar Padilla - 13000285

Se cargan las librerías
library(rvest)
## Warning: package 'rvest' was built under R version 3.5.2
## Loading required package: xml2
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.5.2
## -- Attaching packages --------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.1       v purrr   0.3.2  
## v tibble  2.1.1       v dplyr   0.8.0.1
## v tidyr   0.8.3       v stringr 1.4.0  
## v readr   1.3.1       v forcats 0.4.0
## Warning: package 'ggplot2' was built under R version 3.5.3
## Warning: package 'tibble' was built under R version 3.5.3
## Warning: package 'tidyr' was built under R version 3.5.3
## Warning: package 'readr' was built under R version 3.5.3
## Warning: package 'purrr' was built under R version 3.5.3
## Warning: package 'dplyr' was built under R version 3.5.3
## Warning: package 'stringr' was built under R version 3.5.2
## Warning: package 'forcats' was built under R version 3.5.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter()         masks stats::filter()
## x readr::guess_encoding() masks rvest::guess_encoding()
## x dplyr::lag()            masks stats::lag()
## x purrr::pluck()          masks rvest::pluck()
library(stringr)
library(tm)
## Warning: package 'tm' was built under R version 3.5.3
## Loading required package: NLP
## Warning: package 'NLP' was built under R version 3.5.2
## 
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
## 
##     annotate
library(dplyr)
library(tidyr)
library(knitr)
## Warning: package 'knitr' was built under R version 3.5.3
library(parallel)

Se obtienen los enlaces de las actrices desde Wikipedia

# Download the Wikipedia list page, collect the href of every <a> node and
# keep only the hrefs that contain "/wiki/" and none of the excluded markers.
# unlist() flattens the resulting one-column tibble into a character vector.
actrices <-
  "https://en.wikipedia.org/wiki/List_of_American_film_actresses" %>%
  read_html() %>%
  html_nodes("a") %>%
  html_attr("href") %>%
  tibble(links = .) %>%
  filter(
    str_detect(links, "/wiki/"),
    !str_detect(links, "#"),
    !str_detect(links, "image"),
    !str_detect(tolower(links), "file"),
    !str_detect(links, "https:"),
    !str_detect(links, ":"),
    !str_detect(links, "List_of_American_television_actresses")
  ) %>%
  unlist()

Para efectos del proyecto final únicamente trabajaremos con los primeros 31 enlaces de la lista, que generan las 30 transiciones (pares consecutivos) de la matriz

# Keep only the first 31 links. Taking the head of the vector avoids
# hard-coding the total number of links found on the page (which changes
# whenever the Wikipedia list is edited).
actrices <- head(actrices, 31)

Función para buscar referencias de las actrices

# Fetch one Wikipedia article and return the internal article links it contains.
#
# link: path of the article relative to the site root (it is appended to
#       "https://en.wikipedia.org"), e.g. "/wiki/Amy_Adams".
# Returns a one-column tibble (`links`) holding every href of the page that
# contains "/wiki/" and none of the excluded markers ("#", "image", "file"
# in any case, "https:", ":", or the television-actresses list page).
# Errors raised by read_html() (e.g. an unreachable page) are not caught.
buscar <- function(link) {
  linkCompleto <- paste0("https://en.wikipedia.org", link)
  hrefs <- read_html(linkCompleto) %>%
    html_nodes("a") %>%
    html_attr("href")
  # Return the filtered tibble explicitly (and visibly) instead of relying on
  # the invisible value of an assignment to an otherwise unused local.
  tibble(links = hrefs) %>%
    filter(
      str_detect(links, "/wiki/"),
      !str_detect(links, "#"),
      !str_detect(links, "image"),
      !str_detect(tolower(links), "file"),
      !str_detect(links, "https:"),
      !str_detect(links, ":"),
      !str_detect(links, "List_of_American_television_actresses")
    )
}

Buscamos, para cada actriz, los enlaces a otros artículos de Wikipedia que aparecen en su página

# Apply buscar() to every actress link. The resulting list is not assigned to
# a variable, so it is only printed.
lapply(X = actrices, FUN = buscar)

Limpiamos los enlaces para conservar únicamente el nombre de cada actriz (se elimina el prefijo "/wiki/")

# Drop the first 6 characters (the "/wiki/" prefix) from every link so only
# the article name remains. substr() is vectorised, so this covers the whole
# vector regardless of its length instead of a fixed number of positions.
actrices <- substr(actrices, 7, nchar(actrices))

Armamos la cadena de Markov

# Build every pair of consecutive entries of `actrices`, count how often each
# pair occurs and split the pair back into its two members (word1 -> word2).
bigrams <- ngrams(actrices, 2) %>%
  lapply(paste, collapse = " ") %>%
  unlist() %>%
  table(bigrams = .) %>%
  as.data.frame() %>%
  separate(bigrams, into = c("word1", "word2"), sep = " ")

# Pairs seen more than 5 times.
muestra_words <- filter(bigrams, Freq > 5)

# Wide layout: one row per word1, one column per word2, counts as values and
# 0 where a pair never occurs.
markov_chain <- spread(bigrams, key = word2, value = Freq, fill = 0)

# Numeric matrix of counts; the first column (word1) becomes the row names.
# NOTE(review): rows come from word1 and columns from word2, which are not
# necessarily the same set of names or in the same order - confirm this is
# intended before treating the matrix as a transition matrix over one state
# space.
mm_chain <- as.matrix(markov_chain[, -1])
rownames(mm_chain) <- markov_chain$word1
dim(mm_chain)
## [1] 30 30

Armamos la matriz de Markov

# Turn the counts into row-wise proportions (each row is divided by its sum).
mm_chain <- mm_chain / rowSums(mm_chain)

# Uniform matrix with the same shape as mm_chain, each row summing to 1.
# Sizing it from mm_chain avoids a hard-coded dimension that would break the
# sum below as soon as the number of actresses changes.
s <- matrix(1, nrow = nrow(mm_chain), ncol = ncol(mm_chain))
s <- s / rowSums(s)

# Weighted blend: 85% observed transitions, 15% uniform jump.
M <- 0.85 * mm_chain + 0.15 * s
# Compute the matrix product A %*% B using the workers of a cluster.
#
# cl: cluster object accepted by parallel::clusterApply() (e.g. the value of
#     parallel::makeCluster()).
# A:  left-hand matrix; its rows are split into length(cl) groups.
# B:  right-hand matrix; must satisfy nrow(B) == ncol(A).
# Returns the product, rebuilt by row-binding the partial products in the
# order of the row groups.
# Stops with an error when the dimensions of A and B do not conform.
matprod.par <- function(cl, A, B) {
  if (ncol(A) != nrow(B)) stop("Matrices do not conform")
  # One group of row indices per worker.
  idx <- splitIndices(nrow(A), length(cl))
  # drop = FALSE keeps single-row groups as matrices.
  Alist <- lapply(idx, function(ii) A[ii, , drop = FALSE])
  # Each worker multiplies its block of rows by the full B.
  ans <- clusterApply(cl, Alist, `%*%`, B)
  do.call(rbind, ans)
}
nc <- detectCores()
# detectCores() returns NA when the number of cores cannot be determined;
# fall back to a single worker in that case.
if (is.na(nc)) nc <- 1L
c1 <- makeCluster(rep("localhost", nc))
# `finally` shuts the workers down even when the parallel product fails, so
# no worker processes are left behind.
M1 <- tryCatch(matprod.par(c1, M, M), finally = stopCluster(c1))
# Use the parallel result as the squared matrix instead of discarding it and
# recomputing the same product serially.
M <- M1

Mostramos la Matriz de Markov

# Render M as a table with a caption.
M %>% kable(caption = "Matriz de Markov de Actrices")
Matriz de Markov de Actrices
/wiki/Ana_Alicia Amy_Acker Amy_Adams Bettye_Ackerman Brooke_Adams_(actress) Candice_Accola Caroline_Aaron Christina_Aguilera Diahnne_Abbott Dianna_Agron Donzaleigh_Abernathy Edie_Adams Erika_Alexander Jaimie_Alexander Jane_Adams_(actress) Jane_Alexander Jean_Acker Jessica_Alba Joey_Lauren_Adams Julie_Adams Khandi_Alexander Lexi_Ainsworth Lillian_Adams Lola_Albright Mariann_Aalda Rose_Abdoo Sasha_Alexander Shohreh_Aghdashloo Tatyana_Ali Whitney_Able
Amy_Acker 0.00925 0.00925 0.00925 0.73175 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925
Amy_Adams 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.73175 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925
Bettye_Ackerman 0.00925 0.00925 0.73175 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925
Beverly_Aadland 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.73175 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925
Brooke_Adams_(actress) 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.73175 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925
Candice_Accola 0.00925 0.00925 0.00925 0.00925 0.73175 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925
Caroline_Aaron 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.73175 0.00925 0.00925 0.00925 0.00925
Christina_Aguilera 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.73175 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925
Diahnne_Abbott 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.73175 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925
Dianna_Agron 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.73175 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925
Donzaleigh_Abernathy 0.00925 0.00925 0.00925 0.00925 0.00925 0.73175 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925
Edie_Adams 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.73175 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925
Erika_Alexander 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.73175 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925
Jaimie_Alexander 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.73175 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925
Jane_Adams_(actress) 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.73175 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925
Jane_Alexander 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.73175 0.00925 0.00925 0.00925
Jean_Acker 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.73175 0.00925 0.00925 0.00925 0.00925 0.00925
Jessica_Alba 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.73175 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925
Joey_Lauren_Adams 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.73175 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925
Julie_Adams 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.73175 0.00925 0.00925
Khandi_Alexander 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.73175 0.00925
Lexi_Ainsworth 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.73175 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925
Lillian_Adams 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.73175 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925
Lola_Albright 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.73175 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925
Mariann_Aalda 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.73175 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925
Rose_Abdoo 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.73175
Sasha_Alexander 0.73175 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925
Shohreh_Aghdashloo 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.73175 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925
Tatyana_Ali 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.73175 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925
Whitney_Able 0.00925 0.73175 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925 0.00925