Lista de todas las actrices americanas

# Whitelist of American film actresses, scraped from Wikipedia.
# It is used later as a filter: only links that appear in this data frame are
# kept when scanning the yearly film lists.
#
# Result: a base data.frame with a single character column `links` holding
# relative article paths such as "/wiki/Some_Name".
listOfActresses <-
  read_html("https://en.wikipedia.org/wiki/List_of_American_film_actresses") %>%
  html_nodes("a") %>%
  html_attr("href") %>%
  # tibble() replaces the deprecated data_frame() and names the column
  # directly instead of relying on the implicit "." column name.
  tibble(links = .) %>%
  filter(
    # Keep article links only (anchors without href are NA and are dropped).
    str_detect(links, "/wiki/"),
    !str_detect(links, "#"),
    !str_detect(links, "image"),
    !str_detect(tolower(links), "file"),
    # Any ":" marks a namespaced page (File:, Help:, ...) or an absolute URL,
    # so this also covers the former explicit "https:" filter.
    !str_detect(links, ":"),
    # Navigation links that are not actresses.
    !str_detect(links, "Main_Page"),
    !str_detect(links, "/wiki/List_of_American_television_actresses")
  ) %>%
  as.data.frame()

Actrices que han participado en alguna película en los últimos 20 años

Con el propósito de reducir la lista de actrices, solo tomé las que han participado activamente en alguna película en los últimos 20 años.

# Collect, for every year from 2000 to 2020, the links on that year's
# "List of American films" page that also appear in listOfActresses.
# The per-year results are stored in a preallocated list and bound once at the
# end instead of growing a data frame with rbind() on every iteration.
yearOffsets <- 0:20
matchesByYear <- vector("list", length(yearOffsets))
for (i in yearOffsets) {
  # Build the URL of the film list for year 2000 + i
  movieYear <- paste0(
    "https://en.wikipedia.org/wiki/List_of_American_films_of_",
    2000 + i
  )
  auxGetActresses <- getLinks(movieYear)
  # Inner join on the shared column(s): keep only whitelisted actresses
  matchesByYear[[i + 1]] <- merge(auxGetActresses, listOfActresses, all = FALSE)
}
# The original loop prepended each year's rows, so reverse to keep that order.
actressesPerYear <- do.call(rbind, rev(matchesByYear))

# Count in how many yearly lists each actress link appears, after discarding
# links that contain "//". drop = FALSE keeps the single-column data frame
# from collapsing into a vector, so no column-name repair is needed.
totalActresses <-
  actressesPerYear[!grepl("//", actressesPerYear$links), "links", drop = FALSE] %>%
  group_by(links) %>%
  summarize(references = n())

Obteniendo referencias dentro de las páginas de las actrices

El propósito es visitar la página de cada actriz, tomar los links que se muestran en esa página, visitar cada uno de esos links y encontrar referencias a otras actrices para poder medir la relevancia de cada una.

#1 Take the list of actresses
#2 Get the list of actresses of the last 20 years
#3 Take each one of the list of actresses
#4 Get all the links this actress is making reference to
#5 Explore those links
#6 Go over the row and +1 in each actress who appears in this list
#7 Move column

# Empty data frames kept from the original script; linksDF is overwritten on
# every iteration of the loop below.
actressesDF <- data.frame(links = character())
linksDF <- data.frame(links = character())

# Square matrix of reference counts whose rows and columns are both labelled
# with the actress links. Entry [a, b] counts how many pages linked from
# actress b's article contain a link to actress a.
actressNames <- as.character(totalActresses$links)
mcMatrix <- matrix(
  0,
  nrow = length(actressNames),
  ncol = length(actressNames),
  dimnames = list(actressNames, actressNames)
)

# Visit each actress article, follow every link found in it, and record which
# known actresses those linked pages refer to.
# seq_along() is used instead of 1:length(): with an empty vector 1:length()
# iterates over c(1, 0) and fails with an out-of-bounds subscript.
for (i in seq_along(totalActresses$links)) {
  # Article of the actress being crawled
  actressLinkArg <- as.character(totalActresses$links[[i]])
  actressLink <- paste0("https://en.wikipedia.org", actressLinkArg)
  linksDF <- getLinks(actressLink)
  linksDF <- linksDF %>% group_by(links) %>% summarise(references = n())

  # Skip this actress entirely when any of her links is NA
  if (anyNA(linksDF$links)) {
    next
  }

  # Follow each distinct link and look for references to known actresses
  for (j in seq_along(linksDF$links)) {
    linkArg <- as.character(linksDF$links[[j]])
    referenceLink <- paste0("https://en.wikipedia.org", linkArg)
    referencesFound <- getLinks(referenceLink)

    # intersect() already returns unique values, so each matched actress is
    # counted at most once per visited page.
    matchedActresses <- intersect(referencesFound$links, totalActresses$links)

    # Add 1 to every matched actress in a single vectorised update; pages
    # with no match or with an NA among the matches are ignored.
    if (length(matchedActresses) > 0 && !anyNA(matchedActresses)) {
      mcMatrix[matchedActresses, actressLinkArg] <-
        mcMatrix[matchedActresses, actressLinkArg] + 1
    }
  }
}

Trabajando con un subset de la matriz

Debido a que la ejecución de la sección anterior excedió los 3 días, se decidió detener el análisis y trabajar con los valores obtenidos hasta ese momento. La submatriz resultante es de 150x150, y los análisis que se presentan a continuación se basan en ella.

#bckpMcMatrix <- mcMatrix
#mcMatrix <- bckpMcMatrix

# Work with the leading block of the reference matrix (at most 150 actresses).
# The size is derived from the data so the script no longer fails when
# mcMatrix has fewer than 150 rows.
subsetSize <- min(150, nrow(mcMatrix))
subMcMatrix <- mcMatrix[seq_len(subsetSize), seq_len(subsetSize), drop = FALSE]
sumMcMatrix <- rowSums(subMcMatrix)

# Row-normalise: the row-sum vector is recycled down the columns, so every
# entry [i, j] is divided by the sum of row i.
# NOTE(review): rows index the referenced actress and columns the crawled one,
# so this models a step from a referenced page to a referencing page --
# confirm this is the intended direction.
transMcMatrix <- subMcMatrix / sumMcMatrix
# Rows without any reference would be 0 / 0 = NaN and would turn every matrix
# power into NaN; give those rows a uniform distribution instead.
transMcMatrix[sumMcMatrix == 0, ] <- 1 / subsetSize

# Uniform matrix S and the mixing weights alfa / beta
sMatrix <- matrix(1 / subsetSize, nrow = subsetSize, ncol = subsetSize)
alfa <- 0.85
beta <- (1 - alfa)

# Final transition matrix: weighted mix of the observed transitions and S
mC <- alfa * transMcMatrix + beta * sMatrix

Buscando la estabilización de la matriz

# Raise the transition matrix to the powers 10, 20, 50 and 100 so the results
# can be compared to see at which point the matrix stops changing.
raiseTransition <- function(exponent) mC %^% exponent
mcPwrTen <- raiseTransition(10)
mcPwrTwenty <- raiseTransition(20)
mcPwrFifty <- raiseTransition(50)
mcPwrHundred <- raiseTransition(100)

Resultados:

Podemos ver que la matriz se estabiliza alrededor de la potencia 50, como se aprecia en los siguientes subconjuntos.

mC a la potencia de 10. Mostrando 9 elementos de la matriz.
print(mcPwrTen[1:3, 1:3])
                       /wiki/Abigail_Breslin /wiki/Addison_Timlin /wiki/Adrianne_Palicki
/wiki/Abigail_Breslin            0.005975571          0.003403492            0.002693227
/wiki/Addison_Timlin             0.005897302          0.006438972            0.002699677
/wiki/Adrianne_Palicki           0.005947622          0.003549209            0.002957123
mC a la potencia de 20. Mostrando 9 elementos de la matriz.
print(mcPwrTwenty[1:3, 1:3])
                       /wiki/Abigail_Breslin /wiki/Addison_Timlin /wiki/Adrianne_Palicki
/wiki/Abigail_Breslin            0.005966690          0.003428729            0.002703070
/wiki/Addison_Timlin             0.005965928          0.003441561            0.002703620
/wiki/Adrianne_Palicki           0.005966566          0.003430695            0.002705044
mC a la potencia de 50. Mostrando 9 elementos de la matriz.
print(mcPwrFifty[1:3, 1:3])
                       /wiki/Abigail_Breslin /wiki/Addison_Timlin /wiki/Adrianne_Palicki
/wiki/Abigail_Breslin            0.005966449          0.003428928            0.002703166
/wiki/Addison_Timlin             0.005966449          0.003428929            0.002703166
/wiki/Adrianne_Palicki           0.005966449          0.003428929            0.002703166
mC a la potencia de 100. Mostrando 9 elementos de la matriz.
print(mcPwrHundred[1:3, 1:3])
                       /wiki/Abigail_Breslin /wiki/Addison_Timlin /wiki/Adrianne_Palicki
/wiki/Abigail_Breslin            0.005966449          0.003428928            0.002703166
/wiki/Addison_Timlin             0.005966449          0.003428928            0.002703166
/wiki/Adrianne_Palicki           0.005966449          0.003428928            0.002703166
Las actrices más relevantes de las primeras 150.
# Extract the diagonal of the transition matrix mC (one value per actress),
# store it and print it.
# NOTE(review): diag(mC) is each actress's one-step self-transition
# probability, not the stationary distribution; any row of mcPwrHundred would
# give the converged ranking -- confirm which one is intended here.
print(diagSubMcMatrix <- diag(mC))
           /wiki/Abigail_Breslin             /wiki/Addison_Timlin 
                      0.18233333                       0.54995833 
          /wiki/Adrianne_Palicki           /wiki/Adrienne_Barbeau 
                      0.38159701                       0.46295652 
           /wiki/Adrienne_Shelly            /wiki/Agnes_Moorehead 
                      0.44117857                       0.11511686 
           /wiki/Ahna_O%27Reilly               /wiki/Aimee_Garcia 
                      0.26544444                       0.51100000 
           /wiki/Aimee_Teegarden                /wiki/Aisha_Tyler 
                      0.63850000                       0.36463636 
               /wiki/AJ_Michalka               /wiki/Alanna_Ubach 
                      0.43662500                       0.46141667 
             /wiki/Alex_Borstein              /wiki/Alexa_Davalos 
                      0.18239665                       0.56766667 
                /wiki/Alexa_Vega     /wiki/Alexandra_Breckenridge 
                      0.57125316                       0.25058932 
        /wiki/Alexandra_Daddario           /wiki/Alexandra_Holden 
                      0.40791489                       0.41854386 
           /wiki/Alexandra_Shipp             /wiki/Alexie_Gilmore 
                      0.38263265                       0.66621739 
           /wiki/Alexis_Arquette              /wiki/Alexis_Bledel 
                      0.42600000                       0.13671429 
             /wiki/Alexis_Dziena               /wiki/Alexis_Knapp 
                      0.38263265                       0.34100000 
             /wiki/Alfre_Woodard                 /wiki/Ali_Larter 
                      0.12708233                       0.16829622 
              /wiki/Alia_Shawkat             /wiki/Alice_Drummond 
                      0.32898913                       0.36303704 
             /wiki/Alice_Greczyn                /wiki/Alicia_Keys 
                      0.72023077                       0.32335948 
        /wiki/Alicia_Silverstone                /wiki/Alicia_Witt 
                      0.15014703                       0.42600000 
               /wiki/Alison_Brie            /wiki/Alison_Eastwood 
                      0.26977470                       0.56260714 
             /wiki/Alison_Lohman              /wiki/Allie_DeBerry 
                      0.28433333                       0.77372727 
            /wiki/Allison_Janney /wiki/Allison_Williams_(actress) 
                      0.12370983                       0.19608197 
               /wiki/Ally_Sheedy               /wiki/Aly_Michalka 
                      0.10875591                       0.37573118 
           /wiki/Alyson_Hannigan              /wiki/Alyson_Stoner 
                      0.30170755                       0.39887234 
               /wiki/Alyssa_Diaz              /wiki/Alyssa_Milano 
                      0.49683333                       0.39442857 
              /wiki/Amanda_Bynes              /wiki/Amanda_Detmer 
                      0.20437423                       0.51100000 
               /wiki/Amanda_Peet            /wiki/Amanda_Righetti 
                      0.29804301                       0.34559459 
           /wiki/Amanda_Seyfried               /wiki/Amber_Benson 
                      0.21617056                       0.48671429 
               /wiki/Amber_Heard              /wiki/Amber_Stevens 
                      0.42885235                       0.19933333 
             /wiki/Amber_Tamblyn             /wiki/Amber_Valletta 
                      0.19181633                       0.52858621 
           /wiki/America_Ferrera                  /wiki/Amy_Acker 
                      0.15411258                       0.35889474 
                 /wiki/Amy_Adams              /wiki/Amy_Brenneman 
                      0.08850351                       0.17493822 
                /wiki/Amy_Irving                /wiki/Amy_Madigan 
                      0.36011854                       0.15160060 
               /wiki/Amy_Poehler                   /wiki/Amy_Ryan 
                      0.12837003                       0.09743224 
               /wiki/Amy_Schumer                /wiki/Amy_Sedaris 
                      0.15987224                       0.43215942 
                 /wiki/Amy_Smart               /wiki/Ana_Gasteyer 
                      0.19983041                       0.32239303 
           /wiki/Analeigh_Tipton            /wiki/Andie_MacDowell 
                      0.69826563                       0.09895194 
              /wiki/Andrea_Bowen              /wiki/Andrea_Martin 
                      0.25376316                       0.28194233 
            /wiki/Angela_Bassett              /wiki/Angela_Bettis 
                      0.09690164                       0.52988889 
           /wiki/Angela_Lansbury             /wiki/Angelina_Jolie 
                      0.16508584                       0.10550654 
           /wiki/Angie_Dickinson               /wiki/Angie_Harmon 
                      0.15905785                       0.27687719 
           /wiki/Anika_Noni_Rose            /wiki/Anjelica_Huston 
                      0.24947884                       0.09624313 
                /wiki/Ann_Cusack                   /wiki/Ann_Dowd 
                      0.47912500                       0.11213269 
              /wiki/Ann_Robinson                /wiki/Ann-Margret 
                      0.63850000                       0.10269695 
                 /wiki/Anna_Camp              /wiki/Anna_Chlumsky 
                      0.32046309                       0.17082030 
                /wiki/Anna_Faris                  /wiki/Anna_Gunn 
                      0.22547183                       0.18394702 
             /wiki/Anna_Kendrick          /wiki/Annabella_Sciorra 
                      0.10668783                       0.48789320 
             /wiki/Annabeth_Gish           /wiki/AnnaLynne_McCord 
                      0.38975969                       0.56200000 
           /wiki/AnnaSophia_Robb                /wiki/Anne_Archer 
                      0.29886325                       0.35516667 
             /wiki/Anne_Bancroft                 /wiki/Anne_Dudek 
                      0.12170000                       0.35889474 
             /wiki/Anne_Hathaway                 /wiki/Anne_Heche 
                      0.09600745                       0.14479699 
            /wiki/Annette_Bening               /wiki/Annie_Mumolo 
                      0.09016457                       0.46766667 
             /wiki/Annie_Parisse                /wiki/Annie_Potts 
                      0.35100000                       0.32506250 
               /wiki/Ari_Graynor               /wiki/Ariel_Winter 
                      0.46463636                       0.19483446 
            /wiki/Arielle_Kebbel           /wiki/Ashanti_(singer) 
                      0.49927586                       0.32947458 
     /wiki/Ashley_Bell_(actress)              /wiki/Ashley_Benson 
                      0.35516667                       0.31742336 
               /wiki/Ashley_Fink              /wiki/Ashley_Greene 
                      0.44174074                       0.27687719 
            /wiki/Ashley_Hinshaw                /wiki/Ashley_Judd 
                      0.56766667                       0.15991759 
              /wiki/Ashley_Olsen            /wiki/Ashley_Rickards 
                      0.40752174                       0.46463636 
              /wiki/Ashley_Scott             /wiki/Ashley_Tisdale 
                      0.56766667                       0.20886026 
              /wiki/Aubrey_Plaza             /wiki/Aunjanue_Ellis 
                      0.28131915                       0.36804545 
                 /wiki/Awkwafina                 /wiki/Azura_Skye 
                      0.07322612                       0.46141667 
                  /wiki/Bai_Ling             /wiki/Bailee_Madison 
                      0.18588064                       0.57281818 
          /wiki/Barbara_Crampton            /wiki/Barbara_Hershey 
                      0.53567742                       0.12655918 
          /wiki/Barbra_Streisand           /wiki/Beanie_Feldstein 
                      0.22428509                       0.24385714 
             /wiki/Bebe_Neuwirth               /wiki/Bella_Thorne 
                      0.17526093                       0.37122222 
                /wiki/Beth_Behrs              /wiki/Betsy_Russell 
                      0.31266667                       0.61651724 
              /wiki/Bette_Midler              /wiki/Betty_Buckley 
                      0.16534136                       0.22600000 
               /wiki/Betty_White          /wiki/Beverley_Mitchell 
                      0.18414539                       0.43467347 
        /wiki/Beverly_D%27Angelo       /wiki/Beyonc%C3%A9_Knowles 
                      0.38530851                       0.36329508 
            /wiki/Bianca_Kajlich              /wiki/Bianca_Lawson 
                      0.53618519                       0.41198901 
            /wiki/Bijou_Phillips               /wiki/Billie_Lourd 
                      0.58594624                       0.33526966 
               /wiki/Blair_Brown               /wiki/Blake_Lively 
                      0.16199641                       0.17819870 
            /wiki/Blanchard_Ryan              /wiki/Blythe_Danner 
                      0.07135473                       0.12526348 
               /wiki/Bonnie_Hunt          /wiki/Bonnie_Somerville 
                      0.17895100                       0.44016667 
            /wiki/Brandy_Norwood                /wiki/Bree_Turner 
                      0.32732184                       0.54405556 
               /wiki/Brenda_Song              /wiki/Briana_Evigan 
                      0.44836842                       0.16773774 
        /wiki/Brianna_Hildebrand              /wiki/Bridget_Fonda 
                      0.40752174                       0.08006977 
