# Read a Stata .dta file with the 'foreign' package.
# Install the package only when it is missing, so re-running the script does
# not trigger a fresh download and install every time.
if (!requireNamespace("foreign", quietly = TRUE)) {
  install.packages("foreign")
}
library(foreign)
# Open the package index to browse the available readers.
help(package = "foreign")
test.stata <- read.dta("https://stats.idre.ucla.edu/stat/data/test.dta")
print(test.stata)
## make model mpg weight price
## 1 AMC Concord 22 2930 4099
## 2 AMC Pacer 17 3350 4749
## 3 AMC Spirit 22 2640 3799
## 4 Buick Century 20 3250 4816
## 5 Buick Electra 15 4080 7827
# Read a whitespace-delimited text file whose first line holds the column
# names. TRUE is spelled out because T is an ordinary variable that can be
# reassigned.
test.txt <- read.table(
  "https://stats.idre.ucla.edu/wp-content/uploads/2016/02/test.txt",
  header = TRUE
)
print(test.txt)
## make model mpg weight price
## 1 AMC Concord 22 2930 4099
## 2 AMC Pacer 17 3350 4749
## 3 AMC Spirit 22 2640 3799
## 4 Buick Century 20 3250 4816
## 5 Buick Electra 15 4080 7827
# Read a comma-separated file whose first line holds the column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
test.csv1 <- read.table(
  "https://stats.idre.ucla.edu/wp-content/uploads/2016/02/test-1.csv",
  header = TRUE,
  sep = ","
)
print(test.csv1)
## make model mpg weight price
## 1 amc concord 22 2930 4099
## 2 amc oacer 17 3350 4749
## 3 amc spirit 22 2640 3799
## 4 buick century 20 3250 4816
## 5 buick electra 15 4080 7827
# NOTE(review): this repeats the earlier read.table() call on test-1.csv (same
# URL, same arguments); it downloads the file again and overwrites test.csv1.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
test.csv1 <- read.table(
  "https://stats.idre.ucla.edu/wp-content/uploads/2016/02/test-1.csv",
  header = TRUE,
  sep = ","
)
print(test.csv1)
## make model mpg weight price
## 1 amc concord 22 2930 4099
## 2 amc oacer 17 3350 4749
## 3 amc spirit 22 2640 3799
## 4 buick century 20 3250 4816
## 5 buick electra 15 4080 7827
# Read a semicolon-delimited file whose first line holds the column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
test.semi <- read.table(
  "https://stats.idre.ucla.edu/wp-content/uploads/2016/02/testsemicolon.txt",
  header = TRUE,
  sep = ";"
)
print(test.semi)
## make model mpg weight price
## 1 AMC Concord 22 2930 4099
## 2 AMC Pacer 17 3350 4749
## 3 AMC Spirit 22 2640 3799
## 4 Buick Century 20 3250 4816
## 5 Buick Electra 15 4080 7827
# Read a file that uses the letter "z" as its field separator, showing that
# `sep` accepts any single character.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
test.z <- read.table(
  "https://stats.idre.ucla.edu/wp-content/uploads/2016/02/testz.txt",
  header = TRUE,
  sep = "z"
)
print(test.z)
## make model mpg weight price
## 1 AMC Concord 22 2930 4099
## 2 AMC Pacer 17 3350 4749
## 3 AMC Spirit 22 2640 3799
## 4 Buick Century 20 3250 4816
## 5 Buick Electra 15 4080 7827
# Export the CSV data to disk in two formats.
# Plain text: no row-name column and no quotes around character values.
# FALSE is spelled out because F is an ordinary variable that can be
# reassigned.
write.table(test.csv1, "test1.txt", row.names = FALSE, quote = FALSE)
# Excel workbook via openxlsx.
library(openxlsx)
write.xlsx(test.csv1, "test1.xlsx")
library(tidyverse)
library(rvest)
library(stringr)
# URL of the page we want to extract data from
base_url <- "https://www.chartsecuador.biz/top-25-artistas-ecuatorianos/"
webpage <- read_html(base_url)
# Collect the text of every <span> nested in a first-child <p>. On this page
# those spans hold the song titles, mixed with a banner line and chart-status
# markers.
canciones <- webpage %>%
  html_nodes("p:nth-child(1) span") %>%
  html_text() %>%
  as.character()
canciones
## [1] "LAS 25 CANCIONES DE PRODUCCIÓN NACIONAL O DE ARTISTAS ECUATORIANOS MÁS POPULARES DE LA SEMANA EN ECUADOR. "
## [2] "*10 semanas*"
## [3] "*10 semanas*"
## [4] "Parece Mentira"
## [5] "Prende (Remix)"
## [6] "Ana"
## [7] "No Te Quiero Perder"
## [8] "Cosa Linda"
## [9] "Zapateando Juyayay"
## [10] "Apareciste"
## [11] "Prueba Conmigo"
## [12] "Quiero"
## [13] "Lo Que Pasa"
## [14] "Dejémoslo Aquí"
## [15] "Me Descuidaste"
## [16] ""
## [17] "Mala"
## [18] "Loquita Por Ti"
## [19] "Ya Estoy Bien"
## [20] "Perdón"
## [21] "Pienso En Tí"
## [22] "*REINGRESO*"
## [23] "*REINGRESO*"
## [24] "Envenenado"
## [25] "Por Tí"
## [26] "Arreglarlo Bailando"
## [27] "*NUEVO*"
## [28] "*NUEVO*"
## [29] "*NUEVO*"
## [30] "*REINGRESO*"
## [31] "*REINGRESO*"
## [32] "*REINGRESO*"
# Drop empty strings and the chart-status markers; every other entry is kept
# in its original order.
canciones <- canciones[
  !(canciones %in% c("", "*NUEVO*", "*REINGRESO*", "*10 semanas*"))
]
canciones
## [1] "LAS 25 CANCIONES DE PRODUCCIÓN NACIONAL O DE ARTISTAS ECUATORIANOS MÁS POPULARES DE LA SEMANA EN ECUADOR. "
## [2] "Parece Mentira"
## [3] "Prende (Remix)"
## [4] "Ana"
## [5] "No Te Quiero Perder"
## [6] "Cosa Linda"
## [7] "Zapateando Juyayay"
## [8] "Apareciste"
## [9] "Prueba Conmigo"
## [10] "Quiero"
## [11] "Lo Que Pasa"
## [12] "Dejémoslo Aquí"
## [13] "Me Descuidaste"
## [14] "Mala"
## [15] "Loquita Por Ti"
## [16] "Ya Estoy Bien"
## [17] "Perdón"
## [18] "Pienso En Tí"
## [19] "Envenenado"
## [20] "Por Tí"
## [21] "Arreglarlo Bailando"
# Get the artists: select the third <p> of the container with this id, take
# its text, delete newline characters outright (no space is inserted in their
# place), then collapse repeated whitespace and trim both ends.
artistas <- webpage %>%
  html_nodes(xpath = '//*[@id="cc-m-9566740669"]/p[3]') %>%
  html_text() %>%
  gsub("\n", "", .) %>%
  str_squish()
artistas
## [1] "4 AM Featuring Arévalo"
# Look up a song's lyrics by scraping a Google search results page.
cancion <- "Californication"
# Percent-encode the title so accents and reserved characters (&, #, +, ...)
# cannot corrupt the query string; spaces are still sent as "+", as before.
html_dir <- paste0(
  "https://www.google.com/", "search?q=letra+cancion+",
  gsub(
    "%20", "+",
    utils::URLencode(enc2utf8(cancion), reserved = TRUE),
    fixed = TRUE
  )
)
google <- read_html(html_dir)
# Select every <div> nested at least eleven levels deep.
lirica <- google %>%
  html_nodes("div div div div div div div div div div div")
# NOTE(review): the 8th matching node held the lyrics when this was written;
# that position depends on Google's markup and may shift — confirm before
# relying on it.
cat(html_text(lirica[8]))
## Psychic spies from China try to steal your mind's elation
## And little girls from Sweden dream of silver screen quotation
## And if you want these kind of dreams it's Californication
##
## It's the edge of the world and all of western civilization
## The sun may rise in the East at least it's settled in a final location
## It's understood that Hollywood sells Californication
##
## Pay your surgeon very well to break the spell of aging
## Celebrity skin is this your chin or is that war you're waging?
##
## Firstborn unicorn
## Hardcore soft porn
## Dream of Californication
## Dream of Californication
## Dream of Californication
## Dream of Californication
##
## Marry me girl, be my fairy to the world, be my very own constellation
## A teenage bride with a baby inside getting high on information
## And buy me a star on the boulevard, it's Californication
##
## Space may be the final frontier but it's made in a Hollywood basement
## And Cobain can you hear the spheres singing songs off Station To Station?
## And Alderaan's not far away, it's Californication
##
## Born and raised by those who praise, control of population
## Everybody's been there and I don't mean on vacation
##
## Firstborn unicorn
## Hardcore soft porn
## Dream of Californication
## Dream of Californication
## Dream of Californication
## Dream of Californication
##
## Destruction leads to a very rough road but it also breeds creation
## And earthquakes are to a girl's guitar, they're just another good vibration
## And tidal waves couldn't save the world from Californication
##
## Pay your surgeon very well to break the spell of aging
## Sicker than the rest, there is no test but this is what you're craving?
##
## Firstborn unicorn
## Hardcore soft porn
## Dream of Californication
## Dream of Californication
## Dream of Californication
## Dream of Californication
# Search Google News for a query and collect the third article's headline,
# timestamp, outlet and link into a one-row data frame.
busqueda <- "Jorge Yunda"
news_pag <- "https://news.google.com/"
# Percent-encode the query so accents and reserved characters (&, #, +, ...)
# cannot corrupt the URL; spaces are still sent as "+", as before.
html_dir <- paste0(
  news_pag, "search?q=",
  gsub(
    "%20", "+",
    utils::URLencode(enc2utf8(busqueda), reserved = TRUE),
    fixed = TRUE
  ),
  "&hl=es-419&gl=US&ceid=US:es-419"
)
google_news <- read_html(html_dir)
# Each result is an <article> under this container chain.
# NOTE(review): the selector mirrors the page markup and may need updating.
noticias <- google_news %>%
  html_nodes(css = "c-wiz div div div div main c-wiz div div div article")
# Fail with a clear message instead of "subscript out of bounds" when the
# page yields fewer than three articles.
if (length(noticias) < 3) {
  stop(
    "Expected at least 3 news articles, found ", length(noticias),
    call. = FALSE
  )
}
noticia <- noticias[[3]]
titular <- noticia %>% html_node("h3") %>% html_text()
fecha <- noticia %>% html_node("time") %>% html_attr("datetime")
# NOTE(review): these class names look auto-generated and may change.
diario <- noticia %>% html_node("a.wEwyrc.AVN2gc.uQIVzc.Sksgp") %>% html_text()
link_enmascarado <- noticia %>% html_node("h3 a") %>% html_attr("href")
# Drop the first two characters of the href (presumably a leading "./" —
# confirm) and prepend the site root to get an absolute URL.
link_enmascarado <- paste0(news_pag, substring(link_enmascarado, 3))
# Fetch that intermediate page and read the article link from it.
link_enmascarado <- read_html(link_enmascarado)
link <- link_enmascarado %>%
  html_node(css = "c-wiz div div c-wiz div a") %>%
  html_attr("href")
noticiaDF <- data.frame(
  Titular = titular,
  Fecha = fecha,
  Diario = diario,
  Link = link,
  stringsAsFactors = FALSE
)
# Buscar un set de datos estructurado y uno no estructurado utilizando web scraping. Los datos obtenidos deben ser útiles para el tópico de investigación de su grupo.