library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(RCurl)
library(jsonlite)
# Read the sports-book list from the course GitHub repository.
# During development a local copy was read instead:
#   "/Users/briansingh/Desktop/CUNY/Data607/Week7/sportbooks.json"
sport_books_json <- read_json(
  path = "https://raw.githubusercontent.com/brsingh7/DATA607/main/Week7/sportbooks.json",
  simplifyVector = TRUE
)

# Flatten the parsed JSON into a data frame. As the printed result shows,
# each column name carries the top-level JSON key as a prefix
# (e.g. `Favorite.Sport.Books.Title`).
sport_books_json2 <- as.data.frame(sport_books_json)
print(sport_books_json2)
## Favorite.Sport.Books.Title Favorite.Sport.Books.Author.s.
## 1 Friday Night Lights H.G. Bizzinger, Buzz Bizzinger
## 2 Moneyball: The Art of Winning an Unfair Game Michael Lewis
## 3 The Mamba Mentality: How I Play Kobe Bryant
## Favorite.Sport.Books.Released.Date Favorite.Sport.Books.Rating..Amazon.
## 1 8/11/15 4.5
## 2 3/17/04 4.7
## 3 10/23/18 4.9
I was having trouble with XML. I validated the XML I created, and the validator reports that it is valid, but I’m not sure how to proceed with reading it in and converting it to a data frame.
library(XML)

# Download the XML document as a single character string.
url <- "https://raw.githubusercontent.com/brsingh7/DATA607/main/Week7/sportbooks.xml"
data <- getURL(url)

# Parse the text into an internal (C-level) document. xmlParse() is
# xmlTreeParse() with useInternalNodes = TRUE; asText = TRUE makes it explicit
# that `data` holds XML content rather than a file name.
sport_books_xml <- xmlParse(data, asText = TRUE)

# Convert the document to a data frame. The earlier XPath query "//Frame"
# matched no element in this file and read node *attributes* rather than
# element text, so it always produced an empty 0 x 0 tibble.
# xmlToDataFrame() instead treats every child of the root element as one row
# and every grandchild element as one column.
# NOTE(review): assumes the layout <root><book><Title>..</Title>...</book>...
# </root>; if the records sit one level deeper, pass them explicitly via
# `nodes = getNodeSet(sport_books_xml, "//<record-element>")` -- TODO confirm.
sport_books_xml2 <- xmlToDataFrame(sport_books_xml, stringsAsFactors = FALSE)
sport_books_xml2
## (output not re-captured; the previous "//Frame" query printed
## "# A tibble: 0 x 0" here)
library(XML)

# Download the HTML page as a single character string.
url2 <- "https://raw.githubusercontent.com/brsingh7/DATA607/main/Week7/sportbooks.html"
data2 <- getURL(url2)

# Parse into an internal document so readHTMLTable() can walk its nodes.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
sport_books_html <- htmlTreeParse(data2, useInternalNodes = TRUE)

# Extract every <table> in the page as a list of data frames. The argument was
# previously misspelled `stringAsFactors`, so it was silently ignored; with the
# correct spelling, text columns stay character on every R version.
# The list element prints as `NULL` in the captured output, so the table is
# accessed by position ([[1]]).
sport_books_html2 <- readHTMLTable(sport_books_html, stringsAsFactors = FALSE)
sport_books_html2
## $`NULL`
## Title Author(s)
## 1 Friday Night Lights H.G. Bizzinger, Buzz Bizzinger
## 2 Moneyball: The Art of Winning an Unfair Game Michael Lewis
## 3 The Mamba Mentality: How I Play Kobe Bryant
## Release Date Rating
## 1 8/11/2015 4.5
## 2 3/17/2004 4.7
## 3 10/23/2018 4.9
# Build a cleaned copy of the first HTML table: strip literal commas from every
# column except the first (Title), rename the columns, and put Title first.
#
# lapply() with drop = FALSE always returns one character vector per column.
# The previous sapply() call collapsed a one-row table into a plain vector
# (and a zero-row table into a list), which broke the three-name assignment.
# NOTE(review): the comma removal also applies to the author column, so
# "H.G. Bizzinger, Buzz Bizzinger" becomes "H.G. Bizzinger Buzz Bizzinger";
# kept as-is to match the existing output -- confirm this is intended.
sport_books_html3 <- lapply(
  sport_books_html2[[1]][, -1, drop = FALSE],
  function(x) gsub(",", "", as.character(x), fixed = TRUE)
)
sport_books_html3 <- as.data.frame(sport_books_html3, stringsAsFactors = FALSE)
names(sport_books_html3) <- c("Author(s)", "Release_Date", "Rating")

# Re-attach the untouched Title column and move it to the front; selecting by
# name avoids relying on hard-coded column positions.
sport_books_html3$Title <- sport_books_html2[[1]][, 1]
sport_books_html3 <- sport_books_html3[
  , c("Title", "Author(s)", "Release_Date", "Rating")
]
sport_books_html3
## Title Author(s)
## 1 Friday Night Lights H.G. Bizzinger Buzz Bizzinger
## 2 Moneyball: The Art of Winning an Unfair Game Michael Lewis
## 3 The Mamba Mentality: How I Play Kobe Bryant
## Release_Date Rating
## 1 8/11/2015 4.5
## 2 3/17/2004 4.7
## 3 10/23/2018 4.9
My data frames (the ones I was able to create successfully) are pretty much identical. With a little more understanding of the file formats and how to work with them in R, I think they’d be identical regardless of type (HTML, XML, JSON).