#activate the libraries
#install.packages('XML')
suppressWarnings(suppressMessages(library(XML)))
#install.packages('RCurl')
suppressWarnings(suppressMessages(library(RCurl)))
#install.packages('bitops')
suppressWarnings(suppressMessages(library(bitops)))
#install.packages('rjson')
suppressWarnings(suppressMessages(library(rjson)))
#install.packages("tidyr")
suppressWarnings(suppressMessages(library(tidyr)))
#install.packages("dplyr")
suppressWarnings(suppressMessages(library(dplyr)))
#install.packages("stringr")
suppressWarnings(suppressMessages(library(stringr)))
- Importing data from an HTML table into an R data frame.
# Download the raw HTML document. Despite its name, html_url ends up holding
# the text returned by getURL() rather than the address itself.
html_url <- getURL(
  "https://raw.githubusercontent.com/olga0503/DATA-607/master/books.html"
)
# Parse the HTML tables found in the downloaded text; the result is a list of
# data frames, with strings kept as character rather than factor.
html_data <- readHTMLTable(html_url, stringsAsFactors = FALSE)
# Show the parsed result.
html_data
## $`NULL`
## Title Author Year Published
## 1 To Kill a Mockingbird Harper Lee 1960
## 2 The Great Gatsby F. Scott Fitzgerald 1925
## 3 The Grapes of Wrath John Steinbeck 1939
## Category
## 1 Southern Gothic
## 2 Historical Fiction
## 3 Novel
- Importing data from an XML file into an R data frame.
# Download the raw XML document. Despite its name, xml_url stores the text
# returned by getURL() rather than the address itself.
xml_url <- getURL(
  "https://raw.githubusercontent.com/olga0503/DATA-607/master/books.xml"
)
# Convert the XML records into a data frame, keeping text columns as character
# instead of factor.
xml_data <- xmlToDataFrame(xml_url, stringsAsFactors = FALSE)
# Show the resulting data frame.
xml_data
## title author year_published
## 1 To Kill a Mockingbird Harper Lee 1960
## 2 The Great Gatsby F. Scott Fitzgerald 1925
## 3 The Grapes of Wrath John Steinbeck 1939
## category
## 1 Southern Gothic
## 2 Historical Fiction
## 3 Novel
- Importing data from a JSON file into an R data frame.
# Read the books JSON document directly from its URL.
json_data <- fromJSON(
  file = "https://raw.githubusercontent.com/olga0503/DATA-607/master/books.json"
)
# Flatten the parsed list into a wide data frame; the reshaping below treats
# each column as one field of one book.
json_data_frame <- as.data.frame(json_data, stringsAsFactors = FALSE)
# Reshape the wide frame into one row per book.
json_data_frame %>%
  # Gather every column instead of a hard-coded 1:12 range, so the pipeline
  # keeps working when the number of books or fields changes.
  gather(column_title, data, everything()) %>%
  # Strip the leading "books" prefix (pattern kept exactly as before).
  mutate(column_title = str_replace(column_title, "books..", "")) %>%
  # Split "<field>.<n>" into the field name and the record number. The first
  # record has no numeric suffix, so its number is filled with NA; fill =
  # "right" makes that explicit rather than relying on a warning.
  separate(
    column_title,
    c("column_title", "number"),
    sep = "\\.",
    fill = "right"
  ) %>%
  # Spread the fields back out: one column per field, one row per record.
  spread(column_title, data) %>%
  # Keep the book fields in display order; the helper "number" column is
  # dropped simply by not being selected.
  select(title, author, year_published, category)
## title author year_published
## 1 The Great Gatsby F. Scott Fitzgerald 1925
## 2 The Grapes of Wrath John Steinbeck 1939
## 3 To Kill a Mockingbird Harper Lee 1970
## category
## 1 Historical Fiction
## 2 Novel
## 3 Southern Gothic