#activate the libraries
#install.packages('XML')
suppressWarnings(suppressMessages(library(XML)))
#install.packages('RCurl')
suppressWarnings(suppressMessages(library(RCurl)))
#install.packages('bitops')
suppressWarnings(suppressMessages(library(bitops)))
#install.packages('rjson')
suppressWarnings(suppressMessages(library(rjson)))
#install.packages("tidyr")
suppressWarnings(suppressMessages(library(tidyr)))
#install.packages("dplyr")
suppressWarnings(suppressMessages(library(dplyr)))
#install.packages("stringr")
suppressWarnings(suppressMessages(library(stringr)))
  1. Importing data from an HTML table into an R data frame.
# Download the HTML document. Despite its name, html_url ends up holding the
# text returned by getURL(), not the address itself.
html_url <- getURL(
  url = "https://raw.githubusercontent.com/olga0503/DATA-607/master/books.html"
)

# Parse the downloaded HTML into R. Text columns stay character rather than
# being converted to factors. As the printed output shows, the result is a
# list whose single element holds the books table.
html_data <- readHTMLTable(doc = html_url, stringsAsFactors = FALSE)

# Display the parsed result.
print(html_data)
## $`NULL`
##                   Title              Author Year Published
## 1 To Kill a Mockingbird          Harper Lee           1960
## 2      The Great Gatsby F. Scott Fitzgerald           1925
## 3   The Grapes of Wrath      John Steinbeck           1939
##             Category
## 1    Southern Gothic
## 2 Historical Fiction
## 3              Novel
  2. Importing data from an XML file into an R data frame.
# Download the XML document. Despite its name, xml_url ends up holding the
# text returned by getURL(), not the address itself.
xml_url <- getURL(
  url = "https://raw.githubusercontent.com/olga0503/DATA-607/master/books.xml"
)

# Convert the downloaded XML into a data frame, keeping text columns as
# character rather than factors.
xml_data <- xmlToDataFrame(doc = xml_url, stringsAsFactors = FALSE)

# Display the resulting data frame.
print(xml_data)
##                   title              author year_published
## 1 To Kill a Mockingbird          Harper Lee           1960
## 2      The Great Gatsby F. Scott Fitzgerald           1925
## 3   The Grapes of Wrath      John Steinbeck           1939
##             category
## 1    Southern Gothic
## 2 Historical Fiction
## 3              Novel
  3. Importing data from a JSON file into an R data frame.
# Read the JSON document directly from its URL.
json_data <- fromJSON(
  file = "https://raw.githubusercontent.com/olga0503/DATA-607/master/books.json"
)

# Flatten the parsed JSON into a data frame with one column per book/field
# combination, keeping text as character rather than factors.
json_data_frame <- as.data.frame(json_data, stringsAsFactors = FALSE)

# Reshape the wide, flattened frame into one row per book:
#   1. gather() stacks every column into (column_title, data) pairs. Selecting
#      with everything() avoids hard-coding the number of columns, so the
#      pipeline keeps working if books or fields are added.
#   2. The leading "books.." prefix is stripped from each column title.
#   3. separate() splits the remaining title on "." into the field name and a
#      per-book suffix. Columns of the first book carry no suffix, so
#      fill = "right" pads them with NA without the default warning.
#   4. spread() pivots the field names back into columns, one row per suffix
#      (the NA-suffix book therefore sorts last).
#   5. select() keeps the four book fields in display order and drops the
#      helper suffix column.
json_data_frame %>%
  gather(column_title, data, everything()) %>%
  mutate(column_title = str_replace(column_title, "books..", "")) %>%
  separate(
    column_title,
    c("column_title", "number"),
    sep = "\\.",
    fill = "right"
  ) %>%
  spread(column_title, data) %>%
  select(title, author, year_published, category)
##                   title              author year_published
## 1      The Great Gatsby F. Scott Fitzgerald           1925
## 2   The Grapes of Wrath      John Steinbeck           1939
## 3 To Kill a Mockingbird          Harper Lee           1970
##             category
## 1 Historical Fiction
## 2              Novel
## 3    Southern Gothic