##
## Attaching package: 'jsonlite'
##
## The following object is masked from 'package:utils':
##
## View
##
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# ---- Source 1: books.html (one field per table row) ----
# Reads the "long" HTML table, where every row is a (field name, value) pair,
# and flattens the result into a data frame for inspection.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this exact file exists.
books.html.url <- "C:/Senthil/MSDataAnalytics/Semester1/Assignments/IS607/Week9Assignment/books.html"
#books.html.url <- getURL("https://raw.githubusercontent.com/senthiltamil/CUNY_MSDA/master/Week9_Assignment/books.html")
# header = FALSE because the first row is data, not column names. Spelled out
# as FALSE rather than F, since F is an ordinary variable that can be
# reassigned.
books.html.data <- readHTMLTable(books.html.url, header = FALSE)
# The resulting column names carry a "NULL." prefix (see the str() output
# below) -- presumably because the table has no id, so the list element
# returned by readHTMLTable() is named "NULL"; confirm against the HTML.
books.html.data.df <- as.data.frame(books.html.data)
str(books.html.data.df)
## 'data.frame': 21 obs. of 2 variables:
## $ NULL.V1: Factor w/ 7 levels "Author","BookName",..: 2 1 7 6 5 3 4 2 1 7 ...
## $ NULL.V2: Factor w/ 20 levels "0","0521644453",..: 19 15 5 20 9 4 12 18 13 7 ...
print(books.html.data.df)
## NULL.V1
## 1 BookName
## 2 Author
## 3 Price
## 4 KindVersionAvailable
## 5 KindlePrice
## 6 ISBN-10
## 7 ISBN-13
## 8 BookName
## 9 Author
## 10 Price
## 11 KindVersionAvailable
## 12 KindlePrice
## 13 ISBN-10
## 14 ISBN-13
## 15 BookName
## 16 Author
## 17 Price
## 18 KindVersionAvailable
## 19 KindlePrice
## 20 ISBN-10
## 21 ISBN-13
## NULL.V2
## 1 SQL Server Internals: In-Memory OLTP Inside the SQL Server 2014 Hekaton Engine
## 2 Kalen Delaney
## 3 26.99
## 4 yes
## 5 9.99
## 6 1910035033
## 7 978-1910035030
## 8 Professional Microsoft SQL Server 2014 Administration
## 9 Adam Jorgensen, Bradley Ball, Steven Wort, Ross LoForte, Brian Knight
## 10 36.58
## 11 yes
## 12 31.49
## 13 1118859138
## 14 978-1118859131
## 15 Probability and Random Variables: A Beginner's Guide
## 16 David Stirzaker
## 17 50.47
## 18 No
## 19 0
## 20 0521644453
## 21 978-0521644457
# ---- Source 2: books1.html (one book per table row) ----
# Reads the "wide" HTML table, whose first row holds the column names, and
# flattens the result into a data frame with one row per book.
# Remote alternative, kept for reference. The variable name is corrected to
# match the one actually used below.
#books1.html.url <- getURL("https://raw.githubusercontent.com/senthiltamil/CUNY_MSDA/master/Week9_Assignment/books1.html")
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this exact file exists.
books1.html.url <- "C:/Senthil/MSDataAnalytics/Semester1/Assignments/IS607/Week9Assignment/books1.html"
# header = TRUE promotes the first table row to column names. Spelled out as
# TRUE rather than T, since T is an ordinary variable that can be reassigned.
books1.html.data <- readHTMLTable(books1.html.url, header = TRUE)
# As the str() output below shows, every column comes back as a factor and
# every name carries a "NULL." prefix.
books1.html.data.df <- as.data.frame(books1.html.data)
str(books1.html.data.df)
## 'data.frame': 3 obs. of 7 variables:
## $ NULL.BookName : Factor w/ 3 levels "Probability and Random Variables: A Beginner's Guide",..: 3 2 1
## $ NULL.Author : Factor w/ 3 levels "Adam Jorgensen, Bradley Ball, Steven Wort, Ross LoForte, Brian Knight",..: 3 1 2
## $ NULL.Price : Factor w/ 3 levels "26.99","36.58",..: 1 2 3
## $ NULL.KindVersionAvailable: Factor w/ 2 levels "No","yes": 2 2 1
## $ NULL.KindlePrice : Factor w/ 3 levels "0","31.49","9.99": 3 2 1
## $ NULL.ISBN.10 : Factor w/ 3 levels "0521644453","1118859138",..: 3 2 1
## $ NULL.ISBN.13 : Factor w/ 3 levels "978-0521644457",..: 3 2 1
print(books1.html.data.df)
## NULL.BookName
## 1 SQL Server Internals: In-Memory OLTP Inside the SQL Server 2014 Hekaton Engine
## 2 Professional Microsoft SQL Server 2014 Administration
## 3 Probability and Random Variables: A Beginner's Guide
## NULL.Author
## 1 Kalen Delaney
## 2 Adam Jorgensen, Bradley Ball, Steven Wort, Ross LoForte, Brian Knight
## 3 David Stirzaker
## NULL.Price NULL.KindVersionAvailable NULL.KindlePrice NULL.ISBN.10
## 1 26.99 yes 9.99 1910035033
## 2 36.58 yes 31.49 1118859138
## 3 50.47 No 0 0521644453
## NULL.ISBN.13
## 1 978-1910035030
## 2 978-1118859131
## 3 978-0521644457
# ---- Source 3: books.xml ----
# Parses the XML document, converts the tree to a nested list, then builds one
# data frame per <book> element and row-binds them with plyr::ldply().
# Remote alternative, kept for reference:
#books.xml.url <- getURL("https://raw.githubusercontent.com/senthiltamil/CUNY_MSDA/master/Week9_Assignment/books.xml")
books.xml.url <- "C:/Senthil/MSDataAnalytics/Semester1/Assignments/IS607/Week9Assignment/books.xml"
books.xml.data <- xmlParse(file = books.xml.url)
books.xml.list <- xmlToList(node = books.xml.data)
# NOTE(review): the output below has 9 rows for 3 books. Each book's `.attrs`
# entry holds three values (title, ISBN-10, ISBN-13), and the other fields
# appear to be recycled against it by data.frame() -- confirm, and consider
# spreading the attributes into their own columns before binding.
books.xml.df <- ldply(.data = books.xml.list, .fun = data.frame)
str(object = books.xml.df)
## 'data.frame': 9 obs. of 11 variables:
## $ .id : chr "book" "book" "book" "book" ...
## $ author : Factor w/ 2 levels "Kalen Delaney",..: 1 1 1 NA NA NA 2 2 2
## $ price : Factor w/ 3 levels "26.99","36.58",..: 1 1 1 2 2 2 3 3 3
## $ kindleAvailable : Factor w/ 2 levels "yes","no": 1 1 1 1 1 1 2 2 2
## $ KindlePrice : Factor w/ 3 levels "9.99","31.49",..: 1 1 1 2 2 2 3 3 3
## $ .attrs : Factor w/ 9 levels "1910035033","978-1910035030",..: 3 1 2 6 4 5 9 7 8
## $ authors.author : Factor w/ 1 level "Adam Jorgensen": NA NA NA 1 1 1 NA NA NA
## $ authors.author.1: Factor w/ 1 level "Bradley Ball": NA NA NA 1 1 1 NA NA NA
## $ authors.author.2: Factor w/ 1 level "Steven Wort": NA NA NA 1 1 1 NA NA NA
## $ authors.author.3: Factor w/ 1 level "Ross LoForte": NA NA NA 1 1 1 NA NA NA
## $ authors.author.4: Factor w/ 1 level "Brian Knight": NA NA NA 1 1 1 NA NA NA
print(x = books.xml.df)
## .id author price kindleAvailable KindlePrice
## 1 book Kalen Delaney 26.99 yes 9.99
## 2 book Kalen Delaney 26.99 yes 9.99
## 3 book Kalen Delaney 26.99 yes 9.99
## 4 book <NA> 36.58 yes 31.49
## 5 book <NA> 36.58 yes 31.49
## 6 book <NA> 36.58 yes 31.49
## 7 book David Stirzaker 50.47 no 0
## 8 book David Stirzaker 50.47 no 0
## 9 book David Stirzaker 50.47 no 0
## .attrs
## 1 SQL Server Internals: In-Memory OLTP Inside the SQL Server 2014 Hekaton Engine
## 2 1910035033
## 3 978-1910035030
## 4 Professional Microsoft SQL Server 2014 Administration
## 5 1118859138
## 6 978-1118859131
## 7 Probability and Random Variables: A Beginner's Guide
## 8 0521644453
## 9 978-0521644457
## authors.author authors.author.1 authors.author.2 authors.author.3
## 1 <NA> <NA> <NA> <NA>
## 2 <NA> <NA> <NA> <NA>
## 3 <NA> <NA> <NA> <NA>
## 4 Adam Jorgensen Bradley Ball Steven Wort Ross LoForte
## 5 Adam Jorgensen Bradley Ball Steven Wort Ross LoForte
## 6 Adam Jorgensen Bradley Ball Steven Wort Ross LoForte
## 7 <NA> <NA> <NA> <NA>
## 8 <NA> <NA> <NA> <NA>
## 9 <NA> <NA> <NA> <NA>
## authors.author.4
## 1 <NA>
## 2 <NA>
## 3 <NA>
## 4 Brian Knight
## 5 Brian Knight
## 6 Brian Knight
## 7 <NA>
## 8 <NA>
## 9 <NA>
# ---- Source 4: books.json ----
# Reads the JSON document and coerces the parsed structure to a data frame
# with one row per book.
# Remote alternative, kept for reference:
#books.json.url <- getURL("https://raw.githubusercontent.com/senthiltamil/CUNY_MSDA/master/Week9_Assignment/books.json")
books.json.url <- "C:/Senthil/MSDataAnalytics/Semester1/Assignments/IS607/Week9Assignment/books.json"
books.json.data <- fromJSON(txt = books.json.url)
# In the result shown below, `books.authors` is a list column (one character
# vector per book), `books.price` is numeric, and `books.kindlePrice` is
# character with NA for the book that has no Kindle edition.
# NOTE(review): rows 1 and 2 carry identical ISBN-10/ISBN-13 values here,
# while the HTML and XML outputs list 1118859138 / 978-1118859131 for the
# second book -- presumably a data-entry slip in books.json; verify.
books.json.df1 <- as.data.frame(x = books.json.data)
str(object = books.json.df1)
## 'data.frame': 3 obs. of 7 variables:
## $ books.name : chr "SQL Server Internals: In-Memory OLTP Inside the SQL Server 2014 Hekaton Engine" "Professional Microsoft SQL Server 2014 Administration" "Probability and Random Variables: A Beginner's Guide"
## $ books.isbn.10 : chr "1910035033" "1910035033" "0521644453"
## $ books.isbn.13 : chr "978-1910035030" "978-1910035030" "978-0521644457"
## $ books.authors :List of 3
## ..$ : chr "Kalen Delaney"
## ..$ : chr "Adam Jorgensen" "Bradley Ball" "Steven Wort" "Ross LoForte" ...
## ..$ : chr "David Stirzaker"
## $ books.price : num 27 36.6 50.5
## $ books.kindleAvailable: chr "yes" "yes" "no"
## $ books.kindlePrice : chr "9.99" "31.49" NA
print(x = books.json.df1)
## books.name
## 1 SQL Server Internals: In-Memory OLTP Inside the SQL Server 2014 Hekaton Engine
## 2 Professional Microsoft SQL Server 2014 Administration
## 3 Probability and Random Variables: A Beginner's Guide
## books.isbn.10 books.isbn.13
## 1 1910035033 978-1910035030
## 2 1910035033 978-1910035030
## 3 0521644453 978-0521644457
## books.authors
## 1 Kalen Delaney
## 2 Adam Jorgensen, Bradley Ball, Steven Wort, Ross LoForte, Brian Knight
## 3 David Stirzaker
## books.price books.kindleAvailable books.kindlePrice
## 1 26.99 yes 9.99
## 2 36.58 yes 31.49
## 3 50.47 no <NA>