1.Create a normalized (BCNF) relational schema and visualize the schema in an ERD for the data in the XML file. Include the ERD in your R Notebook.
library(sqldf)
## Warning: package 'sqldf' was built under R version 3.6.3
## Loading required package: gsubfn
## Warning: package 'gsubfn' was built under R version 3.6.3
## Loading required package: proto
## Warning: package 'proto' was built under R version 3.6.3
## Loading required package: RSQLite
## Warning: package 'RSQLite' was built under R version 3.6.3
library(XML)
## Warning: package 'XML' was built under R version 3.6.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
2.Create a SQLite database that implement the schema, i.e., define the tables with CREATE TABLE. Use SQL chunks in your R Notebook.
library(dplyr)
library(RSQLite)
con <- dbConnect(RSQLite::SQLite(), "Bookstores.db")
dbExecute(con, "PRAGMA foreign_keys = OFF;")
## [1] 0
dbExecute(con, "CREATE TABLE Title
(
Title_Id INTEGER NOT NULL,
Title TEXT,
Price REAL,
PublishDate date,
Edition NUMERIC,
Description TEXT,
CONSTRAINT PK_Title PRIMARY KEY (Title_Id)
);
")
## [1] 0
dbExecute(con, "CREATE TABLE Author
(
Author_Id INTEGER NOT NULL,
AuthorName TEXT,
CONSTRAINT PK_Author PRIMARY KEY (Author_Id)
);
")
## [1] 0
dbExecute(con, "CREATE TABLE Genre
(
Genre_Id NONE NOT NULL,
Genre NONE,
CONSTRAINT PK_Genre PRIMARY KEY (Genre_Id)
);
")
## [1] 0
dbExecute(con, "CREATE TABLE Authorship
(
Title_Id INTEGER NOT NULL,
Author_Id INTEGER NOT NULL,
CONSTRAINT PK_Authorship PRIMARY KEY (Title_Id,Author_Id),
CONSTRAINT has FOREIGN KEY (Title_Id) REFERENCES Title (Title_Id),
CONSTRAINT has FOREIGN KEY (Author_Id) REFERENCES Author (Author_Id)
);
")
## [1] 0
dbExecute(con, "CREATE TABLE has_Genre
(
Title_Id INTEGER NOT NULL,
Genre_Id NONE NOT NULL,
CONSTRAINT PK_has_Genre PRIMARY KEY (Title_Id,Genre_Id),
CONSTRAINT has FOREIGN KEY (Title_Id) REFERENCES Title (Title_Id),
CONSTRAINT has FOREIGN KEY (Genre_Id) REFERENCES Genre (Genre_Id)
);
")
## [1] 0
3.Load the XML data from the file into R data frames; you will need to use either node-by-node traversal of the XML tree or a combination of node-by-node traversal with XPath; you likely will not be able to accomplish it with only XPath. Use surrogate keys and/or the ID attributes in the XML.
My_XML<-xmlParse(file="F:\\IE Courses\\Database Management Systems\\Assignments\\Assignment 9\\Books-v3.xml")
root = xmlRoot(My_XML)
Title<-xmlSApply(root,function(x)xmlValue(x[['title']]))
Price<-xmlSApply(root,function(x)xmlValue(x[['price']]))
PublishDate<-xmlSApply(root,function(x)xmlValue(x[['publish_date']]))
Description<-xmlSApply(root,function(x)xmlValue(x[['description']]))
Edition<-xmlSApply(root,function(x)xmlValue(x[['edition']]))
Author<-xmlSApply(root,function(x)xmlValue(x[['author']]))
Genre<-xmlSApply(root,function(x)xmlValue(x[['genre']]))
4.Transform data types as necessary and then write the data frames to the appropriate tables in the database.
df.Title<-data.frame("Title"=Title,"Author"=Author,"Genre"=Genre,"Price"=Price,"PublishDate"=PublishDate,"Edition"=Edition,"Description"=Description)
df.Author<-tibble::rowid_to_column(data.frame("AuthorName"=unique(Author)), "Author_Id")
df.Genre<-tibble::rowid_to_column(data.frame("Genre"=unique(Genre)), "Genre_Id")
df.Title<-tibble::rowid_to_column(df.Title, "Title_Id")
df.Title$Price<- as.factor(df.Title$Price)
df.Authorship<-merge(df.Title[,c(1,2,3)], df.Author, by.x = "Author", by.y = "AuthorName")
df.hasGenre<-merge(df.Title[,c(1,2,4)], df.Genre, by.x = "Genre", by.y = "Genre")
dbWriteTable(con,"Author",df.Author, append=TRUE)
dbWriteTable(con,"Genre",df.Genre, append=TRUE)
dbWriteTable(con,"Title",df.Title[,-c(3,4)], append=TRUE)
dbWriteTable(con,"Authorship",df.Authorship[,c(4,2)], append=TRUE)
dbWriteTable(con,"hasGenre",df.hasGenre[,c(4,2)], append=TRUE)
5 A. What are the titles and prices of all books written by “Galos, Mike”? List the titles and the prices.
dbGetQuery(con,"select T.Title,T.Price from Title T inner join Authorship AU on t.Title_Id=AU.Title_Id inner join Author A on AU.Author_Id=A.Author_Id where A.AuthorName='Galos, Mike' ")
B.What is the most recent year of publication of all books written by “O’Brien, Tim”.
dbGetQuery(con,"select T.PublishDate from Title T inner join Authorship AU on t.Title_Id=AU.Title_Id inner join Author A on AU.Author_Id=A.Author_Id where A.AuthorName like '%Tim' order by T.PublishDate desc limit 1")
C.What is the average price of all books in the “Fantasy” genre.
dbGetQuery(con,"select avg(T.Price) as AvgPrice from Title T inner join hasGenre GR on T.Title_Id=GR.Title_Id inner join Genre G on GR.Genre_Id=G.Genre_Id where G.Genre='Fantasy' ")
D.Find the number of books in each genre.
dbGetQuery(con,"select G.Genre,count(*) as NumBooks from Title T inner join hasGenre GR on T.Title_Id=GR.Title_Id inner join Genre G on GR.Genre_Id=G.Genre_Id group by G.Genre ")
E.List the title and author of all books that cost less than the average price of books.
dbGetQuery(con,"select T.Title,A.AuthorName from Title T inner join Authorship AU on T.Title_Id=AU.Title_Id inner join Author A on AU.Author_Id=A.Author_Id where T.Price>6.35")