1.Create a normalized (BCNF) relational schema and visualize the schema in an ERD for the data in the XML file. Include the ERD in your R Notebook. ERD Diagram Design in MySql WorkBench

library(sqldf)
## Warning: package 'sqldf' was built under R version 3.6.3
## Loading required package: gsubfn
## Warning: package 'gsubfn' was built under R version 3.6.3
## Loading required package: proto
## Warning: package 'proto' was built under R version 3.6.3
## Loading required package: RSQLite
## Warning: package 'RSQLite' was built under R version 3.6.3
library(XML)
## Warning: package 'XML' was built under R version 3.6.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

2.Create a SQLite database that implement the schema, i.e., define the tables with CREATE TABLE. Use SQL chunks in your R Notebook.

library(dplyr)
library(RSQLite)

con <- dbConnect(RSQLite::SQLite(), "Bookstores.db")


dbExecute(con, "PRAGMA foreign_keys = OFF;")
## [1] 0
dbExecute(con, "CREATE TABLE Title
(
  Title_Id INTEGER NOT NULL,
  Title TEXT,
  Price REAL,
  PublishDate date,
  Edition NUMERIC,
  Description TEXT,
  CONSTRAINT PK_Title PRIMARY KEY (Title_Id)
);
")
## [1] 0
dbExecute(con, "CREATE TABLE Author
(
  Author_Id INTEGER NOT NULL,
  AuthorName TEXT,
  CONSTRAINT PK_Author PRIMARY KEY (Author_Id)
);
")
## [1] 0
dbExecute(con, "CREATE TABLE Genre
(
  Genre_Id NONE NOT NULL,
  Genre NONE,
  CONSTRAINT PK_Genre PRIMARY KEY (Genre_Id)
);

")
## [1] 0
dbExecute(con, "CREATE TABLE Authorship
(
  Title_Id INTEGER NOT NULL,
  Author_Id INTEGER NOT NULL,
  CONSTRAINT PK_Authorship PRIMARY KEY (Title_Id,Author_Id),
  CONSTRAINT has FOREIGN KEY (Title_Id) REFERENCES Title (Title_Id),
  CONSTRAINT has FOREIGN KEY (Author_Id) REFERENCES Author (Author_Id)
);

")
## [1] 0
dbExecute(con, "CREATE TABLE has_Genre
(
  Title_Id INTEGER NOT NULL,
  Genre_Id NONE NOT NULL,
  CONSTRAINT PK_has_Genre PRIMARY KEY (Title_Id,Genre_Id),
  CONSTRAINT has FOREIGN KEY (Title_Id) REFERENCES Title (Title_Id),
  CONSTRAINT has FOREIGN KEY (Genre_Id) REFERENCES Genre (Genre_Id)
);
")
## [1] 0

3.Load the XML data from the file into R data frames; you will need to use either node-by-node traversal of the XML tree or a combination of node-by-node traversal with XPath; you likely will not be able to accomplish it with only XPath. Use surrogate keys and/or the ID attributes in the XML.

My_XML<-xmlParse(file="F:\\IE Courses\\Database Management Systems\\Assignments\\Assignment 9\\Books-v3.xml")
root = xmlRoot(My_XML)

Title<-xmlSApply(root,function(x)xmlValue(x[['title']]))
Price<-xmlSApply(root,function(x)xmlValue(x[['price']]))
PublishDate<-xmlSApply(root,function(x)xmlValue(x[['publish_date']]))
Description<-xmlSApply(root,function(x)xmlValue(x[['description']]))
Edition<-xmlSApply(root,function(x)xmlValue(x[['edition']]))

Author<-xmlSApply(root,function(x)xmlValue(x[['author']]))
Genre<-xmlSApply(root,function(x)xmlValue(x[['genre']]))

4.Transform data types as necessary and then write the data frames to the appropriate tables in the database.

df.Title<-data.frame("Title"=Title,"Author"=Author,"Genre"=Genre,"Price"=Price,"PublishDate"=PublishDate,"Edition"=Edition,"Description"=Description)
df.Author<-tibble::rowid_to_column(data.frame("AuthorName"=unique(Author)), "Author_Id")
df.Genre<-tibble::rowid_to_column(data.frame("Genre"=unique(Genre)), "Genre_Id")
df.Title<-tibble::rowid_to_column(df.Title, "Title_Id")

df.Title$Price<- as.factor(df.Title$Price)


df.Authorship<-merge(df.Title[,c(1,2,3)], df.Author, by.x = "Author", by.y = "AuthorName")
df.hasGenre<-merge(df.Title[,c(1,2,4)], df.Genre, by.x = "Genre", by.y = "Genre")
dbWriteTable(con,"Author",df.Author, append=TRUE)

dbWriteTable(con,"Genre",df.Genre, append=TRUE)

dbWriteTable(con,"Title",df.Title[,-c(3,4)], append=TRUE)

dbWriteTable(con,"Authorship",df.Authorship[,c(4,2)], append=TRUE)

dbWriteTable(con,"hasGenre",df.hasGenre[,c(4,2)], append=TRUE)

5 A. What are the titles and prices of all books written by “Galos, Mike”? List the titles and the prices.

dbGetQuery(con,"select T.Title,T.Price from Title T inner join Authorship AU on t.Title_Id=AU.Title_Id inner join Author A on AU.Author_Id=A.Author_Id where A.AuthorName='Galos, Mike'  ")

B.What is the most recent year of publication of all books written by “O’Brien, Tim”.

dbGetQuery(con,"select T.PublishDate from Title T inner join Authorship AU on t.Title_Id=AU.Title_Id inner join Author A on AU.Author_Id=A.Author_Id where A.AuthorName like '%Tim'  order by T.PublishDate desc limit 1")

C.What is the average price of all books in the “Fantasy” genre.

dbGetQuery(con,"select avg(T.Price) as AvgPrice from Title T inner join hasGenre GR on T.Title_Id=GR.Title_Id inner join Genre G on GR.Genre_Id=G.Genre_Id where G.Genre='Fantasy'  ")

D.Find the number of books in each genre.

dbGetQuery(con,"select G.Genre,count(*) as NumBooks from Title T inner join hasGenre GR on T.Title_Id=GR.Title_Id inner join Genre G on GR.Genre_Id=G.Genre_Id group by G.Genre  ")

E.List the title and author of all books that cost less than the average price of books.

dbGetQuery(con,"select T.Title,A.AuthorName from Title T inner join Authorship AU on T.Title_Id=AU.Title_Id inner join Author A on AU.Author_Id=A.Author_Id where T.Price>6.35")