Import Libraries
# Import Libraries
library(RSQLite)
library(DBI)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Warning in fun(libname, pkgname): couldn't connect to display ":0"
library(ggplot2)
library(viridis)
## Loading required package: viridisLite
Initial Data Analysis
# Connect to Database
CFA_db <- dbConnect(SQLite(), "CFA.db")
#Fetch the Result
dbListTables(CFA_db)
## [1] "Agent" "Company" "Customer" "Investment" "Portfolio"
## [6] "Security"
# Agent Table
agent_data <- tbl(CFA_db, "Agent")
agent_df = collect(agent_data)
agent_df
## # A tibble: 10 × 7
## agentId agentFName agentLName agentGender agentAddress agentContact
## <int> <chr> <chr> <chr> <chr> <chr>
## 1 1 James Smith Male 2749 Lee Avenue 8563071183
## 2 2 Michael William Male 4165 Godfrey Road 2125079835
## 3 3 Maria Garcia Female 3393 Marshville Road 8456142839
## 4 4 David Rodriguez Male 1121 Willow Greene Dr… 3343021874
## 5 5 Elizabeth Johnson Female 3755 Trouser Leg Road 4139942499
## 6 6 Jessica Addison Female 2400 Caynor Circle 9084966654
## 7 7 Emily Madison Female 1313 Bryan Street 3362097310
## 8 8 Jacob Tyler Male 2269 Oak Way 4024521227
## 9 9 Jose Alexander Male 4461 Windy Ridge Road 2604874772
## 10 10 Ava Isabella Female 1438 Camden Street 7752547641
## # … with 1 more variable: agentEmail <chr>
# Company Table
company_data <- tbl(CFA_db, "Company")
company_df = collect(company_data)
company_df
## # A tibble: 8 × 3
## companyId companyName companyType
## <int> <chr> <chr>
## 1 1 Walmart Retail
## 2 2 Amazon Retail
## 3 3 Apple Inc. Electronics industry
## 4 4 CVS Health Healthcare
## 5 5 UnitedHealth Group Healthcare
## 6 6 AmerisourceBergen Pharmaceutical industry
## 7 7 Microsoft Technology
## 8 8 JPMorgan Chase Financial services
# Customer Table
customer_data <- tbl(CFA_db, "Customer")
customer_df = collect(customer_data)
customer_df
## # A tibble: 10 × 8
## customerId customerFName customerLName customerGender customerAddreess
## <int> <chr> <chr> <chr> <chr>
## 1 1 Luis Carlos Male 4887 Perine Street
## 2 2 Joseph Carter Male 3153 Elmwood Avenue
## 3 3 Daniel Elijah Male 2991 Lena Lane
## 4 4 Ryan Jackson Male 1716 Francis Mine
## 5 5 Olivia Abigail Female 3301 Orchard Street
## 6 6 Sophia Mia Female 1636 Hudson Street
## 7 7 Natalie Grace Female 1751 Florence Street
## 8 8 Diego Benjamin Male 4284 Parkway Drive
## 9 9 Kevin Logan Male 4828 Perry Street
## 10 10 Paola Genesis Female 3321 Mulberry Street
## # … with 3 more variables: customerContact <int64>, customerEmail <chr>,
## # customerJob <chr>
# Investment Table
investment_data <- tbl(CFA_db, "Investment")
investment_df = collect(investment_data)
investment_df
## # A tibble: 15 × 8
## investmentId customerId companyId agentId Date unit unitPrice Amount
## <int> <int> <int> <int> <chr> <int> <int> <int>
## 1 1 2 3 1 12-01-2020 12 125 1500
## 2 2 1 4 3 23-02-2020 15 90 1350
## 3 3 4 7 2 23-02-2020 25 110 2750
## 4 4 3 5 4 25-02-2020 18 96 1728
## 5 5 6 6 7 10-03-2020 30 75 2250
## 6 6 5 2 5 17-03-2020 10 135 1350
## 7 7 7 3 6 15-03-2020 16 125 2000
## 8 8 8 8 9 20-03-2020 20 105 2100
## 9 9 10 1 8 20-03-2020 18 100 1800
## 10 10 9 2 10 20-03-2021 15 135 2025
## 11 11 3 4 4 26-03-2020 25 90 2250
## 12 12 6 4 3 28-03-2020 20 90 1800
## 13 13 4 3 1 04-04-2020 22 125 2750
## 14 14 2 7 2 06-04-2020 28 110 3080
## 15 15 7 7 2 10-04-2020 30 110 3300
# Portfolio Table
portfolio_data <- tbl(CFA_db, "Portfolio")
portfolio_df = collect(portfolio_data)
portfolio_df
## # A tibble: 15 × 5
## portfolioId customerId investmentId portfolioName portfolioStatus
## <int> <int> <int> <chr> <chr>
## 1 1 2 1 Stock Received
## 2 2 1 2 Stock Received
## 3 3 4 3 Bond Received
## 4 4 3 4 Bond Received
## 5 5 6 5 Bond Processing
## 6 6 5 6 Stock Received
## 7 7 7 7 Stock Cancelled
## 8 8 8 8 Certificate Processing
## 9 9 10 9 Bond Received
## 10 10 9 10 Bond Received
## 11 11 3 11 Stock Processing
## 12 12 6 12 Stock Processing
## 13 13 4 13 Bond Cancelled
## 14 14 2 14 Stock Received
## 15 15 7 15 Stock Processing
# Security Table
security_data <- tbl(CFA_db, "Security")
security_df = collect(security_data)
security_df
## # A tibble: 15 × 4
## securityId investmentId securityType securityAmount
## <int> <int> <chr> <int>
## 1 1 1 Debt 180
## 2 2 2 Equity 203
## 3 3 3 Derivative 495
## 4 4 4 Equity 260
## 5 5 5 Debt 270
## 6 6 6 Debt 162
## 7 7 7 Debt 240
## 8 8 8 Derivative 378
## 9 9 9 Equity 270
## 10 10 10 Equity 304
## 11 11 11 Equity 338
## 12 12 12 Equity 270
## 13 13 13 Debt 330
## 14 14 14 Derivative 555
## 15 15 15 Derivative 594
Query 1
Find the customer
names and their investment details.
inv_Customers <- sqldf("SELECT customerFName, customerLName, companyName, companyType, Amount
FROM customer_df cu, company_df co, investment_df inv
WHERE cu.customerId = inv.customerId AND
co.companyId = inv.companyId", method = "raw")
inv_Customers
## customerFName customerLName companyName companyType
## 1 Luis Carlos CVS Health Healthcare
## 2 Joseph Carter Apple Inc. Electronics industry
## 3 Joseph Carter Microsoft Technology
## 4 Daniel Elijah CVS Health Healthcare
## 5 Daniel Elijah UnitedHealth Group Healthcare
## 6 Ryan Jackson Apple Inc. Electronics industry
## 7 Ryan Jackson Microsoft Technology
## 8 Olivia Abigail Amazon Retail
## 9 Sophia Mia CVS Health Healthcare
## 10 Sophia Mia AmerisourceBergen Pharmaceutical industry
## 11 Natalie Grace Apple Inc. Electronics industry
## 12 Natalie Grace Microsoft Technology
## 13 Diego Benjamin JPMorgan Chase Financial services
## 14 Kevin Logan Amazon Retail
## 15 Paola Genesis Walmart Retail
## Amount
## 1 1350
## 2 1500
## 3 3080
## 4 2250
## 5 1728
## 6 2750
## 7 2750
## 8 1350
## 9 1800
## 10 2250
## 11 2000
## 12 3300
## 13 2100
## 14 2025
## 15 1800
Visualization.
ggplot(inv_Customers, aes(fill=companyType, x=customerFName, y=Amount)) +
geom_bar(position="stack", stat="identity") +
scale_fill_viridis(discrete = T) +
xlab("Customer Name") +
ylab("Invested Amount") +
ggtitle("Investment Details")

Query 2
Find the customer
names and portfolio details for received portfolio status.
pf_Rec_Customers <- sqldf("SELECT customerFName, customerLName, portfolioName, portfolioStatus
FROM customer_df c, portfolio_df p
WHERE c.customerId = p.customerId AND
p.portfolioStatus = 'Received'", method = "raw")
pf_Rec_Customers
## customerFName customerLName portfolioName portfolioStatus
## 1 Joseph Carter Stock Received
## 2 Luis Carlos Stock Received
## 3 Ryan Jackson Bond Received
## 4 Daniel Elijah Bond Received
## 5 Olivia Abigail Stock Received
## 6 Paola Genesis Bond Received
## 7 Kevin Logan Bond Received
## 8 Joseph Carter Stock Received
Query 3
Find the customer
names and their investment details who have invested in Healthcare
industry.
inv_Hc_Customers <- sqldf("SELECT customerFName, customerLName, companyName, companyType, Amount
FROM customer_df cu, company_df co, investment_df inv
WHERE cu.customerId = inv.customerId AND
co.companyId = inv.companyId AND
co.companyType = 'Healthcare'", method = "raw")
inv_Hc_Customers
## customerFName customerLName companyName companyType Amount
## 1 Luis Carlos CVS Health Healthcare 1350
## 2 Daniel Elijah CVS Health Healthcare 2250
## 3 Sophia Mia CVS Health Healthcare 1800
## 4 Daniel Elijah UnitedHealth Group Healthcare 1728
Visualization.
ggplot(inv_Hc_Customers, aes(y=Amount, x=customerFName)) +
geom_boxplot(
color="blue",
fill="blue",
alpha=0.2,
notch=FALSE,
notchwidth = 0.8,
outlier.colour="red",
outlier.fill="red",
outlier.size=3
) +
xlab("Customer Name") +
ylab("Invested Amount") +
ggtitle("Healthcare Investment Details")

Query 4
Find the customer
names and their investment details who have invested higher than the
average invested amount.
avg_Amt_Customers <- sqldf("SELECT customerFName, customerLName, companyName, companyType, Amount
FROM customer_df cu, company_df co, investment_df inv
WHERE cu.customerId = inv.customerId AND
co.companyId = inv.companyId AND
amount > (SELECT AVG(amount) FROM investment_df)", method = "raw")
avg_Amt_Customers
## customerFName customerLName companyName companyType Amount
## 1 Ryan Jackson Microsoft Technology 2750
## 2 Sophia Mia AmerisourceBergen Pharmaceutical industry 2250
## 3 Daniel Elijah CVS Health Healthcare 2250
## 4 Ryan Jackson Apple Inc. Electronics industry 2750
## 5 Joseph Carter Microsoft Technology 3080
## 6 Natalie Grace Microsoft Technology 3300
Query 5
Find the unit price
of the products offered by each of the companies.
up_Type_Companies <- sqldf("SELECT companyName, companyType, unitPrice
FROM company_df co, investment_df inv
WHERE co.companyId = inv.companyId
GROUP BY companyName", method = "raw")
up_Type_Companies
## companyName companyType unitPrice
## 1 Amazon Retail 135
## 2 AmerisourceBergen Pharmaceutical industry 75
## 3 Apple Inc. Electronics industry 125
## 4 CVS Health Healthcare 90
## 5 JPMorgan Chase Financial services 105
## 6 Microsoft Technology 110
## 7 UnitedHealth Group Healthcare 96
## 8 Walmart Retail 100
Visualization.
ggplot(data = up_Type_Companies, mapping = aes(fill=companyName, x = companyName, y = unitPrice)) +
geom_bar(stat = "identity") +
xlab("Company Name") +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
ylab("Unit Price") +
ggtitle("Unit Price Analysis")

Query 6
Find the total
investment amount for each type of companies.
inv_Type_Companies <- sqldf("SELECT companyType, sum(amount) as investedAmount
FROM company_df co, investment_df inv
WHERE co.companyId = inv.companyId
GROUP BY companyType", method = "raw")
inv_Type_Companies
## companyType investedAmount
## 1 Electronics industry 6250
## 2 Financial services 2100
## 3 Healthcare 7128
## 4 Pharmaceutical industry 2250
## 5 Retail 5175
## 6 Technology 9130
Visualization.
ggplot(data = inv_Type_Companies, mapping = aes(fill=companyType, x = companyType, y = investedAmount)) +
geom_bar(stat = "identity") +
xlab("Company Type") +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
ylab("Invested Amount") +
ggtitle("Investment Analysis")
