Data

con <- dbConnect(drv = RSQLite::SQLite(),
                 dbname = ":memory:")

customer <- read.csv("/Users/sophiekemprecos/Desktop/3280/HW 09/HW 9 Data/customer.csv")
rental <- read.csv("/Users/sophiekemprecos/Desktop/3280/HW 09/HW 9 Data/rental.csv")
film <- read.csv("/Users/sophiekemprecos/Desktop/3280/HW 09/HW 9 Data/film.csv")
inventory <- read.csv("/Users/sophiekemprecos/Desktop/3280/HW 09/HW 9 Data/inventory.csv")
category <- read.csv("/Users/sophiekemprecos/Desktop/3280/HW 09/HW 9 Data/category.csv")
film_category <- read.csv("/Users/sophiekemprecos/Desktop/3280/HW 09/HW 9 Data/film_category.csv")
actor <- read.csv("/Users/sophiekemprecos/Desktop/3280/HW 09/HW 9 Data/actor.csv")
film_actor <- read.csv("/Users/sophiekemprecos/Desktop/3280/HW 09/HW 9 Data/film_actor.csv")
payment <- read.csv("/Users/sophiekemprecos/Desktop/3280/HW 09/HW 9 Data/payment.csv")

dbWriteTable(conn = con, name = "customer", value = customer)
dbWriteTable(conn = con, name = "rental", value = rental)
dbWriteTable(conn = con, name = "film", value = film)
dbWriteTable(conn = con, name = "inventory", value = inventory)
dbWriteTable(conn = con, name = "category", value = category)
dbWriteTable(conn = con, name = "film_category", value = film_category)
dbWriteTable(conn = con, name = "actor", value = actor)
dbWriteTable(conn = con, name = "film_actor", value = film_actor)
dbWriteTable(conn = con, name = "payment", value = payment)
knitr::opts_chunk$set(connection = "con")

dbListTables(con)

## [1] "actor"         "category"      "customer"      "film"         
## [5] "film_actor"    "film_category" "inventory"     "payment"      
## [9] "rental"

SQL

List the top 10 customers who have rented the most films, including their first and last names, email addresses, and the total number of films they have rented.

SELECT
customer.first_name,
customer.last_name,
customer.email,
COUNT(rental.rental_id) AS total_rentals
FROM customer JOIN rental ON customer.customer_id = rental.customer_id
GROUP BY
customer.customer_id
ORDER BY
total_rentals DESC
LIMIT 10;

Displaying records 1 - 10
first_name	last_name	email	total_rentals
ELEANOR	HUNT	ELEANOR.HUNT@sakilacustomer.org	46
KARL	SEAL	KARL.SEAL@sakilacustomer.org	45
MARCIA	DEAN	MARCIA.DEAN@sakilacustomer.org	42
CLARA	SHAW	CLARA.SHAW@sakilacustomer.org	42
TAMMY	SANDERS	TAMMY.SANDERS@sakilacustomer.org	41
WESLEY	BULL	WESLEY.BULL@sakilacustomer.org	40
SUE	PETERS	SUE.PETERS@sakilacustomer.org	40
TIM	CARY	TIM.CARY@sakilacustomer.org	39
MARION	SNYDER	MARION.SNYDER@sakilacustomer.org	39
RHONDA	KENNEDY	RHONDA.KENNEDY@sakilacustomer.org	39

Find the titles and descriptions of films that have never been rented, sorted alphabetically by title.

SELECT
title, description
FROM film
LEFT JOIN inventory
ON film.film_id = inventory.film_id
LEFT JOIN rental
ON inventory.inventory_id = rental.inventory_id
WHERE rental.rental_id IS NULL
ORDER BY
film.title;

Displaying records 1 - 10
title	description
ACADEMY DINOSAUR	A Epic Drama of a Feminist And a Mad Scientist who must Battle a Teacher in The Canadian Rockies
ALICE FANTASIA	A Emotional Drama of a A Shark And a Database Administrator who must Vanquish a Pioneer in Soviet Georgia
APOLLO TEEN	A Action-Packed Reflection of a Crocodile And a Explorer who must Find a Sumo Wrestler in An Abandoned Mine Shaft
ARGONAUTS TOWN	A Emotional Epistle of a Forensic Psychologist And a Butler who must Challenge a Waitress in An Abandoned Mine Shaft
ARK RIDGEMONT	A Beautiful Yarn of a Pioneer And a Monkey who must Pursue a Explorer in The Sahara Desert
ARSENIC INDEPENDENCE	A Fanciful Documentary of a Mad Cow And a Womanizer who must Find a Dentist in Berlin
BOONDOCK BALLROOM	A Fateful Panorama of a Crocodile And a Boy who must Defeat a Monkey in The Gulf of Mexico
BUTCH PANTHER	A Lacklusture Yarn of a Feminist And a Database Administrator who must Face a Hunter in New Orleans
CATCH AMISTAD	A Boring Reflection of a Lumberjack And a Feminist who must Discover a Woman in Nigeria
CHINATOWN GLADIATOR	A Brilliant Panorama of a Technical Writer And a Lumberjack who must Escape a Butler in Ancient India

Retrieve a list of film categories and the average length of films in each category, ordered by the average length in descending order. Include the category name and the average length

SELECT
category.name,
AVG(film.length) AS avg_length
FROM film
JOIN film_category
ON film.film_id = film_category.film_id
JOIN category
ON film_category.category_id = category.category_id
GROUP BY category.name
ORDER BY
avg_length DESC;

Displaying records 1 - 10
name	avg_length
Sports	128.2027
Games	127.8361
Foreign	121.6986
Drama	120.8387
Comedy	115.8276
Family	114.7826
Music	113.6471
Travel	113.3158
Horror	112.4821
Classics	111.6667

Identify the 5 actors who have appeared in the most films, along with the number of films they’ve acted in. Include their first and last names.

SELECT
actor.first_name, actor.last_name,
COUNT(film_actor.film_id) AS film_count
FROM actor
JOIN film_actor
ON actor.actor_id = film_actor.actor_id
GROUP BY actor.actor_id
ORDER BY film_count DESC
LIMIT 5;

5 records
first_name	last_name	film_count
GINA	DEGENERES	42
WALTER	TORN	41
MARY	KEITEL	40
MATTHEW	CARREY	39
SANDRA	KILMER	37

List the names of customers who have rented at least one film starring the actor ’Johnny Depp.’

SELECT customer.first_name, customer.last_name
FROM customer
JOIN rental
ON customer.customer_id = rental.customer_id
JOIN inventory
ON rental.inventory_id = inventory.inventory_id
JOIN film
ON inventory.film_id = film.film_id
JOIN film_actor
ON film.film_id = film_actor.film_id
JOIN actor
ON film_actor.actor_id = actor.actor_id
WHERE actor.first_name = 'Johnny'
AND actor.last_name = 'Depp'
ORDER BY customer.last_name, customer.first_name;

0 records
first_name	last_name

Find the top 10 earning film titles along with their total revenue (rental fees), sorted by revenue in descending order. Include the film title and total revenue.**

SELECT film.title,
SUM(payment.amount) AS total_revenue
FROM film
JOIN inventory
ON film.film_id = inventory.film_id
JOIN rental
ON inventory.inventory_id = rental.inventory_id
JOIN payment
ON rental.rental_id = payment.rental_id
GROUP BY film.title
ORDER BY total_revenue DESC
LIMIT 10;

Displaying records 1 - 10
title	total_revenue
TELEGRAPH VOYAGE	231.73
WIFE TURN	223.69
ZORRO ARK	214.69
GOODFELLAS SALUTE	209.69
SATURDAY LAMBS	204.72
TITANS JERK	201.71
TORQUE BOUND	198.72
HARRY IDAHO	195.70
INNOCENT USUAL	191.74
HUSTLER PARTY	190.78

Plotly

Use the diamonds dataset to create a 2D plotly scatterplot (you can choose the variables). The plot should have a title and subtitle, axis labels, and hover information that is useful.

The following plotly shows the relationship between carat level and price. We see this mapped as a scatterplot in blue showing a positive relationship between the two.
```
scat_plot <- plot_ly(data = diamonds, x = ~carat, y = ~price, marker = list(color = 'navyblue'))
scat_plot <- scat_plot %>% 
  layout(title = list(text = paste0("Relationship Between Carat and Price",
        "<br><sup>Higher-carat diamonds tend to cost more</sup>")), 
         xaxis = list(title = 'Carat'), yaxis = list(title = 'Price')) 
scat_plot
```
```
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
```
```
## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
```
Compare and contrast ggplot2 and plotly. What is an example of a time when you would definitely want to use each one over the other?

GGplot2 and Plotly have very similar functions. Both are sophisticated and easy to use software for creating descriptive plots in different formats. The above scatterplot using Plotly could be made very similarly using ggplot2 for a very close outcome. One major difference is that when there is a large amount of data being represented, Plotly allows you to see descriptive information for individual data points using the hover mechanism. For example, if you wanted to see the values of an outlier or a values in a mix of data points Plotly allows you to see specific values. When doing EDA the interactivitey is a very useful feature, however it does not knit into the pdf with the interactive features. This is one example where Plotly does not carry its advantages in some formats, so that is an important consideration for when to use ggplot or plotly. A good time to use plotly would also be when creating a dashboard. This is similar to Tableau, but you can make accessible dashboards or animations of plotly that can be shared.

Using the gapminder dataset, make a Plotly plot which compares two variables over time. Explain the meaning of this comparison in context and describe what narrative is advanced by this comparison.

The graph shows that as GDP per capita increases over time, life expectancy also tends to rise. These are not parallel trends but we do see significant changes over time. This is because better economic growth reflected by gdp per capita will also improve health outcomes and longevity of people. The narrative this advances is that economic development and people’s health are closely linked, where as a country becomes richer, people have better healthcare and can live longer. At the same time, the plot also suggests that gains in life expectancy may slow down at higher income levels, which may be due to diminishing returns from more income.
```
gap_data <- gapminder %>% 
  filter(country == "Indonesia")

gap_plot <- plot_ly()
gap_plot <- gap_plot %>%
  add_trace(data = gap_data, x = ~year, y = ~lifeExp, name = "Life Expectancy", line = list(color = "pink")) %>%
  add_trace(data = gap_data, x = ~year, y = ~gdpPercap/1000, name = "GDP Per Capita (thousands)", line = list(color = "purple")) %>%
  layout(title = "Life Expectancy and GDP per Capita in Indonesia",
    xaxis = list(title = "Year"),
    yaxis = list(title = "Life Expectancy / GDP per Capita in thousands")) %>% style(visible = "legendonly", traces = 0)
```
```
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
```
```
## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
```
```
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
```
```
## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
```
```
## A line object has been specified, but lines is not in the mode
## Adding lines to the mode...
## A line object has been specified, but lines is not in the mode
## Adding lines to the mode...
```
```
gap_plot
```

HW 09 Plotly and SQL

2025-11-24

Data

SQL

Plotly