This R Markdown document uses DVD rental database to develop an interactive document. To do this we connect R with a Postgres database using the package ‘RPostgreSQL’.
Question 1: Is there any particular actor/actress that is more profitable in terms of movie rents? Perhaps the company could make an add featuring prominent actors so it can boost theirs rents and by doing so its revenues.
SELECT
actor.first_name||' '||actor.last_name AS actor_name,
SUM(payment.amount) AS amount
FROM
actor
JOIN
film_actor
ON
actor.actor_id = film_actor.actor_id
JOIN
film
ON
film_actor.film_id = film.film_id
JOIN
inventory
ON
film.film_id = inventory.film_id
JOIN
rental
ON
inventory.inventory_id = rental.inventory_id
JOIN
payment
ON
rental.rental_id = payment.rental_id
GROUP BY
actor.first_name||' '||actor.last_name
ORDER BY
SUM(payment.amount) DESC
LIMIT 10
highchart() %>%
hc_chart(type = 'bar') %>%
hc_xAxis(categories = profitable_actor_actress$actor_name) %>%
hc_add_series(name = 'Revenue by Actor/Actress', data = round(profitable_actor_actress$amount,0))
Susan Davis, Gina Degeneres and Matthew Carrey could do a commercial as an attempt to boost sales.
Question 2: Is the rating of the film important to the revenues? Perhaps the company could shift its attention to a more profitable market instead of having all markets.
SELECT
film.rating AS film_rating,
COUNT(DISTINCT customer.customer_id) AS rents,
ROUND(SUM(payment.amount)) AS revenue
FROM
actor
JOIN
film_actor
ON
actor.actor_id = film_actor.actor_id
JOIN
film
ON
film_actor.film_id = film.film_id
JOIN
inventory
ON
film.film_id = inventory.film_id
JOIN
rental
ON
inventory.inventory_id = rental.inventory_id
JOIN
payment
ON
rental.rental_id = payment.rental_id
JOIN
customer
ON
payment.customer_id = customer.customer_id
GROUP BY
film.rating
ORDER BY
SUM(payment.amount) DESC
highchart() %>%
hc_chart(type = 'column') %>%
hc_xAxis(categories = rating_revenue$film_rating) %>%
hc_add_series(name = 'Revenue by rating', data = round(rating_revenue$revenue,0))
Question 3: What are the top and least rented movies based on categories and their total revenues? (by Okoh Anita in freeCodeCamp)
SELECT
category.name AS category,
COUNT (DISTINCT customer.customer_id) AS rents,
SUM(payment.amount) AS amount
FROM
category
JOIN
film_category
ON
category.category_id = film_category.category_id
JOIN
film
ON
film_category.film_id = film.film_id
JOIN
inventory
ON
film.film_id = inventory.film_id
JOIN
rental
ON
rental.inventory_id = inventory.inventory_id
JOIN
payment
ON
rental.rental_id = payment.rental_id
JOIN
customer
ON
payment.customer_id = customer.customer_id
GROUP BY
category.name
ORDER BY
SUM(payment.amount) DESC
highchart() %>%
hc_yAxis_multiples(list(title = list(text = 'Revenue'), min = 0, max = max(demand_revenue_per_category$amount),
showFirstLabel = TRUE, showLastLabel = TRUE, opposite = FALSE),
list(title = list(text = 'Rents'), min = 0, max = max(demand_revenue_per_category$rents),
showFirstLabel = TRUE, showLastLabel = FALSE, opposite = TRUE)) %>%
hc_xAxis(categories = demand_revenue_per_category$category) %>%
hc_add_series(data = demand_revenue_per_category, name = 'Revenue', type = 'column', hcaes(y = amount)) %>%
hc_add_series(data = demand_revenue_per_category, name = 'Rents', type = 'column', hcaes(y = rents))
## Warning: `parse_quosure()` is deprecated as of rlang 0.2.0.
## Please use `parse_quo()` instead.
## This warning is displayed once per session.
Sports, Sci-Fi and Animation are the top 3 in Revenue and share some of the biggest values also in rents.
Question 4: Which are the most relevant countries in terms on rents and revenue for the company? Maybe we could reinforced those markets instead of spreading resources in markets that are not profitable.
SELECT
country.country AS country,
COUNT (DISTINCT customer.customer_id) AS demand,
ROUND(SUM(payment.amount)) AS revenue
FROM
category
JOIN
film_category
ON
category.category_id = film_category.category_id
JOIN
film
ON
film_category.film_id = film.film_id
JOIN
inventory
ON
film.film_id = inventory.film_id
JOIN
rental
ON
rental.inventory_id = inventory.inventory_id
JOIN
payment
ON
rental.rental_id = payment.rental_id
JOIN
customer
ON
payment.customer_id = customer.customer_id
JOIN
address
ON
customer.address_id = address.address_id
JOIN
city
ON
address.city_id = city.city_id
JOIN
country
ON
city.country_id = country.country_id
GROUP BY
country.country
HAVING
COUNT (DISTINCT customer.customer_id) >= 10
ORDER BY
SUM(payment.amount) DESC
countries <- data_frame(
country = Top_demand_per_country$country,
lat = c(20.593684, 35.86166, 37.09024, 36.204824, 23.634501, -14.235004, 61.52401, 12.879721, 38.963745, -0.789275, 9.081999, -38.416097, 23.69781, -30.559482),
lon = c(78.96288, 104.195397, -95.712891, 138.252924, -102.552784, -51.92528, 105.318756, 121.774017, 35.243322, 113.921327, 8.675277, -63.616672, 120.960515, 22.937506)
)
## Warning: `data_frame()` is deprecated, use `tibble()`.
## This warning is displayed once per session.
Top_demand_per_country <- inner_join(Top_demand_per_country,countries, by = 'country')
# using '<br/ to create a space between words>'
Top_demand_per_country <- Top_demand_per_country %>%
mutate(popup_info = paste(country,'<br/>','Demand:',demand,'<br/>','Revenue:',revenue))
leaflet() %>%
addTiles() %>%
addCircleMarkers(data = Top_demand_per_country,lat = ~lat, lng = ~lon, radius = ~10, popup = ~popup_info)
Question 5: How the revenues have behaved in june based on movie category? Just consider the top 5 based on rents(question 3)
SELECT
rent_per_day_table.rental_date AS rental_date,
rent_per_day_table.movie_category AS movie_category,
rent_per_day_table.total_rents_per_day AS rents_per_day,
SUM(rent_per_day_table.total_revenue) OVER
(PARTITION BY rent_per_day_table.movie_category ORDER BY rent_per_day_table.rental_date) AS cum_revenue
FROM(
SELECT
temp_rent_table.rental_date AS rental_date,
temp_rent_table.category AS movie_category,
COUNT (DISTINCT temp_rent_table.rental_id) AS total_rents_per_day,
SUM(temp_rent_table.amount) AS total_revenue
FROM(
SELECT
DATE(rental.rental_date) AS rental_date,
rental.rental_id AS rental_id,
category.name AS category,
payment.amount AS amount
FROM
film
JOIN
inventory
ON
film.film_id = inventory.film_id
JOIN
rental
ON
rental.inventory_id = inventory.inventory_id
JOIN
payment
ON
rental.rental_id = payment.rental_id
JOIN
film_category
ON
film.film_id = film_category.film_id
JOIN
category
ON
film_category.category_id = category.category_id
WHERE
inventory.inventory_id IS NOT NULL AND
rental.rental_id IS NOT NULL AND
EXTRACT(YEAR FROM rental_date) = 2005 AND
EXTRACT(MONTH FROM rental_date) = 6
) temp_rent_table
GROUP BY
temp_rent_table.rental_date,
temp_rent_table.category
) rent_per_day_table
highchart() %>%
hc_chart(type = 'line') %>%
hc_series(list(name = 'Sports', data = cumulative_revenue_per_day[cumulative_revenue_per_day$movie_category == 'Sports',]$cum_revenue),
list(name = 'Animation', data = cumulative_revenue_per_day[cumulative_revenue_per_day$movie_category == 'Animation',]$cum_revenue),
list(name = 'Action', data = cumulative_revenue_per_day[cumulative_revenue_per_day$movie_category == 'Action',]$cum_revenue),
list(name = 'Sci-Fi', data = cumulative_revenue_per_day[cumulative_revenue_per_day$movie_category == 'Sci-Fi',]$cum_revenue),
list(name = 'Family', data = cumulative_revenue_per_day[cumulative_revenue_per_day$movie_category == 'Family',]$cum_revenue)) %>%
hc_xAxis(categories = cumulative_revenue_per_day$rental_date)
Question 6: If the company wants to reward premium users, it needs to identify their top 20. For this the company might need the customer’s details.
SELECT
customer.first_name || ' ' || customer.last_name AS customer_name,
ROUND(SUM(payment.amount)) AS total_payment,
customer.email AS email,
address.address AS address,
address.phone AS phone,
city.city AS city,
country.country AS country
FROM
customer
JOIN
payment
ON
customer.customer_id = payment.customer_id
JOIN
address
ON
customer.address_id = address.address_id
JOIN
city
ON
address.city_id = city.city_id
JOIN
country
ON
city.country_id = country.country_id
GROUP BY
customer.first_name || ' ' || customer.last_name,
customer.email,
address.address,
address.phone,
city.city,
country.country
ORDER BY
SUM(payment.amount) DESC
LIMIT 20
datatable(top_20_clients, colnames = c('Customer','Total payment','E-mail','Address','Phone','City','Country'))
Question 7: How many loses or replacement cost the company is incurring by clients that are not returning the rented films?
SELECT
SUM(film.replacement_cost) AS incurring_costs
FROM
film
JOIN
inventory
ON
film.film_id = inventory.inventory_id
JOIN
rental
ON
inventory.inventory_id = rental.inventory_id
JOIN
customer
ON
rental.customer_id = customer.customer_id
WHERE
rental.return_date IS NULL
The cost is 911.55 dollars.
Question 8: What is the average rental rate for each category? (by Okoh Anita in freeCodeCamp)
SELECT
category.name AS category,
ROUND(AVG(film.rental_rate),2) AS average_rental_rate
FROM
film
JOIN
film_category
ON
film.film_id = film_category.film_id
JOIN
category
ON
film_category.category_id = category.category_id
GROUP BY
category.name
ORDER BY
ROUND(AVG(film.rental_rate),2) DESC
highchart() %>%
hc_chart(type = 'bar') %>%
hc_xAxis(categories = average_rental_rate$category) %>%
hc_add_series(name = 'Average rental rate per category', data = average_rental_rate$average_rental_rate)
Question 9: How many films were returned in time, late or never returned? (by Okoh Anita in freeCodeCamp with modification)
SELECT
returned_days.return_description AS return_description,
COUNT(returned_days.inventory_id) AS number_of_films
FROM(
SELECT
inventory.inventory_id AS inventory_id,
film.rental_duration AS rental_duration,
DATE(rental.rental_date) AS rental_date,
DATE(rental.return_date) AS return_date,
DATE(rental.return_date) - DATE(rental.rental_date) AS days_returned,
CASE
WHEN DATE(rental.return_date) - DATE(rental.rental_date) = film.rental_duration
THEN 'return in time'
WHEN DATE(rental.return_date) - DATE(rental.rental_date) > film.rental_duration
THEN 'return late'
WHEN DATE(rental.return_date) - DATE(rental.rental_date) < film.rental_duration
THEN 'return early'
WHEN rental.return_date IS NULL
THEN 'never returned'
END AS return_description
FROM
film
JOIN
inventory
ON
film.film_id = inventory.film_id
JOIN
rental
ON
inventory.inventory_id = rental.inventory_id
) returned_days
GROUP BY
returned_days.return_description
ORDER BY
COUNT(returned_days.inventory_id) DESC
highchart() %>%
hc_chart(type = 'column') %>%
hc_xAxis(categories = returned_films$return_description) %>%
hc_add_series(name = 'Description', data = returned_films$number_of_films)
dbDisconnect(conn = con)
## [1] TRUE