This R Markdown document uses DVD rental database to develop an interactive document. To do this we connect R with a Postgres database using the package ‘RPostgreSQL’.

Question 1: Is there any particular actor/actress that is more profitable in terms of movie rents? Perhaps the company could make an add featuring prominent actors so it can boost theirs rents and by doing so its revenues.

SELECT
  actor.first_name||' '||actor.last_name AS actor_name,
  SUM(payment.amount)                    AS amount
FROM
    actor
JOIN
    film_actor
ON
    actor.actor_id = film_actor.actor_id
JOIN
    film
ON
    film_actor.film_id = film.film_id
JOIN
    inventory
ON
  film.film_id = inventory.film_id
JOIN
    rental
ON
    inventory.inventory_id = rental.inventory_id
JOIN
    payment
ON
    rental.rental_id = payment.rental_id
GROUP BY
    actor.first_name||' '||actor.last_name
ORDER BY
    SUM(payment.amount) DESC
LIMIT 10
  highchart() %>%
  hc_chart(type = 'bar') %>%
  hc_xAxis(categories = profitable_actor_actress$actor_name) %>%
  hc_add_series(name = 'Revenue  by Actor/Actress', data = round(profitable_actor_actress$amount,0))

Susan Davis, Gina Degeneres and Matthew Carrey could do a commercial as an attempt to boost sales.

Question 2: Is the rating of the film important to the revenues? Perhaps the company could shift its attention to a more profitable market instead of having all markets.


SELECT
  film.rating                                  AS film_rating,
  COUNT(DISTINCT customer.customer_id)         AS rents,
  ROUND(SUM(payment.amount))                   AS revenue
FROM
  actor
JOIN
  film_actor
ON
  actor.actor_id = film_actor.actor_id
JOIN
  film
ON
  film_actor.film_id = film.film_id
JOIN
  inventory
ON
  film.film_id = inventory.film_id
JOIN
  rental
ON
  inventory.inventory_id = rental.inventory_id
JOIN
  payment
ON
  rental.rental_id = payment.rental_id
JOIN
  customer
ON
  payment.customer_id = customer.customer_id
GROUP BY
  film.rating
ORDER BY
  SUM(payment.amount) DESC
highchart() %>%
  hc_chart(type = 'column') %>%
  hc_xAxis(categories = rating_revenue$film_rating) %>%
  hc_add_series(name = 'Revenue  by rating', data = round(rating_revenue$revenue,0))

Question 3: What are the top and least rented movies based on categories and their total revenues? (by Okoh Anita in freeCodeCamp)

SELECT
  category.name                         AS category,
  COUNT (DISTINCT customer.customer_id) AS rents,
  SUM(payment.amount)                   AS amount
FROM
    category
JOIN
    film_category
ON
    category.category_id = film_category.category_id
JOIN
    film
ON
    film_category.film_id = film.film_id
JOIN
    inventory
ON
    film.film_id = inventory.film_id
JOIN
    rental
ON
    rental.inventory_id = inventory.inventory_id
JOIN
    payment
ON
    rental.rental_id = payment.rental_id
JOIN
    customer
ON
    payment.customer_id = customer.customer_id
GROUP BY
    category.name
ORDER BY
  SUM(payment.amount) DESC
highchart() %>%
  hc_yAxis_multiples(list(title = list(text = 'Revenue'), min = 0, max = max(demand_revenue_per_category$amount),
                          showFirstLabel = TRUE, showLastLabel = TRUE, opposite = FALSE),
                     list(title = list(text = 'Rents'), min = 0, max = max(demand_revenue_per_category$rents),
                          showFirstLabel = TRUE, showLastLabel = FALSE, opposite = TRUE)) %>%
  hc_xAxis(categories = demand_revenue_per_category$category) %>%
  hc_add_series(data = demand_revenue_per_category, name = 'Revenue', type = 'column', hcaes(y = amount)) %>%
  hc_add_series(data = demand_revenue_per_category, name = 'Rents', type = 'column', hcaes(y = rents))
## Warning: `parse_quosure()` is deprecated as of rlang 0.2.0.
## Please use `parse_quo()` instead.
## This warning is displayed once per session.

Sports, Sci-Fi and Animation are the top 3 in Revenue and share some of the biggest values also in rents.

Question 4: Which are the most relevant countries in terms on rents and revenue for the company? Maybe we could reinforced those markets instead of spreading resources in markets that are not profitable.

SELECT
  country.country                       AS country,
  COUNT (DISTINCT customer.customer_id) AS demand,
  ROUND(SUM(payment.amount))            AS revenue
FROM
  category
JOIN
  film_category
ON
  category.category_id = film_category.category_id
JOIN
  film
ON
  film_category.film_id = film.film_id
JOIN
  inventory
ON
  film.film_id = inventory.film_id
JOIN
  rental
ON
  rental.inventory_id = inventory.inventory_id
JOIN
  payment
ON
  rental.rental_id = payment.rental_id
JOIN
  customer
ON
  payment.customer_id = customer.customer_id
JOIN
  address
ON
  customer.address_id = address.address_id
JOIN
  city
ON
  address.city_id = city.city_id
JOIN
  country
ON
  city.country_id = country.country_id
GROUP BY
  country.country
HAVING
  COUNT (DISTINCT customer.customer_id) >= 10
ORDER BY
  SUM(payment.amount) DESC
countries <- data_frame(
  country = Top_demand_per_country$country,
  lat = c(20.593684, 35.86166,  37.09024, 36.204824, 23.634501, -14.235004, 61.52401, 12.879721, 38.963745, -0.789275, 9.081999, -38.416097, 23.69781, -30.559482),
  lon = c(78.96288, 104.195397, -95.712891, 138.252924, -102.552784, -51.92528, 105.318756, 121.774017, 35.243322, 113.921327, 8.675277, -63.616672, 120.960515, 22.937506)
)
## Warning: `data_frame()` is deprecated, use `tibble()`.
## This warning is displayed once per session.
Top_demand_per_country <- inner_join(Top_demand_per_country,countries, by = 'country')

# using '<br/ to create a space between words>'
Top_demand_per_country <- Top_demand_per_country %>%
  mutate(popup_info = paste(country,'<br/>','Demand:',demand,'<br/>','Revenue:',revenue))

leaflet() %>%
  addTiles() %>%
  addCircleMarkers(data = Top_demand_per_country,lat = ~lat, lng = ~lon, radius = ~10, popup = ~popup_info)

Question 5: How the revenues have behaved in june based on movie category? Just consider the top 5 based on rents(question 3)


SELECT
    rent_per_day_table.rental_date         AS rental_date,
  rent_per_day_table.movie_category      AS movie_category,
  rent_per_day_table.total_rents_per_day AS rents_per_day,
  SUM(rent_per_day_table.total_revenue) OVER 
        (PARTITION BY rent_per_day_table.movie_category ORDER BY rent_per_day_table.rental_date) AS cum_revenue
FROM(
  SELECT
    temp_rent_table.rental_date AS rental_date,
    temp_rent_table.category AS movie_category,
    COUNT (DISTINCT temp_rent_table.rental_id) AS total_rents_per_day,
    SUM(temp_rent_table.amount) AS total_revenue
  FROM(
    SELECT 
      DATE(rental.rental_date) AS rental_date,
      rental.rental_id AS rental_id,
      category.name AS category,
      payment.amount AS amount
    FROM 
      film
    JOIN
      inventory
    ON
      film.film_id = inventory.film_id
    JOIN
      rental
    ON
      rental.inventory_id = inventory.inventory_id
    JOIN
      payment
    ON
      rental.rental_id = payment.rental_id
    JOIN
      film_category
    ON
      film.film_id = film_category.film_id
    JOIN
    category
    ON
      film_category.category_id = category.category_id
    WHERE 
      inventory.inventory_id IS NOT NULL AND
      rental.rental_id IS NOT NULL AND
      EXTRACT(YEAR FROM rental_date) = 2005 AND
      EXTRACT(MONTH FROM rental_date) = 6
  ) temp_rent_table
  GROUP BY
    temp_rent_table.rental_date,
    temp_rent_table.category
) rent_per_day_table
highchart() %>%
  hc_chart(type = 'line') %>%
  hc_series(list(name = 'Sports', data = cumulative_revenue_per_day[cumulative_revenue_per_day$movie_category == 'Sports',]$cum_revenue),
            list(name = 'Animation', data = cumulative_revenue_per_day[cumulative_revenue_per_day$movie_category == 'Animation',]$cum_revenue),
            list(name = 'Action', data = cumulative_revenue_per_day[cumulative_revenue_per_day$movie_category == 'Action',]$cum_revenue),
            list(name = 'Sci-Fi', data = cumulative_revenue_per_day[cumulative_revenue_per_day$movie_category == 'Sci-Fi',]$cum_revenue),
            list(name = 'Family', data = cumulative_revenue_per_day[cumulative_revenue_per_day$movie_category == 'Family',]$cum_revenue)) %>%
  hc_xAxis(categories = cumulative_revenue_per_day$rental_date)

Question 6: If the company wants to reward premium users, it needs to identify their top 20. For this the company might need the customer’s details.

SELECT
      customer.first_name || ' ' || customer.last_name AS customer_name,
      ROUND(SUM(payment.amount))                       AS total_payment,
      customer.email                                   AS email,
      address.address                                  AS address,
      address.phone                                    AS phone,
      city.city                                        AS city,
      country.country                                  AS country
  FROM
      customer
  JOIN
      payment
  ON
    customer.customer_id = payment.customer_id
  JOIN
    address
  ON
    customer.address_id = address.address_id
  JOIN
    city
  ON
    address.city_id = city.city_id
  JOIN
    country
  ON
    city.country_id = country.country_id
  GROUP BY
    customer.first_name || ' ' || customer.last_name,
    customer.email,
    address.address,
    address.phone,
    city.city,
    country.country
  ORDER BY
    SUM(payment.amount) DESC
  LIMIT 20
datatable(top_20_clients, colnames = c('Customer','Total payment','E-mail','Address','Phone','City','Country'))

Question 7: How many loses or replacement cost the company is incurring by clients that are not returning the rented films?


SELECT
  SUM(film.replacement_cost) AS incurring_costs
FROM
  film
JOIN
    inventory
ON
    film.film_id = inventory.inventory_id
JOIN
    rental
ON
    inventory.inventory_id = rental.inventory_id
JOIN
    customer
ON
    rental.customer_id = customer.customer_id
WHERE
    rental.return_date IS NULL

The cost is 911.55 dollars.

Question 8: What is the average rental rate for each category? (by Okoh Anita in freeCodeCamp)

SELECT
  category.name                  AS category,
  ROUND(AVG(film.rental_rate),2) AS average_rental_rate
FROM
  film
JOIN
  film_category
ON
  film.film_id = film_category.film_id
JOIN
  category
ON
  film_category.category_id = category.category_id
GROUP BY
  category.name
ORDER BY
  ROUND(AVG(film.rental_rate),2) DESC
highchart() %>%
  hc_chart(type = 'bar') %>%
  hc_xAxis(categories = average_rental_rate$category) %>%
  hc_add_series(name = 'Average rental rate per category', data = average_rental_rate$average_rental_rate)

Question 9: How many films were returned in time, late or never returned? (by Okoh Anita in freeCodeCamp with modification)

SELECT
     returned_days.return_description                      AS return_description,
    COUNT(returned_days.inventory_id)                     AS number_of_films
  FROM(
    SELECT
      inventory.inventory_id                              AS inventory_id,
      film.rental_duration                                AS rental_duration,
      DATE(rental.rental_date)                            AS rental_date,
      DATE(rental.return_date)                            AS return_date,
      DATE(rental.return_date) - DATE(rental.rental_date) AS days_returned,
      CASE
        WHEN DATE(rental.return_date) - DATE(rental.rental_date) = film.rental_duration
          THEN 'return in time'
        WHEN DATE(rental.return_date) - DATE(rental.rental_date) > film.rental_duration
          THEN 'return late'
        WHEN DATE(rental.return_date) - DATE(rental.rental_date) < film.rental_duration
          THEN 'return early'
        WHEN rental.return_date IS NULL
          THEN 'never returned'
      END                                                 AS return_description
    FROM
      film
    JOIN
      inventory
    ON
      film.film_id = inventory.film_id
    JOIN
      rental
    ON
      inventory.inventory_id = rental.inventory_id
) returned_days
  GROUP BY
    returned_days.return_description
  ORDER BY
    COUNT(returned_days.inventory_id) DESC
highchart() %>%
  hc_chart(type = 'column') %>%
  hc_xAxis(categories = returned_films$return_description) %>%
  hc_add_series(name = 'Description', data = returned_films$number_of_films)
dbDisconnect(conn = con)
## [1] TRUE