Packages to install
## Using sakilla Database * ### Look @ Sakilla schema in Workbench
File -> Open models -> C:FilesWorkbench 6.3 CE
open odbc -> search for odbc
Select sakilla
Configure sakilla
Test sakilla Press Test
install RODBC library
tools -> install Packages -> RODBC
loading rodbc
require(RODBC)
## Loading required package: RODBCconnect to database
ch <- odbcConnect(dsn="sakila")Sql Command
sql <- 'SELECT * FROM rental AS t1 JOIN
(SELECT film.*, film_category.category_id FROM
film, film_category WHERE film.film_id=film_category.film_id)
AS t2 ON t2.film_id= (SELECT film_id FROM inventory AS t3
WHERE t3.inventory_id=t1.inventory_id);'Sql Query
customer <- sqlQuery(channel=ch, query=sql)close DB
odbcClose(channel=ch)customer table
customer <- customer[,-c(10,13,20)]
dim(customer)
## [1] 16044 18
require(Hmisc)
## Loading required package: Hmisc
## Loading required package: grid
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
##
## The following objects are masked from 'package:base':
##
## format.pval, round.POSIXt, trunc.POSIXt, units
describe(customer)
## customer
##
## 18 Variables 16044 Observations
## ---------------------------------------------------------------------------
## rental_id
## n missing unique Info Mean .05 .10 .25 .50
## 16044 0 16044 1 8025 804.2 1606.3 4013.8 8025.5
## .75 .90 .95
## 12037.2 14443.7 15245.8
##
## lowest : 1 2 3 4 5
## highest: 16045 16046 16047 16048 16049
## ---------------------------------------------------------------------------
## rental_date
## n missing unique
## 16044 0 15815
## Info Mean .05
## 1 2005-07-23 08:12:53 2005-05-29 18:11:35
## .10 .25 .50
## 2005-06-16 06:20:38 2005-07-07 00:58:40 2005-07-28 16:04:32
## .75 .90 .95
## 2005-08-17 21:16:23 2005-08-21 16:44:31 2005-08-22 23:11:48
##
## lowest : 2005-05-24 22:53:30 2005-05-24 22:54:33 2005-05-24 23:03:39 2005-05-24 23:04:41 2005-05-24 23:05:21
## highest: 2005-08-23 22:26:47 2005-08-23 22:42:48 2005-08-23 22:43:07 2005-08-23 22:50:12 2006-02-14 15:16:03
## ---------------------------------------------------------------------------
## inventory_id
## n missing unique Info Mean .05 .10 .25 .50
## 16044 0 4580 1 2292 230 459 1154 2291
## .75 .90 .95
## 3433 4130 4360
##
## lowest : 1 2 3 4 6, highest: 4577 4578 4579 4580 4581
## ---------------------------------------------------------------------------
## customer_id
## n missing unique Info Mean .05 .10 .25 .50
## 16044 0 599 1 297.1 29 58 148 296
## .75 .90 .95
## 446 536 569
##
## lowest : 1 2 3 4 5, highest: 595 596 597 598 599
## ---------------------------------------------------------------------------
## return_date
## n missing unique
## 15861 183 15836
## Info Mean .05
## 1 2005-07-25 23:58:03 2005-06-04 03:23:13
## .10 .25 .50
## 2005-06-20 06:54:02 2005-07-10 15:49:36 2005-08-01 19:45:29
## .75 .90 .95
## 2005-08-20 23:35:55 2005-08-26 18:52:54 2005-08-28 16:10:09
##
## lowest : 2005-05-25 23:55:21 2005-05-26 01:01:46 2005-05-26 02:56:15 2005-05-26 04:42:11 2005-05-26 04:52:23
## highest: 2005-09-01 22:27:31 2005-09-01 23:43:24 2005-09-02 01:28:33 2005-09-02 02:19:33 2005-09-02 02:35:22
## ---------------------------------------------------------------------------
## staff_id
## n missing unique Info Mean
## 16044 0 2 0.75 1.499
##
## 1 (8040, 50%), 2 (8004, 50%)
## ---------------------------------------------------------------------------
## last_update
## n missing unique
## 16044 0 2
## Info Mean
## 0 2006-02-15 21:31:32
##
## 2006-02-15 21:30:53 (16043, 100%)
## 2006-02-23 04:12:08 (1, 0%)
## ---------------------------------------------------------------------------
## film_id
## n missing unique Info Mean .05 .10 .25 .50
## 16044 0 958 1 501.1 51 101 255 496
## .75 .90 .95
## 753 897 951
##
## lowest : 1 2 3 4 5, highest: 996 997 998 999 1000
## ---------------------------------------------------------------------------
## title
## n missing unique
## 16044 0 958
##
## lowest : ACADEMY DINOSAUR ACE GOLDFINGER ADAPTATION HOLES AFFAIR PREJUDICE AFRICAN EGG
## highest: YOUNG LANGUAGE YOUTH KICK ZHIVAGO CORE ZOOLANDER FICTION ZORRO ARK
## ---------------------------------------------------------------------------
## release_year
## n missing unique Info Mean
## 16044 0 1 0 2006
## ---------------------------------------------------------------------------
## language_id
## n missing unique Info Mean
## 16044 0 1 0 1
## ---------------------------------------------------------------------------
## rental_duration
## n missing unique Info Mean
## 16044 0 5 0.96 4.935
##
## 3 4 5 6 7
## Frequency 3412 3251 3165 3392 2824
## % 21 20 20 21 18
## ---------------------------------------------------------------------------
## rental_rate
## n missing unique Info Mean
## 16044 0 3 0.89 2.943
##
## 0.99 (5652, 35%), 2.99 (5120, 32%), 4.99 (5272, 33%)
## ---------------------------------------------------------------------------
## length
## n missing unique Info Mean .05 .10 .25 .50
## 16044 0 140 1 115 52 60 81 114
## .75 .90 .95
## 148 172 179
##
## lowest : 46 47 48 49 50, highest: 181 182 183 184 185
## ---------------------------------------------------------------------------
## replacement_cost
## n missing unique Info Mean .05 .10 .25 .50
## 16044 0 21 1 20.22 10.99 11.99 14.99 20.99
## .75 .90 .95
## 25.99 27.99 29.99
##
## lowest : 9.99 10.99 11.99 12.99 13.99
## highest: 25.99 26.99 27.99 28.99 29.99
## ---------------------------------------------------------------------------
## rating
## n missing unique
## 16044 0 5
##
## G NC-17 PG PG-13 R
## Frequency 2773 3293 3212 3585 3181
## % 17 21 20 22 20
## ---------------------------------------------------------------------------
## special_features
## n missing unique
## 16044 0 15
##
## Behind the Scenes (1114, 7%)
## Commentaries (1101, 7%)
## Commentaries,Behind the Scenes (1093, 7%)
## Commentaries,Deleted Scenes (1020, 6%)
## Commentaries,Deleted Scenes,Behind the Scenes (1112, 7%)
## Deleted Scenes (1037, 6%)
## Deleted Scenes,Behind the Scenes (1049, 7%)
## Trailers (1151, 7%)
## Trailers,Behind the Scenes (1137, 7%)
## Trailers,Commentaries (1140, 7%)
## Trailers,Commentaries,Behind the Scenes (1320, 8%)
## Trailers,Commentaries,Deleted Scenes (922, 6%)
## Trailers,Commentaries,Deleted Scenes,Behind the Scenes (1003, 6%)
## Trailers,Deleted Scenes (1065, 7%)
## Trailers,Deleted Scenes,Behind the Scenes (780, 5%)
## ---------------------------------------------------------------------------
## category_id
## n missing unique Info Mean .05 .10 .25 .50
## 16044 0 16 1 8.364 1 2 4 8
## .75 .90 .95
## 13 15 16
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## Frequency 1112 1166 945 939 941 1050 1060 1096 1033 969 846 830 940 1101
## % 7 7 6 6 6 7 7 7 6 6 5 5 6 7
## 15 16
## Frequency 1179 837
## % 7 5
## ---------------------------------------------------------------------------
customer <- customer[,-11]
ch <- odbcConnect(dsn="sakila")
sql <- 'SELECT category_id,name from category'
category_name <- sqlQuery(channel=ch, query=sql)
odbcClose(channel=ch)
customer$category_id <-as.factor(x=customer$category_id)
levels(customer$category_id) <- category_name$name
require(ggplot2)
p<-ggplot(data=customer, mapping=aes(x=category_id, fill=..count..))
p+geom_bar()
p<-ggplot(data=customer, mapping=aes(x=rating, fill=..count..))
p+geom_bar()