- Job Keywords : Go / AWS / IoT / Android
2015年12月18日
開發 Android 影音 APP
由 GA 收集使用者行為資料
由行為資料再產生推薦系統 (未達成)
看過這部影片的也看過
你最近看過的影片跟推薦的影片
沒有User Activities
Netfix Prize [Netflix Prize] (http://www.netflixprize.com/)
2009 , 1 百萬給 BellKor's Pragmatic Chao Team
基於使用者的推薦系統
基於物品的推薦系統
require(RODBC)
## Loading required package: RODBC
ch <- odbcConnect(dsn="sakila")
dsn: data source name
sql <- 'SELECT * FROM rental AS t1 JOIN (SELECT film.*, film_category.category_id FROM film, film_category WHERE film.film_id=film_category.film_id) AS t2 ON t2.film_id= (SELECT film_id FROM inventory AS t3 WHERE t3.inventory_id=t1.inventory_id);'
customer <- sqlQuery(channel=ch, query=sql)
odbcClose(channel=ch)
customer <- customer[,-c(10,13,20)] dim(customer)
## [1] 16044 18
require(Hmisc)
## Loading required package: Hmisc ## Loading required package: grid ## Loading required package: lattice ## Loading required package: survival ## Loading required package: Formula ## Loading required package: ggplot2 ## ## Attaching package: 'Hmisc' ## ## The following objects are masked from 'package:base': ## ## format.pval, round.POSIXt, trunc.POSIXt, units
describe(customer)
## customer ## ## 18 Variables 16044 Observations ## --------------------------------------------------------------------------- ## rental_id ## n missing unique Info Mean .05 .10 .25 .50 ## 16044 0 16044 1 8025 804.2 1606.3 4013.8 8025.5 ## .75 .90 .95 ## 12037.2 14443.7 15245.8 ## ## lowest : 1 2 3 4 5 ## highest: 16045 16046 16047 16048 16049 ## --------------------------------------------------------------------------- ## rental_date ## n missing unique ## 16044 0 15815 ## Info Mean .05 ## 1 2005-07-23 08:12:53 2005-05-29 18:11:35 ## .10 .25 .50 ## 2005-06-16 06:20:38 2005-07-07 00:58:40 2005-07-28 16:04:32 ## .75 .90 .95 ## 2005-08-17 21:16:23 2005-08-21 16:44:31 2005-08-22 23:11:48 ## ## lowest : 2005-05-24 22:53:30 2005-05-24 22:54:33 2005-05-24 23:03:39 2005-05-24 23:04:41 2005-05-24 23:05:21 ## highest: 2005-08-23 22:26:47 2005-08-23 22:42:48 2005-08-23 22:43:07 2005-08-23 22:50:12 2006-02-14 15:16:03 ## --------------------------------------------------------------------------- ## inventory_id ## n missing unique Info Mean .05 .10 .25 .50 ## 16044 0 4580 1 2292 230 459 1154 2291 ## .75 .90 .95 ## 3433 4130 4360 ## ## lowest : 1 2 3 4 6, highest: 4577 4578 4579 4580 4581 ## --------------------------------------------------------------------------- ## customer_id ## n missing unique Info Mean .05 .10 .25 .50 ## 16044 0 599 1 297.1 29 58 148 296 ## .75 .90 .95 ## 446 536 569 ## ## lowest : 1 2 3 4 5, highest: 595 596 597 598 599 ## --------------------------------------------------------------------------- ## return_date ## n missing unique ## 15861 183 15836 ## Info Mean .05 ## 1 2005-07-25 23:58:03 2005-06-04 03:23:13 ## .10 .25 .50 ## 2005-06-20 06:54:02 2005-07-10 15:49:36 2005-08-01 19:45:29 ## .75 .90 .95 ## 2005-08-20 23:35:55 2005-08-26 18:52:54 2005-08-28 16:10:09 ## ## lowest : 2005-05-25 23:55:21 2005-05-26 01:01:46 2005-05-26 02:56:15 2005-05-26 04:42:11 2005-05-26 04:52:23 ## highest: 2005-09-01 22:27:31 2005-09-01 23:43:24 2005-09-02 01:28:33 2005-09-02 02:19:33 2005-09-02 02:35:22 ## --------------------------------------------------------------------------- ## staff_id ## n missing unique Info Mean ## 16044 0 2 0.75 1.499 ## ## 1 (8040, 50%), 2 (8004, 50%) ## --------------------------------------------------------------------------- ## last_update ## n missing unique ## 16044 0 2 ## Info Mean ## 0 2006-02-15 21:31:32 ## ## 2006-02-15 21:30:53 (16043, 100%) ## 2006-02-23 04:12:08 (1, 0%) ## --------------------------------------------------------------------------- ## film_id ## n missing unique Info Mean .05 .10 .25 .50 ## 16044 0 958 1 501.1 51 101 255 496 ## .75 .90 .95 ## 753 897 951 ## ## lowest : 1 2 3 4 5, highest: 996 997 998 999 1000 ## --------------------------------------------------------------------------- ## title ## n missing unique ## 16044 0 958 ## ## lowest : ACADEMY DINOSAUR ACE GOLDFINGER ADAPTATION HOLES AFFAIR PREJUDICE AFRICAN EGG ## highest: YOUNG LANGUAGE YOUTH KICK ZHIVAGO CORE ZOOLANDER FICTION ZORRO ARK ## --------------------------------------------------------------------------- ## release_year ## n missing unique Info Mean ## 16044 0 1 0 2006 ## --------------------------------------------------------------------------- ## language_id ## n missing unique Info Mean ## 16044 0 1 0 1 ## --------------------------------------------------------------------------- ## rental_duration ## n missing unique Info Mean ## 16044 0 5 0.96 4.935 ## ## 3 4 5 6 7 ## Frequency 3412 3251 3165 3392 2824 ## % 21 20 20 21 18 ## --------------------------------------------------------------------------- ## rental_rate ## n missing unique Info Mean ## 16044 0 3 0.89 2.943 ## ## 0.99 (5652, 35%), 2.99 (5120, 32%), 4.99 (5272, 33%) ## --------------------------------------------------------------------------- ## length ## n missing unique Info Mean .05 .10 .25 .50 ## 16044 0 140 1 115 52 60 81 114 ## .75 .90 .95 ## 148 172 179 ## ## lowest : 46 47 48 49 50, highest: 181 182 183 184 185 ## --------------------------------------------------------------------------- ## replacement_cost ## n missing unique Info Mean .05 .10 .25 .50 ## 16044 0 21 1 20.22 10.99 11.99 14.99 20.99 ## .75 .90 .95 ## 25.99 27.99 29.99 ## ## lowest : 9.99 10.99 11.99 12.99 13.99 ## highest: 25.99 26.99 27.99 28.99 29.99 ## --------------------------------------------------------------------------- ## rating ## n missing unique ## 16044 0 5 ## ## G NC-17 PG PG-13 R ## Frequency 2773 3293 3212 3585 3181 ## % 17 21 20 22 20 ## --------------------------------------------------------------------------- ## special_features ## n missing unique ## 16044 0 15 ## ## Behind the Scenes (1114, 7%) ## Commentaries (1101, 7%) ## Commentaries,Behind the Scenes (1093, 7%) ## Commentaries,Deleted Scenes (1020, 6%) ## Commentaries,Deleted Scenes,Behind the Scenes (1112, 7%) ## Deleted Scenes (1037, 6%) ## Deleted Scenes,Behind the Scenes (1049, 7%) ## Trailers (1151, 7%) ## Trailers,Behind the Scenes (1137, 7%) ## Trailers,Commentaries (1140, 7%) ## Trailers,Commentaries,Behind the Scenes (1320, 8%) ## Trailers,Commentaries,Deleted Scenes (922, 6%) ## Trailers,Commentaries,Deleted Scenes,Behind the Scenes (1003, 6%) ## Trailers,Deleted Scenes (1065, 7%) ## Trailers,Deleted Scenes,Behind the Scenes (780, 5%) ## --------------------------------------------------------------------------- ## category_id ## n missing unique Info Mean .05 .10 .25 .50 ## 16044 0 16 1 8.364 1 2 4 8 ## .75 .90 .95 ## 13 15 16 ## ## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 ## Frequency 1112 1166 945 939 941 1050 1060 1096 1033 969 846 830 940 1101 ## % 7 7 6 6 6 7 7 7 6 6 5 5 6 7 ## 15 16 ## Frequency 1179 837 ## % 7 5 ## ---------------------------------------------------------------------------
customer <- customer[,-11]
ch <- odbcConnect(dsn="sakila") sql <- 'SELECT category_id,name from category' category_name <- sqlQuery(channel=ch, query=sql) odbcClose(channel=ch) customer$category_id <-as.factor(x=customer$category_id) levels(customer$category_id) <- category_name$name require(ggplot2)
p<-ggplot(data=customer, mapping=aes(x=category_id, fill=..count..)) p+geom_bar()
p<-ggplot(data=customer, mapping=aes(x=rating, fill=..count..)) p+geom_bar()