1 영상 길이와 Longwinded의 관계

#library
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the comma-separated file (first row holds the column names) into a
# data frame, then print the structure of the result.
t_longwinded <- read.csv("t_dur_160504.csv", header = TRUE, sep = ",")
str(t_longwinded)
## 'data.frame':    1092 obs. of  32 variables:
##  $ newst       : Factor w/ 1092 levels "p.1-17","p.1-19",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ name        : Factor w/ 1009 levels "Aakash Odedra",..: 37 553 11 648 478 163 971 557 156 966 ...
##  $ role_origin : Factor w/ 757 levels "","9/11 mothers",..: 378 593 390 312 206 21 566 479 567 574 ...
##  $ title       : Factor w/ 1092 levels " How synchronized hammer strikes could generate nuclear fusion",..: 45 1085 89 48 339 792 922 858 921 948 ...
##  $ position    : Factor w/ 202 levels "Arbejdsglaede Live",..: 57 21 57 57 57 57 57 109 69 57 ...
##  $ dur         : Factor w/ 703 levels "10:02","10:03",..: 374 94 236 29 231 343 368 47 235 381 ...
##  $ dur_sec     : int  1145 747 957 654 951 1099 1134 681 955 1158 ...
##  $ yyyymmdd    : Factor w/ 104 levels "1972-05-01","1984-02-01",..: 104 102 104 104 104 104 104 93 103 104 ...
##  $ sub         : Factor w/ 49 levels "10","11","12",..: 31 46 41 1 4 46 3 47 46 48 ...
##  $ totalViews  : int  712809 1151849 852920 683227 1090977 661699 1006802 1043401 1086686 838626 ...
##  $ topics      : Factor w/ 1083 levels "Activism__Film__Global issues__Online video__Social change",..: 578 86 369 602 169 497 232 941 1082 226 ...
##  $ Beautiful   : int  97 44 158 35 56 407 481 95 143 6 ...
##  $ OK          : int  61 29 14 44 25 33 32 36 29 52 ...
##  $ Funny       : int  10 344 6 16 62 56 29 4 41 15 ...
##  $ Unconvincing: int  42 18 2 45 11 54 5 0 19 51 ...
##  $ Fascinating : int  323 302 81 211 185 119 151 52 284 96 ...
##  $ Informative : int  177 302 264 121 181 63 209 109 221 235 ...
##  $ Ingenious   : int  92 51 33 88 75 58 56 8 167 61 ...
##  $ Persuasive  : int  11 37 299 14 70 214 263 2 50 71 ...
##  $ Inspiring   : int  170 113 474 135 354 722 870 28 369 140 ...
##  $ Courageous  : int  6 11 198 15 47 340 124 2 23 19 ...
##  $ Obnoxious   : int  25 15 3 12 13 16 8 1 6 33 ...
##  $ Confusing   : int  11 4 1 10 3 45 15 10 3 9 ...
##  $ Longwinded  : int  41 22 3 8 12 28 1 7 13 13 ...
##  $ Jaw.dropping: int  208 64 19 79 8 49 40 3 77 14 ...
##  $ role        : Factor w/ 10 levels "Art","Explorer",..: 9 9 7 4 1 7 10 9 3 4 ...
##  $ posValue    : int  1155 1297 1546 758 1063 2061 2255 339 1404 709 ...
##  $ negValue    : int  119 59 9 75 39 143 29 18 41 106 ...
##  $ totalRate   : int  1274 1356 1555 833 1102 2204 2284 357 1445 815 ...
##  $ meanValues  : int  61 29 14 44 25 33 32 36 29 52 ...
##  $ posPer      : num  90.7 95.6 99.4 91 96.5 ...
##  $ negPer      : num  9.341 4.351 0.579 9.004 3.539 ...

1.1 ggplot

# Longwinded_rate: the Longwinded count expressed as a percentage of
# totalRate for each row.
t_longwinded$Longwinded_rate <- with(
  t_longwinded,
  (Longwinded / totalRate) * 100
)

# Density of the raw Longwinded counts.
ggplot(t_longwinded, aes(x = Longwinded)) +
  geom_density()

# Density of the Longwinded percentage computed above.
ggplot(t_longwinded, aes(x = Longwinded_rate)) +
  geom_density()

1.2 x:영상길이, y: 지루함 평가비율

  • cor : 0.3061096 #Longwinded_rate
  • cor : 0.3818343 #Longwinded
# Scatter plot of Longwinded_rate against dur_sec, overlaid with a
# linear-model ("lm") smoothing line.
ggplot(t_longwinded, aes(x = dur_sec, y = Longwinded_rate)) +
  geom_point(stat = "identity") +
  geom_smooth(method = "lm")

1.3 x:영상길이, y: 조회수

  • cor : -0.01219105 #totalViews
# Scatter plot of totalViews against dur_sec, overlaid with a
# linear-model ("lm") smoothing line.
ggplot(t_longwinded, aes(x = dur_sec, y = totalViews)) +
  geom_point(stat = "identity") +
  geom_smooth(method = "lm")

1.4 x:영상길이, y: 긍정적인 평가비율(%)

  • cor : -0.02566225 #posPer
# Scatter plot of posPer against dur_sec, overlaid with a
# linear-model ("lm") smoothing line.
ggplot(t_longwinded, aes(x = dur_sec, y = posPer)) +
  geom_point(stat = "identity") +
  geom_smooth(method = "lm")

1.5 x:영상길이, y: 부정적인 평가비율(%)

  • cor : 0.02566225 #negPer
# Scatter plot of negPer against dur_sec, overlaid with a
# linear-model ("lm") smoothing line.
ggplot(t_longwinded, aes(x = dur_sec, y = negPer)) +
  geom_point(stat = "identity") +
  geom_smooth(method = "lm")

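# install.packages("texlive-latex-extra")  # NOTE: texlive-latex-extra is a TeX Live system package, not an R package; install it with the OS package manager instead.

## 상관계수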

#cor(t_longwinded$dur_sec, t_longwinded$Longwinded_rate)
#cor(t_longwinded$dur_sec, t_longwinded$Longwinded)
#cor(t_longwinded$dur_sec, t_longwinded$totalViews)
#cor(t_longwinded$dur_sec, t_longwinded$posPer)
#cor(t_longwinded$dur_sec, t_longwinded$negPer)