#library
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the comma-separated data file (first row holds the column names)
# and print a compact summary of the resulting data frame's structure.
t_longwinded <- read.csv(
  file = "t_dur_160504.csv",
  header = TRUE,
  sep = ","
)
str(t_longwinded)
## 'data.frame': 1092 obs. of 32 variables:
## $ newst : Factor w/ 1092 levels "p.1-17","p.1-19",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ name : Factor w/ 1009 levels "Aakash Odedra",..: 37 553 11 648 478 163 971 557 156 966 ...
## $ role_origin : Factor w/ 757 levels "","9/11 mothers",..: 378 593 390 312 206 21 566 479 567 574 ...
## $ title : Factor w/ 1092 levels " How synchronized hammer strikes could generate nuclear fusion",..: 45 1085 89 48 339 792 922 858 921 948 ...
## $ position : Factor w/ 202 levels "Arbejdsglaede Live",..: 57 21 57 57 57 57 57 109 69 57 ...
## $ dur : Factor w/ 703 levels "10:02","10:03",..: 374 94 236 29 231 343 368 47 235 381 ...
## $ dur_sec : int 1145 747 957 654 951 1099 1134 681 955 1158 ...
## $ yyyymmdd : Factor w/ 104 levels "1972-05-01","1984-02-01",..: 104 102 104 104 104 104 104 93 103 104 ...
## $ sub : Factor w/ 49 levels "10","11","12",..: 31 46 41 1 4 46 3 47 46 48 ...
## $ totalViews : int 712809 1151849 852920 683227 1090977 661699 1006802 1043401 1086686 838626 ...
## $ topics : Factor w/ 1083 levels "Activism__Film__Global issues__Online video__Social change",..: 578 86 369 602 169 497 232 941 1082 226 ...
## $ Beautiful : int 97 44 158 35 56 407 481 95 143 6 ...
## $ OK : int 61 29 14 44 25 33 32 36 29 52 ...
## $ Funny : int 10 344 6 16 62 56 29 4 41 15 ...
## $ Unconvincing: int 42 18 2 45 11 54 5 0 19 51 ...
## $ Fascinating : int 323 302 81 211 185 119 151 52 284 96 ...
## $ Informative : int 177 302 264 121 181 63 209 109 221 235 ...
## $ Ingenious : int 92 51 33 88 75 58 56 8 167 61 ...
## $ Persuasive : int 11 37 299 14 70 214 263 2 50 71 ...
## $ Inspiring : int 170 113 474 135 354 722 870 28 369 140 ...
## $ Courageous : int 6 11 198 15 47 340 124 2 23 19 ...
## $ Obnoxious : int 25 15 3 12 13 16 8 1 6 33 ...
## $ Confusing : int 11 4 1 10 3 45 15 10 3 9 ...
## $ Longwinded : int 41 22 3 8 12 28 1 7 13 13 ...
## $ Jaw.dropping: int 208 64 19 79 8 49 40 3 77 14 ...
## $ role : Factor w/ 10 levels "Art","Explorer",..: 9 9 7 4 1 7 10 9 3 4 ...
## $ posValue : int 1155 1297 1546 758 1063 2061 2255 339 1404 709 ...
## $ negValue : int 119 59 9 75 39 143 29 18 41 106 ...
## $ totalRate : int 1274 1356 1555 833 1102 2204 2284 357 1445 815 ...
## $ meanValues : int 61 29 14 44 25 33 32 36 29 52 ...
## $ posPer : num 90.7 95.6 99.4 91 96.5 ...
## $ negPer : num 9.341 4.351 0.579 9.004 3.539 ...
# Share of the total ratings (totalRate) taken up by "Longwinded" votes,
# expressed as a percentage.
t_longwinded[["Longwinded_rate"]] <-
  t_longwinded[["Longwinded"]] / t_longwinded[["totalRate"]] * 100

# Density of the raw Longwinded counts, then of the percentage share.
ggplot(data = t_longwinded, mapping = aes(Longwinded)) +
  geom_density()
ggplot(data = t_longwinded, mapping = aes(Longwinded_rate)) +
  geom_density()

# Scatter plots of dur_sec against each outcome, each overlaid with a
# linear-model smoother.
ggplot(data = t_longwinded, mapping = aes(dur_sec, Longwinded_rate)) +
  geom_point(stat = "identity") +
  geom_smooth(method = "lm")
ggplot(data = t_longwinded, mapping = aes(dur_sec, totalViews)) +
  geom_point(stat = "identity") +
  geom_smooth(method = "lm")
ggplot(data = t_longwinded, mapping = aes(dur_sec, posPer)) +
  geom_point(stat = "identity") +
  geom_smooth(method = "lm")
ggplot(data = t_longwinded, mapping = aes(dur_sec, negPer)) +
  geom_point(stat = "identity") +
  geom_smooth(method = "lm")
# NOTE: the original line here called install.packages() on
# "texlive-latex-extra" using typographic quotes, which R cannot parse, so the
# whole script failed to load. "texlive-latex-extra" is also not an R package
# but a system-level TeX Live package; install it with the OS package manager
# (e.g. `sudo apt-get install texlive-latex-extra`), not from R.
# NOTE(review): presumably it was wanted for rendering this report to PDF --
# confirm before adding it to any setup instructions.

# Correlation coefficients between dur_sec and each outcome.
# Left disabled, as in the original script; uncomment to compute them.
# cor(t_longwinded$dur_sec, t_longwinded$Longwinded_rate)
# cor(t_longwinded$dur_sec, t_longwinded$Longwinded)
# cor(t_longwinded$dur_sec, t_longwinded$totalViews)
# cor(t_longwinded$dur_sec, t_longwinded$posPer)
# cor(t_longwinded$dur_sec, t_longwinded$negPer)