# Load the TED talks workbook (path is relative to the working directory).
ted <- readxl::read_excel(path = "TED.xlsx")
# View(ted)
library(lubridate)
# Inspect the clock components of the first row's `duration` value.
hour(ted[["duration"]][1])
## [1] 0
minute(ted[["duration"]][1])
## [1] 16
second(ted[["duration"]][1])
## [1] 17
library(magrittr)
library(tidyverse)
library(dplyr)
library(tidyr)
library(lubridate)
# Express each talk's duration as a decimal number of minutes, built from the
# second, minute and hour components of the `duration` date-time.
ted <- ted %>%
  mutate(
    duration_minutes =
      (second(duration) + 60 * minute(duration) + 3600 * hour(duration)) / 60
  )
head(ted[, c("duration", "duration_minutes")])
## # A tibble: 6 x 2
## duration duration_minutes
## <dttm> <dbl>
## 1 1899-12-31 00:16:17 16.3
## 2 1899-12-31 00:21:26 21.4
## 3 1899-12-31 00:18:36 18.6
## 4 1899-12-31 00:19:24 19.4
## 5 1899-12-31 00:19:50 19.8
## 6 1899-12-31 00:21:45 21.8
# Per-speaker summary: number of talks and mean talk length in minutes.
ted_speaker_metrics <- ted %>%
  group_by(speaker_name) %>%
  summarise(
    Number_talks = n(),
    Mean_talk_duration = mean(duration_minutes)
  )
head(as.data.frame(ted_speaker_metrics))
## speaker_name Number_talks Mean_talk_duration
## 1 Aakash Odedra 1 9.833333
## 2 Aala El-Khani 1 14.266667
## 3 Aaron Huey 1 15.450000
## 4 Aaron Koblin 1 18.300000
## 5 Aaron O'Connell 1 7.850000
## 6 Abe Davis 1 17.950000
# Round the mean duration to two decimals (done after the snapshot above,
# which still shows the unrounded values).
ted_speaker_metrics <- ted_speaker_metrics %>%
  mutate(Mean_talk_duration = round(Mean_talk_duration, 2))
library(htmlwidgets)
library(DT)
# Render the per-speaker summary as an interactive table widget.
ted_speaker_metrics %>% datatable()
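# Create bar graphs to show the number of talks per speaker and the mean talk duration of speakers with more than 3 talks: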
# Distribution of the number of talks per speaker.
ted_speaker_metrics %>%
  ggplot(aes(Number_talks)) +
  geom_histogram()

# Mean talk duration for speakers with more than 3 talks, bars sorted by
# duration and flipped so the speaker names sit on the vertical axis.
ted_speaker_metrics %>%
  filter(Number_talks > 3) %>%
  ggplot(aes(reorder(speaker_name, Mean_talk_duration), Mean_talk_duration)) +
  geom_col() +
  coord_flip()

# Same chart, bars coloured by the number of talks, with axis labels and a
# black-and-white theme.
ted_speaker_metrics %>%
  filter(Number_talks > 3) %>%
  ggplot(
    aes(
      reorder(speaker_name, Mean_talk_duration),
      Mean_talk_duration,
      fill = as.factor(Number_talks)
    )
  ) +
  geom_col() +
  labs(x = "", y = "Mean Talk Duration") +
  coord_flip() +
  theme_bw()

# Same chart again, now with a readable legend title.
ted_speaker_metrics %>%
  filter(Number_talks > 3) %>%
  ggplot(
    aes(
      reorder(speaker_name, Mean_talk_duration),
      Mean_talk_duration,
      fill = as.factor(Number_talks)
    )
  ) +
  geom_col() +
  labs(x = "", y = "Mean Talk Duration") +
  scale_fill_discrete(name = "Number of Talks") +
  coord_flip() +
  theme_bw()
# install.packages("plotly")
library(plotly)
# Keep the finished bar chart in `ggobject` so the same plot can be printed
# as a ggplot and then handed to ggplotly().
ggobject <- ted_speaker_metrics %>%
  filter(Number_talks > 3) %>%
  ggplot(
    aes(
      reorder(speaker_name, Mean_talk_duration),
      Mean_talk_duration,
      fill = as.factor(Number_talks)
    )
  ) +
  geom_bar(stat = "identity") +
  labs(x = "", y = "Mean Talk Duration") +
  scale_fill_discrete(name = "Number of Talks") +
  coord_flip() +
  theme_bw()
ggobject
ggplotly(p = ggobject)
# Speakers with more than 3 talks drawn directly with plot_ly(). No trace
# type is given in this first call, so the choice is left to plotly.
ted_speaker_metrics %>%
  filter(Number_talks > 3) %>%
  plot_ly(
    x = ~Mean_talk_duration,
    y = ~speaker_name,
    color = ~as.factor(Number_talks)
  )

# Explicit bar trace with speakers ordered by mean duration, plus a chart
# title and axis titles.
ted_speaker_metrics %>%
  filter(Number_talks > 3) %>%
  plot_ly(
    x = ~Mean_talk_duration,
    y = ~reorder(speaker_name, Mean_talk_duration),
    color = ~as.factor(Number_talks),
    type = "bar"
  ) %>%
  layout(
    title = "Speakers with more than 3 Ted Talks",
    yaxis = list(title = ""),
    # The x axis plots Mean_talk_duration, so it is labelled as a duration
    # (the label previously read "distribution").
    xaxis = list(title = "Mean talk duration")
  )
library(taucharts)
# Speakers with more than 3 talks, drawn with taucharts.
tmp <- ted_speaker_metrics %>%
  filter(Number_talks > 3)
tauchart(tmp) %>%
  tau_bar(
    "Mean_talk_duration", "speaker_name",
    # Column names are case-sensitive: the summary column is `Number_talks`,
    # not `Number_Talks`.
    color = "Number_talks",
    horizontal = "TRUE"
  ) %>%
  tau_legend() %>%
  tau_tooltip()

# Same chart with rows sorted by descending mean duration. speaker_name is
# turned into a factor whose levels follow that row order.
tmp <- ted_speaker_metrics %>%
  filter(Number_talks > 3) %>%
  arrange(desc(Mean_talk_duration))
tmp$speaker_name <- fct_inorder(tmp$speaker_name)
tauchart(tmp) %>%
  tau_bar(
    "Mean_talk_duration", "speaker_name",
    color = "Number_talks",
    horizontal = "TRUE"
  ) %>%
  tau_legend() %>%
  tau_tooltip()
#stringr
# Split each talk's comma-separated `tags` string into up to 50 columns
# (tag1 ... tag50), then stack those columns into a long table with one
# row per (tagnum, tag) pair.
tedtags <- ted %>%
  select(tags) %>%
  # Column names are generated instead of typed out by hand. fill = "right"
  # pads talks that have fewer than 50 tags with NA, which is the expected
  # case here, without raising the missing-pieces warning.
  separate(
    tags,
    into = paste0("tag", 1:50),
    sep = ",",
    fill = "right"
  ) %>%
  gather(tagnum, tag, tag1:tag50)
head(tedtags)
## # A tibble: 6 x 2
## tagnum tag
## <chr> <chr>
## 1 tag1 alternative energy
## 2 tag1 simplicity
## 3 tag1 MacArthur grant
## 4 tag1 children
## 5 tag1 demo
## 6 tag1 entertainment
# Normalise tags so that variants differing only in surrounding whitespace or
# letter case compare equal, e.g. "alternative energy" vs
# " alternative energy" vs "alternative energy ".
tedtags$tag <- tolower(trimws(tedtags$tag))