# Load the TED talks data from two sources: a local Excel workbook (`ted`)
# and a Google Sheet addressed by URL (`TED`).
ted <- readxl::read_excel(path = "ted.xlsx")

# install.packages("googlesheets")
library(tidyverse)
library(googlesheets)

# NOTE(review): the googlesheets package appears to have been superseded by
# googlesheets4 -- confirm it is still installable before relying on this.
sheeturl <- "https://docs.google.com/spreadsheets/d/1Yv_9nDl4ocIZR0GXU3OZuBaXxER1blfwR_XHvklPpEM/edit?hl=en&hl=en&hl=en#gid=0"

# Register the sheet from its URL, then read its contents into `TED`.
tedsheet <- gs_url(sheeturl)
TED <- gs_read(tedsheet)
library(dplyr)
# install.packages("lubridate")
library(lubridate)
# Inspect the hour / minute / second components of the first talk's duration
# in the Excel copy. The `##` lines are the recorded console output.
ted$duration[1] %>% hour()
## [1] 0
ted$duration[1] %>% minute()
## [1] 16
ted$duration[1] %>% second()
## [1] 17

# Add each talk's duration in decimal minutes to the Google Sheet copy:
# total seconds (seconds + 60 * minutes + 3600 * hours) divided by 60.
TED <- mutate(
  TED,
  duration_minutes =
    (second(duration) + 60 * minute(duration) + 3600 * hour(duration)) / 60
)

# Show the original and derived columns side by side for the first rows.
TED %>%
  select(duration, duration_minutes) %>%
  head()
## # A tibble: 6 x 2
## duration duration_minutes
## <time> <dbl>
## 1 16'17" 16.3
## 2 21'26" 21.4
## 3 18'36" 18.6
## 4 19'24" 19.4
## 5 19'50" 19.8
## 6 21'45" 21.8
# Per-speaker summary: the number of talks each speaker gave and the mean
# length of those talks in minutes.
TED_speaker_metrics <- TED %>%
  group_by(speaker_name) %>%
  summarise(
    Number_talks = n(),
    Mean_talk_duration = mean(duration_minutes)
  )

# Preview as a plain data frame. The `##` lines are the recorded output.
TED_speaker_metrics %>%
  as.data.frame() %>%
  head()
## speaker_name Number_talks Mean_talk_duration
## 1 Aakash Odedra 1 9.833333
## 2 Aala El-Khani 1 14.266667
## 3 Aaron Huey 1 15.450000
## 4 Aaron Koblin 1 18.300000
## 5 Aaron O'Connell 1 7.850000
## 6 Abe Davis 1 17.950000

# Round the mean duration to two decimal places.
TED_speaker_metrics <- TED_speaker_metrics %>%
  mutate(Mean_talk_duration = round(Mean_talk_duration, 2))
# install.packages("DT")
library(DT)
library(htmlwidgets)
# Render the per-speaker summary as an interactive DT table.
TED_speaker_metrics %>%
  datatable()
# Create graphs showing the speakers who gave more than 3 talks, and the top 20 tag terms.
library(ggplot2)
library(tidyverse)
# Distribution of the number of talks per speaker.
# Number_talks is an integer count, so use one bin per value. Leaving
# geom_histogram() on its default bin count splits the integer range into
# fractional-width bins and emits a "pick better value" message.
TED_speaker_metrics %>%
  ggplot(aes(x = Number_talks)) +
  geom_histogram(binwidth = 1)
# Filter for speakers with more than 3 talks.
# Mean talk duration for speakers with more than 3 talks, drawn as horizontal
# bars ordered by that mean.
TED_speaker_metrics %>%
  filter(Number_talks > 3) %>%
  ggplot(aes(
    x = reorder(speaker_name, Mean_talk_duration),
    y = Mean_talk_duration
  )) +
  geom_bar(stat = "identity") +
  coord_flip()
# Same horizontal bar chart with a blank speaker-axis title, a readable
# value-axis title and the black-and-white theme.
# The original repeated this exact statement twice in a row, drawing the
# identical plot twice; it is emitted once here.
TED_speaker_metrics %>%
  filter(Number_talks > 3) %>%
  ggplot(aes(
    x = reorder(speaker_name, Mean_talk_duration),
    y = Mean_talk_duration
  )) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(x = "", y = "Mean talk duration") +
  theme_bw()
# Colour the bars by the number of talks, mapped as a numeric value.
TED_speaker_metrics %>%
  filter(Number_talks > 3) %>%
  ggplot(aes(
    x = reorder(speaker_name, Mean_talk_duration),
    y = Mean_talk_duration,
    fill = Number_talks
  )) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(x = "", y = "Mean talk duration") +
  theme_bw()

# Same chart with the number of talks converted to a factor for the fill.
TED_speaker_metrics %>%
  filter(Number_talks > 3) %>%
  ggplot(aes(
    x = reorder(speaker_name, Mean_talk_duration),
    y = Mean_talk_duration,
    fill = as.factor(Number_talks)
  )) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(x = "", y = "Mean talk duration") +
  theme_bw()

# Factor fill again, now with an explicit legend title.
TED_speaker_metrics %>%
  filter(Number_talks > 3) %>%
  ggplot(aes(
    x = reorder(speaker_name, Mean_talk_duration),
    y = Mean_talk_duration,
    fill = as.factor(Number_talks)
  )) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(x = "", y = "Mean talk duration") +
  theme_bw() +
  scale_fill_discrete("Number of talks")
# install.packages("plotly")
library(plotly)
library(dplyr)
# Store the finished bar chart in `ggobject` so the same plot can be printed
# as a static ggplot and then handed to ggplotly().
ggobject <- TED_speaker_metrics %>%
  filter(Number_talks > 3) %>%
  ggplot(aes(
    x = reorder(speaker_name, Mean_talk_duration),
    y = Mean_talk_duration,
    fill = as.factor(Number_talks)
  )) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(x = "", y = "Mean talk duration") +
  theme_bw() +
  scale_fill_discrete("Number of talks")

# Static version.
ggobject
# library(plotly)
# install.packages("Cairo")
# library(Cairo)
# install

# plotly version built from the stored ggplot object.
ggplotly(ggobject)
# Build the chart directly with plot_ly(), with no explicit trace type and
# speakers in their default order.
TED_speaker_metrics %>%
  filter(Number_talks > 3) %>%
  plot_ly(
    x = ~Mean_talk_duration,
    y = ~speaker_name,
    color = ~as.factor(Number_talks)
  )

# Explicit bar chart with speakers ordered by mean duration, plus a chart
# title and axis titles.
TED_speaker_metrics %>%
  filter(Number_talks > 3) %>%
  plot_ly(
    x = ~Mean_talk_duration,
    y = ~reorder(speaker_name, Mean_talk_duration),
    color = ~as.factor(Number_talks),
    type = "bar"
  ) %>%
  layout(
    title = "speakers with more than 3 Ted Talks",
    yaxis = list(title = ""),
    xaxis = list(title = "Mean talk duration")
  )
#devtools::install_github("hrbrmstr/taucharts")
library(taucharts)
# Peek at the raw tag strings: each talk stores its tags as a single
# comma-separated value (see the recorded output below).
head(TED[["tags"]])
## [1] "alternative energy,cars,global issues,climate change,environment,science,culture,sustainability,technology"
## [2] "simplicity,entertainment,interface design,software,media,computers,technology,music,performance"
## [3] "MacArthur grant,cities,green,activism,politics,pollution,environment,inequality,business"
## [4] "children,teaching,creativity,parenting,culture,dance,education"
## [5] "demo,Asia,global issues,visualizations,global development,statistics,math,health,economics,Google,Africa"
## [6] "entertainment,goal-setting,potential,psychology,motivation,emotions,culture,business"
#stringr
# Reshape the comma-separated `tags` column into long form: one row per
# (tag position, tag) pair, with columns `tagnum` ("tag1", "tag2", ...) and
# `Tag`.
#
# The number of tag columns is derived from the data (most commas in any row,
# plus one) rather than hard-coded to 50, so talks with more tags are not
# truncated.
tag_cols <- paste0(
  "tag",
  seq_len(max(stringr::str_count(TED$tags, ","), 0L, na.rm = TRUE) + 1L)
)

TEDtags <- TED %>%
  select(tags) %>%
  # fill = "right": rows with fewer tags are padded with NA without a warning.
  separate(tags, into = tag_cols, sep = ",", fill = "right") %>%
  # Only the tag columns remain after separate(), so gather all of them.
  gather(key = "tagnum", value = "Tag", everything()) %>%
  # Trim before filtering so whitespace-only pieces are dropped as blanks.
  mutate(Tag = trimws(Tag)) %>%
  # Drop the NA padding and empty pieces.
  filter(!is.na(Tag), Tag != "")

head(TEDtags)
## # A tibble: 6 x 2
## tagnum Tag
## <chr> <chr>
## 1 tag1 alternative energy
## 2 tag1 simplicity
## 3 tag1 MacArthur grant
## 4 tag1 children
## 5 tag1 demo
## 6 tag1 entertainment
# Normalise the tags: strip surrounding whitespace, then lower-case.
TEDtags <- TEDtags %>%
  mutate(Tag = tolower(trimws(Tag)))

# Count how often each tag occurs, sorted from most to least frequent.
tagcount <- TEDtags %>%
  group_by(Tag) %>%
  summarise(Tag_count = n()) %>%
  arrange(desc(Tag_count))

# Turn Tag into a factor whose levels follow the current row order.
tagcount <- tagcount %>%
  mutate(Tag = fct_inorder(Tag))
# Interactive horizontal bar chart of the first 20 rows of `tagcount`
# (the most frequent tags, given the descending sort above), with legend
# and tooltips.
# head() replaces `tagcount[1:20, ]` so a table with fewer than 20 tags is not
# indexed past its end, and `horizontal` is passed as the logical TRUE rather
# than the string "TRUE".
tauchart(head(tagcount, 20)) %>%
  tau_bar("Tag_count", "Tag", horizontal = TRUE) %>%
  tau_legend() %>%
  tau_tooltip()