ted<-readxl::read_excel("TED.xlsx")
# View(ted)
sheeturl= "https://docs.google.com/spreadsheets/d/1Yv_9nDl4ocIZR0GXU3OZuBaXxER1blfwR_XHvklPpEM/edit?hl=en&hl=en&hl=en#gid=0"
library(googlesheets)
library(tidyverse)
tedsheet<- sheeturl %>% gs_url()
TED <- tedsheet %>% gs_read()
library (lubridate)
library(htmlwidgets)
library(DT)
library(dplyr)
hour(TED$duration[1])
## [1] 0
minute(TED$duration[1])
## [1] 16
second(TED$duration[1])
## [1] 17
TED <- TED %>% mutate(duration_minutes=(second(duration)+60*minute(duration)+3600*hour(duration))/60)
head(TED[,c("duration","duration_minutes")])
## # A tibble: 6 x 2
## duration duration_minutes
## <time> <dbl>
## 1 16'17" 16.3
## 2 21'26" 21.4
## 3 18'36" 18.6
## 4 19'24" 19.4
## 5 19'50" 19.8
## 6 21'45" 21.8
TED_speaker_metrics <- TED %>% group_by(speaker_name) %>%
summarise(Number_talks = length(speaker_name),
mean_talk_duration = mean(duration_minutes))
head(as.data.frame(TED_speaker_metrics))
## speaker_name Number_talks mean_talk_duration
## 1 Aakash Odedra 1 9.833333
## 2 Aala El-Khani 1 14.266667
## 3 Aaron Huey 1 15.450000
## 4 Aaron Koblin 1 18.300000
## 5 Aaron O'Connell 1 7.850000
## 6 Abe Davis 1 17.950000
TED_speaker_metrics$mean_talk_duration = round(TED_speaker_metrics$mean_talk_duration,2)
datatable(TED_speaker_metrics)
# Part 2a Create bar graphs to show speakers who gave more than 3 talks, such that the height of bars corresponds to the mean talk time of each speaker and the color of the bar corresponds to the number of talks given by each speaker.
library(dplyr)
library(taucharts)
TED_speaker_metrics %>% ggplot(.,aes(Number_talks))+geom_histogram()
TED_speaker_metrics %>% filter(Number_talks>3)%>% ggplot(.,aes(reorder(speaker_name, mean_talk_duration),mean_talk_duration))+geom_bar(stat="identity")+coord_flip()
TED_speaker_metrics %>% filter(Number_talks>3)%>% ggplot(.,aes(reorder(speaker_name, mean_talk_duration),mean_talk_duration))+geom_bar(stat="identity")+coord_flip()+labs(x="",y="Mean talk duration")+theme_bw()
## Scale to highlight difference in data
TED_speaker_metrics %>% filter(Number_talks>3)%>% ggplot(.,aes(reorder(speaker_name, mean_talk_duration),mean_talk_duration, fill=Number_talks))+geom_bar(stat="identity")+coord_flip()+labs(x="",y="Mean talk duration")+theme_bw()
## Grouping speakers number of talks by color
TED_speaker_metrics %>% filter(Number_talks>3)%>% ggplot(.,aes(reorder(speaker_name, mean_talk_duration),mean_talk_duration, fill=as.factor(Number_talks)))+geom_bar(stat="identity")+coord_flip()+labs(x="",y="Mean talk duration")+theme_bw()
TED_speaker_metrics %>% filter(Number_talks>3)%>% ggplot(.,aes(reorder(speaker_name, mean_talk_duration),mean_talk_duration, fill=as.factor(Number_talks)))+geom_bar(stat="identity")+coord_flip()+labs(x="",y="Mean talk duration")+theme_bw()+scale_fill_discrete("Number of talks")
#2b Create bar graphs to show the top 20 tag terms/phrase (based on the frequency of use of each term/phrase) and how frequently they were present in the dataset.
TEDtags <- TED %>% select(tags) %>% separate(tags,c("tag1","tag2","tag3","tag4","tag5","tag6","tag7","tag8","tag9","tag10","tag11","tag12","tag13","tag14","tag15","tag16","tag17","tag18","tag19","tag20","tag21","tag22","tag23","tag24","tag25","tag26","tag27","tag28","tag29","tag30","tag31","tag32","tag33","tag34","tag35","tag36","tag37","tag38","tag39","tag40","tag41","tag42","tag43","tag44","tag45","tag46","tag47","tag48","tag49","tag50"),sep=",") %>%
gather(tagnum, Tag, tag1:tag50) %>%
filter(Tag != "")
head(TEDtags)
## # A tibble: 6 x 2
## tagnum Tag
## <chr> <chr>
## 1 tag1 alternative energy
## 2 tag1 simplicity
## 3 tag1 MacArthur grant
## 4 tag1 children
## 5 tag1 demo
## 6 tag1 entertainment
TEDtags$Tag <- trimws(TEDtags$Tag)
TEDtags$Tag <- tolower(TEDtags$Tag)
tagcount <-TEDtags %>% group_by(Tag)%>%summarise(Tag_count=length(Tag))%>%arrange(-Tag_count)
tagcount$Tag=fct_inorder(tagcount$Tag)
tauchart(tagcount[1:20,]) %>% tau_bar("Tag_count", "Tag", horizontal = "TRUE") %>% tau_legend() %>% tau_tooltip()