ted<-readxl::read_excel("ted.xlsx")
# install.packages("googlesheets")
sheeturl="https://docs.google.com/spreadsheets/d/1Yv_9nDl4ocIZR0GXU3OZuBaXxER1blfwR_XHvklPpEM/edit?hl=en&hl=en&hl=en#gid=0"
library(tidyverse)
library(googlesheets)
tedsheet <- sheeturl %>% gs_url()
TED <- tedsheet %>% gs_read()

1 Create an interactive table that shows the total number of talks given by an indiviudal and the average duration of all of their talks.

library(dplyr)
# install.packages("lubridate")
library(lubridate)

hour(ted$duration[1])
## [1] 0
minute(ted$duration[1])
## [1] 16
second(ted$duration[1])
## [1] 17
TED <- TED %>% mutate(duration_minutes=(second(duration)+60*minute(duration)+3600*hour(duration))/60)
head(TED[,c("duration","duration_minutes")])
## # A tibble: 6 x 2
##   duration duration_minutes
##   <time>              <dbl>
## 1 16'17"               16.3
## 2 21'26"               21.4
## 3 18'36"               18.6
## 4 19'24"               19.4
## 5 19'50"               19.8
## 6 21'45"               21.8
TED_speaker_metrics <- TED%>%group_by(speaker_name)%>%
  summarise(Number_talks=length(speaker_name), Mean_talk_duration=mean(duration_minutes)) 
head(as.data.frame(TED_speaker_metrics))
##      speaker_name Number_talks Mean_talk_duration
## 1   Aakash Odedra            1           9.833333
## 2   Aala El-Khani            1          14.266667
## 3      Aaron Huey            1          15.450000
## 4    Aaron Koblin            1          18.300000
## 5 Aaron O'Connell            1           7.850000
## 6       Abe Davis            1          17.950000

Round to two decimals

TED_speaker_metrics$Mean_talk_duration <- round(TED_speaker_metrics$Mean_talk_duration,2)
# install.packages("DT")
library(DT)
library(htmlwidgets)
datatable(TED_speaker_metrics)

Question 2

Create graphs to show speakers who gave more than 3 talks and show the top 20 tag terms.

library(ggplot2)
library(tidyverse)
TED_speaker_metrics %>%
  ggplot(.,aes(Number_talks))+geom_histogram()

Filter for more than 3 talks.

TED_speaker_metrics %>% filter(Number_talks>3)%>%
  ggplot(.,aes(reorder(speaker_name, Mean_talk_duration),Mean_talk_duration))+geom_bar(stat="identity")+coord_flip()

TED_speaker_metrics %>% filter(Number_talks>3)%>%
  ggplot(.,aes(reorder(speaker_name, Mean_talk_duration),Mean_talk_duration))+geom_bar(stat="identity")+coord_flip()+labs(x="",y="Mean talk duration")+theme_bw()

TED_speaker_metrics %>% filter(Number_talks>3)%>%
  ggplot(.,aes(reorder(speaker_name, Mean_talk_duration),Mean_talk_duration))+geom_bar(stat="identity")+coord_flip()+labs(x="",y="Mean talk duration")+theme_bw()

TED_speaker_metrics %>% filter(Number_talks>3)%>%
  ggplot(.,aes(reorder(speaker_name, Mean_talk_duration),Mean_talk_duration, fill=Number_talks))+geom_bar(stat="identity")+coord_flip()+labs(x="",y="Mean talk duration")+theme_bw()

TED_speaker_metrics %>% filter(Number_talks>3)%>%
  ggplot(.,aes(reorder(speaker_name, Mean_talk_duration),Mean_talk_duration, fill=as.factor(Number_talks)))+geom_bar(stat="identity")+coord_flip()+labs(x="",y="Mean talk duration")+theme_bw()

TED_speaker_metrics %>% filter(Number_talks>3)%>%
  ggplot(.,aes(reorder(speaker_name, Mean_talk_duration),Mean_talk_duration, fill=as.factor(Number_talks)))+geom_bar(stat="identity")+coord_flip()+labs(x="",y="Mean talk duration")+theme_bw()+scale_fill_discrete("Number of talks")

# install.packages("plotly")
library(plotly)
library(dplyr)
ggobject<-TED_speaker_metrics %>% filter(Number_talks>3)%>%
  ggplot(.,aes(reorder(speaker_name, Mean_talk_duration),Mean_talk_duration, fill=as.factor(Number_talks)))+geom_bar(stat="identity")+coord_flip()+labs(x="",y="Mean talk duration")+theme_bw()+scale_fill_discrete("Number of talks")

ggobject

# library(plotly)
# install.packages("Cairo")
# library(Cairo)
# install

ggplotly(ggobject)
TED_speaker_metrics %>% filter(Number_talks>3)%>%
  plot_ly(x=~Mean_talk_duration,y=~speaker_name,color=~as.factor(Number_talks))
TED_speaker_metrics %>% filter(Number_talks>3)%>%
  plot_ly(x=~Mean_talk_duration,y=~reorder(speaker_name,Mean_talk_duration),color=~as.factor(Number_talks),type="bar")%>%
  layout(title="speakers with more than 3 Ted Talks", yaxis=list(title=""),xaxis=list(title="Mean talk duration"))
#devtools::install_github("hrbrmstr/taucharts")
library(taucharts)

show the top 20 tag terms/phrase (based on the frequency of use of each term/phrase) and how frequently they were present in the dataset.

head(TED$tags)
## [1] "alternative energy,cars,global issues,climate change,environment,science,culture,sustainability,technology"
## [2] "simplicity,entertainment,interface design,software,media,computers,technology,music,performance"           
## [3] "MacArthur grant,cities,green,activism,politics,pollution,environment,inequality,business"                  
## [4] "children,teaching,creativity,parenting,culture,dance,education"                                            
## [5] "demo,Asia,global issues,visualizations,global development,statistics,math,health,economics,Google,Africa"  
## [6] "entertainment,goal-setting,potential,psychology,motivation,emotions,culture,business"
#stringr

TEDtags <- TED %>% select(tags) %>%
  separate(tags,c("tag1", "tag2", "tag3", "tag4", "tag5", "tag6", "tag7", "tag8", "tag9", "tag10","tag11", "tag12", "tag13", "tag14", "tag15", "tag16", "tag17", "tag18", "tag19", "tag20","tag21", "tag22", "tag23", "tag24", "tag25", "tag26", "tag27", "tag28", "tag29", "tag30","tag31", "tag32", "tag33", "tag34", "tag35", "tag36", "tag37", "tag38", "tag39", "tag40","tag41", "tag42", "tag43", "tag44", "tag45", "tag46", "tag47", "tag48", "tag49", "tag50"),sep=",") %>%
  gather(tagnum, Tag, tag1:tag50) %>%
  filter(Tag != "")
head(TEDtags)
## # A tibble: 6 x 2
##   tagnum Tag               
##   <chr>  <chr>             
## 1 tag1   alternative energy
## 2 tag1   simplicity        
## 3 tag1   MacArthur grant   
## 4 tag1   children          
## 5 tag1   demo              
## 6 tag1   entertainment
TEDtags$Tag <- trimws(TEDtags$Tag)
TEDtags$Tag <- tolower(TEDtags$Tag)
tagcount <-TEDtags %>%
  group_by(Tag)%>%summarise(Tag_count=length(Tag))%>%arrange(-Tag_count)

tagcount$Tag=fct_inorder(tagcount$Tag)
# dataframename[rows,columns] 
tauchart(tagcount[1:20,])%>%tau_bar("Tag_count","Tag",horizontal = "TRUE")%>% tau_legend()%>% tau_tooltip()