# Load the TED talks workbook; later steps use its `duration`,
# `speaker_name` and `tags` columns.
ted <- readxl::read_excel(path = "TED.xlsx")
# View(ted)

# 1. Create an interactive table that shows the total number of talks given
#    by each speaker and the average duration of all their talks. The table
#    should therefore have three columns: the speaker's name, the number of
#    talks, and the mean talk time (in minutes) across all their talks.

library(lubridate)

# Sanity check: pull the hour, minute and second components out of the first
# talk's duration (the recorded output is kept below each call).
hour(ted$duration[[1]])
## [1] 0
minute(ted$duration[[1]])
## [1] 16
second(ted$duration[[1]])
## [1] 17
library(magrittr)
library(tidyverse)
library(dplyr)
library(tidyr)
library(lubridate)
# Express each talk's duration in decimal minutes: rebuild the total number
# of seconds from the clock components of `duration`, then divide by 60.
ted <- mutate(
  ted,
  duration_minutes =
    (second(duration) + 60 * minute(duration) + 3600 * hour(duration)) / 60
)
# Preview the original and the derived column side by side.
head(select(ted, duration, duration_minutes))
## # A tibble: 6 x 2
##   duration            duration_minutes
##   <dttm>                         <dbl>
## 1 1899-12-31 00:16:17             16.3
## 2 1899-12-31 00:21:26             21.4
## 3 1899-12-31 00:18:36             18.6
## 4 1899-12-31 00:19:24             19.4
## 5 1899-12-31 00:19:50             19.8
## 6 1899-12-31 00:21:45             21.8
# One row per speaker: how many talks they gave and the mean length of those
# talks in minutes.
ted_speaker_metrics <- ted %>%
  group_by(speaker_name) %>%
  summarise(
    Number_talks = n(),
    Mean_talk_duration = mean(duration_minutes)
  )
# Preview as a plain data frame so the unrounded means are printed in full.
head(as.data.frame(ted_speaker_metrics))
##      speaker_name Number_talks Mean_talk_duration
## 1   Aakash Odedra            1           9.833333
## 2   Aala El-Khani            1          14.266667
## 3      Aaron Huey            1          15.450000
## 4    Aaron Koblin            1          18.300000
## 5 Aaron O'Connell            1           7.850000
## 6       Abe Davis            1          17.950000
# Round the means to two decimals for display in the tables and charts below.
ted_speaker_metrics <- mutate(
  ted_speaker_metrics,
  Mean_talk_duration = round(Mean_talk_duration, digits = 2)
)
library(htmlwidgets)
library(DT)

# Render the per-speaker summary as an interactive (sortable, searchable)
# HTML table.
DT::datatable(data = ted_speaker_metrics)

# Question 2

# Create bar graphs to:

#   1. show the speakers who gave more than 3 talks, such that the height of
#      each bar corresponds to the speaker's mean talk time and the color of
#      the bar corresponds to the number of talks given by that speaker.
#   2. show the top 20 tag terms/phrases (based on the frequency of use of
#      each term/phrase, i.e. how frequently they were present in the
#      dataset).
# Exploratory look at how the number of talks per speaker is distributed.
ggplot(ted_speaker_metrics, aes(x = Number_talks)) +
  geom_histogram()

# First pass: horizontal bars of mean talk duration for speakers with more
# than 3 talks, ordered by that mean.
ted_speaker_metrics %>%
  filter(Number_talks > 3) %>%
  ggplot(
    aes(
      x = reorder(speaker_name, Mean_talk_duration),
      y = Mean_talk_duration
    )
  ) +
  geom_col() +
  coord_flip()

# Second pass: same chart, with bars colored by the number of talks, cleaner
# axis labels and a black-and-white theme.
ted_speaker_metrics %>%
  filter(Number_talks > 3) %>%
  ggplot(
    aes(
      x = reorder(speaker_name, Mean_talk_duration),
      y = Mean_talk_duration,
      fill = as.factor(Number_talks)
    )
  ) +
  geom_col() +
  coord_flip() +
  labs(x = "", y = "Mean Talk Duration") +
  theme_bw()

# Third pass: as above, plus a readable legend title for the fill scale.
ted_speaker_metrics %>%
  filter(Number_talks > 3) %>%
  ggplot(
    aes(
      x = reorder(speaker_name, Mean_talk_duration),
      y = Mean_talk_duration,
      fill = as.factor(Number_talks)
    )
  ) +
  geom_col() +
  coord_flip() +
  labs(x = "", y = "Mean Talk Duration") +
  theme_bw() +
  scale_fill_discrete(name = "Number of Talks")

# install.packages("plotly")
library(plotly)


# Keep the finished ggplot in a variable so it can be shown as a static plot
# and then handed to plotly for an interactive version.
ggobject <- ted_speaker_metrics %>%
  filter(Number_talks > 3) %>%
  ggplot(
    aes(
      x = reorder(speaker_name, Mean_talk_duration),
      y = Mean_talk_duration,
      fill = as.factor(Number_talks)
    )
  ) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(x = "", y = "Mean Talk Duration") +
  theme_bw() +
  scale_fill_discrete(name = "Number of Talks")

# Static version.
ggobject

# Interactive version converted from the same ggplot object.
ggplotly(p = ggobject)
# Native plotly version, first attempt: no trace type is given, so plotly
# picks one itself.
ted_speaker_metrics %>%
  filter(Number_talks > 3) %>%
  plot_ly(
    x = ~Mean_talk_duration,
    y = ~speaker_name,
    color = ~as.factor(Number_talks)
  )
# Native plotly bar chart of mean talk duration for speakers with more than
# 3 talks, ordered by that mean and colored by the number of talks.
ted_speaker_metrics %>%
  filter(Number_talks > 3) %>%
  plot_ly(
    x = ~Mean_talk_duration,
    y = ~reorder(speaker_name, Mean_talk_duration),
    color = ~as.factor(Number_talks),
    type = "bar"
  ) %>%
  layout(
    title = "Speakers with more than 3 Ted Talks",
    yaxis = list(title = ""),
    # The x axis plots Mean_talk_duration, so label it as a duration
    # (the previous label said "distribution").
    xaxis = list(title = "Mean talk duration")
  )
library(taucharts)

# taucharts version, first attempt: speakers with more than 3 talks, in the
# order they appear in the summary table.
tmp <- ted_speaker_metrics %>%
  filter(Number_talks > 3)

# `color` must name an existing column; the summary column is `Number_talks`
# (lower-case "t"), not `Number_Talks`.
tauchart(tmp) %>%
  tau_bar(
    "Mean_talk_duration",
    "speaker_name",
    color = "Number_talks",
    horizontal = "TRUE"
  ) %>%
  tau_legend() %>%
  tau_tooltip()
# taucharts version, second attempt: sort speakers by descending mean talk
# duration and freeze that order in the factor levels of `speaker_name`.
tmp <- ted_speaker_metrics %>%
  filter(Number_talks > 3) %>%
  arrange(desc(Mean_talk_duration))

tmp <- mutate(tmp, speaker_name = fct_inorder(speaker_name))

tauchart(tmp) %>%
  tau_bar(
    "Mean_talk_duration",
    "speaker_name",
    color = "Number_talks",
    horizontal = "TRUE"
  ) %>%
  tau_legend() %>%
  tau_tooltip()
#stringr

# Split the comma-separated `tags` column into one column per tag position,
# then stack those columns into a long table with one row per
# (position, tag) pair. Talks with fewer tags than columns contribute NA rows.
#
# The column names are generated instead of typed out. At least 50 columns
# are always created (the original layout); more are added only if some talk
# carries more than 50 tags, so no tag is dropped.
tag_counts <- lengths(strsplit(as.character(ted$tags), ",", fixed = TRUE))
tag_cols <- paste0("tag", seq_len(max(50L, tag_counts)))

tedtags <- ted %>%
  select(tags) %>%
  # fill = "right": pad short rows with NA explicitly instead of relying on
  # the default, which does the same padding but emits a warning.
  separate(tags, into = tag_cols, sep = ",", fill = "right") %>%
  # After separate() only the tag<N> columns remain, so this gathers them all.
  gather(tagnum, tag, starts_with("tag"))
head(tedtags)
## # A tibble: 6 x 2
##   tagnum tag               
##   <chr>  <chr>             
## 1 tag1   alternative energy
## 2 tag1   simplicity        
## 3 tag1   MacArthur grant   
## 4 tag1   children          
## 5 tag1   demo              
## 6 tag1   entertainment
# Normalise the tags so that spellings differing only in surrounding
# whitespace or letter case (e.g. "alternative energy", " alternative energy",
# "alternative energy ") are treated as the same tag.

tedtags$tag <- tolower(trimws(tedtags$tag))