Part 1: Create an interactive table that shows the total number of talks given by an individual and the average duration of all their talks. Hence, there should be three columns in the table: The name, the number of talks, and the mean of the talk time (in minutes) for all their talks.

library (lubridate)
library(htmlwidgets)
library(DT)
library(dplyr)

hour(TED$duration[1])

## [1] 0

minute(TED$duration[1])

## [1] 16

second(TED$duration[1])

## [1] 17

Find the duration of the talks in minutes

TED <- TED %>% mutate(duration_minutes=(second(duration)+60*minute(duration)+3600*hour(duration))/60)

Check that it worked

head(TED[,c("duration","duration_minutes")])

## # A tibble: 6 x 2
##   duration duration_minutes
##   <time>              <dbl>
## 1 16'17"               16.3
## 2 21'26"               21.4
## 3 18'36"               18.6
## 4 19'24"               19.4
## 5 19'50"               19.8
## 6 21'45"               21.8

Compute total number of talks for each speaker and the average minutes per talk per speaker.

TED_speaker_metrics <- TED %>% group_by(speaker_name) %>%
  summarise(Number_talks = length(speaker_name),
            mean_talk_duration = mean(duration_minutes))

head(as.data.frame(TED_speaker_metrics))

##      speaker_name Number_talks mean_talk_duration
## 1   Aakash Odedra            1           9.833333
## 2   Aala El-Khani            1          14.266667
## 3      Aaron Huey            1          15.450000
## 4    Aaron Koblin            1          18.300000
## 5 Aaron O'Connell            1           7.850000
## 6       Abe Davis            1          17.950000

TED_speaker_metrics$mean_talk_duration = round(TED_speaker_metrics$mean_talk_duration,2)

Interactive table to show the new dataframe of the metrics

datatable(TED_speaker_metrics)

# Part 2a Create bar graphs to show speakers who gave more than 3 talks, such that the height of bars corresponds to the mean talk time of each speaker and the color of the bar corresponds to the number of talks given by each speaker.

library(dplyr)
library(taucharts)

TED_speaker_metrics %>% ggplot(.,aes(Number_talks))+geom_histogram()

TED_speaker_metrics %>% filter(Number_talks>3)%>% ggplot(.,aes(reorder(speaker_name, mean_talk_duration),mean_talk_duration))+geom_bar(stat="identity")+coord_flip()

TED_speaker_metrics %>% filter(Number_talks>3)%>% ggplot(.,aes(reorder(speaker_name, mean_talk_duration),mean_talk_duration))+geom_bar(stat="identity")+coord_flip()+labs(x="",y="Mean talk duration")+theme_bw()

## Scale to highlight difference in data

TED_speaker_metrics %>% filter(Number_talks>3)%>% ggplot(.,aes(reorder(speaker_name, mean_talk_duration),mean_talk_duration, fill=Number_talks))+geom_bar(stat="identity")+coord_flip()+labs(x="",y="Mean talk duration")+theme_bw()

## Grouping speakers number of talks by color

TED_speaker_metrics %>% filter(Number_talks>3)%>% ggplot(.,aes(reorder(speaker_name, mean_talk_duration),mean_talk_duration, fill=as.factor(Number_talks)))+geom_bar(stat="identity")+coord_flip()+labs(x="",y="Mean talk duration")+theme_bw()

TED_speaker_metrics %>% filter(Number_talks>3)%>% ggplot(.,aes(reorder(speaker_name, mean_talk_duration),mean_talk_duration, fill=as.factor(Number_talks)))+geom_bar(stat="identity")+coord_flip()+labs(x="",y="Mean talk duration")+theme_bw()+scale_fill_discrete("Number of talks")

#2b Create bar graphs to show the top 20 tag terms/phrase (based on the frequency of use of each term/phrase) and how frequently they were present in the dataset.

Clean data of trailing spaces and send to lower case

TEDtags <- TED %>% select(tags) %>% separate(tags,c("tag1","tag2","tag3","tag4","tag5","tag6","tag7","tag8","tag9","tag10","tag11","tag12","tag13","tag14","tag15","tag16","tag17","tag18","tag19","tag20","tag21","tag22","tag23","tag24","tag25","tag26","tag27","tag28","tag29","tag30","tag31","tag32","tag33","tag34","tag35","tag36","tag37","tag38","tag39","tag40","tag41","tag42","tag43","tag44","tag45","tag46","tag47","tag48","tag49","tag50"),sep=",") %>%
  gather(tagnum, Tag, tag1:tag50) %>%
  filter(Tag != "")
head(TEDtags)

## # A tibble: 6 x 2
##   tagnum Tag               
##   <chr>  <chr>             
## 1 tag1   alternative energy
## 2 tag1   simplicity        
## 3 tag1   MacArthur grant   
## 4 tag1   children          
## 5 tag1   demo              
## 6 tag1   entertainment

Clean data of trailing spaces and send to lower case

TEDtags$Tag <- trimws(TEDtags$Tag)
TEDtags$Tag <- tolower(TEDtags$Tag)

tagcount <-TEDtags %>% group_by(Tag)%>%summarise(Tag_count=length(Tag))%>%arrange(-Tag_count)

tagcount$Tag=fct_inorder(tagcount$Tag)

tauchart(tagcount[1:20,]) %>% tau_bar("Tag_count", "Tag", horizontal = "TRUE") %>% tau_legend() %>% tau_tooltip()

Assignment 6

Jesse Surry

4/28/2020

Part 1: Create an interactive table that shows the total number of talks given by an individual and the average duration of all their talks. Hence, there should be three columns in the table: The name, the number of talks, and the mean of the talk time (in minutes) for all their talks.

Find the duration of the talks in minutes

Check that it worked

Compute total number of talks for each speaker and the average minutes per talk per speaker.

Interactive table to show the new dataframe of the metrics

Clean data of trailing spaces and send to lower case

Clean data of trailing spaces and send to lower case