First load some libraries.

library(syuzhet)
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(zoo)
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(tidyr)

Then define some functions.

# I redefine Syuzhet's get_percentage_values function to match a bug I submitted, and to allow it to return percents on documents less than 200 sentences long; that cutoff was arbitrary

get_percentage_values <- function(raw_values, bins = 100){
  if(!is.numeric(raw_values)) stop("Input must be a numeric vector")
  #if(length(raw_values)/bins < 2){
  #  stop("Input vector needs to be twice as long as value number to make percentage based segmentation viable")
  #}
  chunks <- split(raw_values, cut(1:length(raw_values),100))
  means = sapply(chunks, mean)
  names(means) = 1:100
  return(means)
}

get_sentiment_vector = function(location) {
  document = get_text_as_string(location %>% as.character)
  poa_v = get_sentences(document)
  if (length(poa_v) < 100) {
    #just punt on the short ones for simplicity
    return(c())
  }
  sentiment_vector <- get_sentiment(poa_v, method="bing")
}

arcsFromFile = function(destination) {
  destination %>% get_sentiment_vector %>% arcs_from_vector()
}

arcs_from_vector = function(sentiment_vector) {
  # Fail on docs shorter than 100 sentences for simplicity

  if (length(sentiment_vector)<100 ) {return(data.frame())}
  
  percent_vals <- get_percentage_values(c(sentiment_vector))
  
  mymean = function(x) {mean(x,na.rm=T)}
  # A rolling average
  rollingAverage = rollapply(percent_vals,width=30,FUN=mymean,fill=NA)
  # A loess average
  loessAverage = predict(loess(percent_vals ~ c(1:100)),span=0.3)
  # The fourier transformed values
  ft_values <- get_transformed_values(sentiment_vector, low_pass_size = 3,x_reverse_len = 100, scale_vals = TRUE,scale_range = FALSE)
  
  data.frame(Fourier = ft_values,
             loessAverage,rollingAverage,
             percentage=1:length(ft_values)) %>% 
    gather(method,normalizedScore,Fourier,rollingAverage,loessAverage)
  
}

Create an artificial plot that gets happier.

# Create an artificial plot that gets happier as time goes on.

a = arcs_from_vector(seq(-5,5,length.out=400))

ggplot(a %>% filter(method!="Fourier")) + aes(x=percentage,y=normalizedScore,color=method) +
  geom_line() + labs(title="With a plot that ascends straightforwardly,\nloess or average smoothing works fine.")
## Warning: Removed 29 rows containing missing values (geom_path).

Things get weird in the fourier smoothing.

ggplot(a) + aes(x=percentage,y=normalizedScore,color=method) + 
  geom_line() + labs(title="on a straightforwardly upward plot,\nsyuzhet's fourier filter imposes an assumption of cyclicality")
## Warning: Removed 29 rows containing missing values (geom_path).

This is true on real data as well: look at how it treats state of the unions since 1960.

The function is defined so that you can swap this out with any other folder filled with .txt files: see what this looks like on your sources.

SOTUS = data.frame(filename=list.files("~/HDA15/data/SOTUS",full.names = T)) %>% mutate(short_name = gsub(".*/([^/]+).txt","\\1",filename))

# Remove this line if your filenames aren't years.
SOTUS = SOTUS %>% filter(as.numeric(short_name) > 1960)

h = SOTUS %>% group_by(filename,short_name) %>% do(arcsFromFile(.$filename)) %>%ungroup

ggplot(h) + aes(x=percentage,y=normalizedScore,color=method) + 
  geom_line() + facet_wrap(~short_name) + 
  geom_line(data = h %>% filter(percentage %in% c(1,100)),lty=2) + labs(title="Syuzhet tries to make State of the Unions\nlook cyclical:\ndashed lines show overall trend in Fourier vs. loess smoothing")
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).
## Warning: Removed 29 rows containing missing values (geom_path).