knitr::opts_chunk$set(echo = TRUE)

library(readr)
## Warning: package 'readr' was built under R version 4.0.2
library(plotrix)
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.0.2
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.2
library(plyr)
## Warning: package 'plyr' was built under R version 4.0.2
library(gplots)
## Warning: package 'gplots' was built under R version 4.0.2
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:plotrix':
## 
##     plotCI
## The following object is masked from 'package:stats':
## 
##     lowess
library(RColorBrewer)
library("colorRamps")


census_data_Italy <- read_csv("C:/Users/Andrea/Desktop/PSY5210/R files/census data Italy.csv", col_names = TRUE,skip = 1)
## Parsed with column specification:
## cols(
##   Year = col_double(),
##   Age = col_character(),
##   `Both Sexes Population` = col_double(),
##   `Male Population` = col_double(),
##   `Female Population` = col_double(),
##   `Percent Both Sexes` = col_double(),
##   `Percent Male` = col_double(),
##   `Percent Female` = col_double(),
##   `Sex Ratio` = col_double()
## )
census_data_China <- read_csv("C:/Users/Andrea/Desktop/PSY5210/R files/census data China.csv", col_names = TRUE,skip = 1)
## Parsed with column specification:
## cols(
##   Year = col_double(),
##   Age = col_character(),
##   `Both Sexes Population` = col_double(),
##   `Male Population` = col_double(),
##   `Female Population` = col_double(),
##   `Percent Both Sexes` = col_double(),
##   `Percent Male` = col_double(),
##   `Percent Female` = col_double(),
##   `Sex Ratio` = col_double()
## )
census_data_United_States <- read_csv("C:/Users/Andrea/Desktop/PSY5210/R files/census data United States.csv", col_names = TRUE,skip = 1)
## Parsed with column specification:
## cols(
##   Year = col_double(),
##   Age = col_character(),
##   `Both Sexes Population` = col_double(),
##   `Male Population` = col_double(),
##   `Female Population` = col_double(),
##   `Percent Both Sexes` = col_double(),
##   `Percent Male` = col_double(),
##   `Percent Female` = col_double(),
##   `Sex Ratio` = col_double()
## )
census_data_Nigeria <- read_csv("C:/Users/Andrea/Desktop/PSY5210/R files/census data Nigeria.csv", col_names = TRUE,skip = 1)
## Parsed with column specification:
## cols(
##   Year = col_double(),
##   Age = col_character(),
##   `Both Sexes Population` = col_double(),
##   `Male Population` = col_double(),
##   `Female Population` = col_double(),
##   `Percent Both Sexes` = col_double(),
##   `Percent Male` = col_double(),
##   `Percent Female` = col_double(),
##   `Sex Ratio` = col_double()
## )
census_data_Italy = census_data_Italy[-1,]
census_data_China = census_data_China[-1,]
census_data_United_States = census_data_United_States[-1,]    #deleting first row
census_data_Nigeria = census_data_Nigeria[-1,]



# 1st set of colors
from_pink_to_red = colorpanel(length(census_data_Italy$Age),"pink","red") 
from_lightgreen_to_green = colorpanel(length(census_data_Italy$Age),"lightgreen","green")

# 2nd set of colors
palette = brewer.pal(11,"Spectral")
col2=rich.colors(length(census_data_Italy$Age),palette="temperature")

# 3rd set of colors
random_col = floor(runif(25)*11+1)
col3 = rev(brewer.pal(11,"Spectral"))
col4 = blue2green2red(length(census_data_Italy$Age))

#4rd set of colors
col5 = 'darkgreen'
col6 = 'navy'


par(mfrow=c(1,3))

pyramid.plot(census_data_Italy$`Percent Female`,census_data_Italy$`Percent Male`, labels=census_data_Italy$Age,main='ITALY',space = 0.2,gap = 2.5,rxcol = from_pink_to_red,lxcol = from_lightgreen_to_green,show.values = T,labelcex = 0.8,xlim = c(8.5,9)) #
## 8.5 9
## [1] 5.1 4.1 4.1 2.1
#pyramid.plot(census_data_China$`Percent Female`,census_data_China$`Percent Male`, #labels=census_data_China$Age,main = 'CHINA',space = 0.2,gap = 2.5,rxcol = palette
#,lxcol = rev(col2),show.values = T,labelcex = 0.8,xlim = c(8.5,9))

#I opted to take China out because it was very similar to the USA

pyramid.plot(census_data_United_States$`Percent Female`,census_data_United_States$`Percent Male`, labels=census_data_United_States$Age,main = 'USA',space = 0.2,gap = 2.5,rxcol = col3[random_col],lxcol = col4,show.values = T,labelcex = 0.8,xlim = c(8.5,9))
## 8.5 9
## [1] 4 2 4 2
pyramid.plot(census_data_Nigeria$`Percent Female`,census_data_Nigeria$`Percent Male`, labels=census_data_Nigeria$Age,main='NIGERIA',space = 0.2,gap = 2.5,rxcol = col5,lxcol = col6,show.values = T,labelcex = 0.8,xlim = c(15.1,15.6))

## 15.1 15.6
## [1] 4 2 4 2
#here is data from population by 5 years and as you can see Italy shows higher percentage of people between 40-64 and as many youngsters as old people. USA seems to follow a more scattered spread and it's kind of difficult to draw conclusions by it. On the other hand, Nigeria show a significant majority of people between 0 and 25 years old. This can be due to the fact that Nigeria is a developing country and people are more likely to die younger due to healthcare problems and poverty. Furthermore, due to poverty conditions in Nigeria, having more kids may be a way of supporting the famiy's livelihood by running errands like collecting water, gardening, field work, etc...


#second try
italy = data.frame(age=census_data_Italy$Age,male=census_data_Italy$`Male Population`,female=census_data_Italy$`Female Population`)

italy$age = factor(census_data_Italy$Age, levels = census_data_Italy$Age, labels = census_data_Italy$Age)

df.melt = melt(italy, 
                   value.name='Population', 
                   variable.name = 'Gender', 
                   id.vars='age' )


df.melt = df.melt[-c(1, 23), ] #remove the rows with total population


#palette('default') 
italy.barplot <- ggplot(df.melt, aes(x = age, y = Population, fill = Gender)) + 
  geom_bar(stat = "identity") + 
  geom_bar(stat = "identity") + 
  scale_y_continuous(breaks = seq(-15000000, 15000000, 5000000), 
                     labels = paste0(as.character(c(seq(15, 0, -5), seq(5, 15, 5))), "m")) + 
  coord_flip() + 
  scale_fill_brewer(palette = "Dark2") + 
  theme_bw()

italy.barplot

#although this kind of looks good it is not really a pyramid plot and I have tried to adapt it from this resource https://rpubs.com/walkerke/pyramids_ggplot2

#2. Demonstration of a new Graphics Function or Package

#Install and load ggplot2 and ggridges library

library(ggplot2)
library(ggridges)
## Warning: package 'ggridges' was built under R version 4.0.2

#Ridgeline plots are partially overlapping line plots that create the #impression of a mountain range. #They can be quite useful for visualizing changes in distributions over time or space.

#The ggridges package provides two main geoms, geom_ridgeline and geom_density_ridges. #geom_ridgline: takes height values directly to draw ridgelines, #goem_denisty_ridges: first estimates data densities and then draws those using ridgelines.

#The geom geom_ridgeline can be used to draw lines with a filled area underneath.

data <- data.frame(x = 1:5, y=floor(runif(5)*10), height = c(0, 1, 3, 4, 2))  #y = rep(1, 5)

ggplot(data, aes(x, y,  height = height)) + geom_ridgeline() #here the y does not really do anything

#Plot side-by-side with negative values
library(patchwork) # for side-by-side plotting
## Warning: package 'patchwork' was built under R version 4.0.2
data <- data.frame(x = 1:5, y=floor(runif(5)*10), height = c(0, 1, -1, 3, 2)) #y = rep(1, 5)

ggplot(data, aes(x, y, height = height)) + geom_ridgeline()

ggplot(data, aes(x, y, height = height)) +  geom_ridgeline(min_height = -2)

#Plot multiple ridgelines
#The group aesthetic must be specified so that the geom knows 
#which parts of the data belong to which ridgeline.

d <- data.frame(
  x = rep(1:5, 3),
  y = c(rep(0, 5), rep(1, 5), rep(2, 5)),
  height = c(0, 1, 3, 4, 0, 1, 2, 3, 5, 4, 0, 5, 4, 4, 1)
)

ggplot(d, aes(x, y, height = height, group = y)) +            #so y is like an index or grouping variable
  geom_ridgeline(fill = "lightblue")

#Drawing ridgelines with geom_density_ridges if we set stat = "identity". 
#In this case, the heights are automatically scaled such 
#that the highest ridgeline just touches the one above at scale = 1.

ggplot(d, aes(x, y, height = height, group = y)) + 
  geom_density_ridges(stat = "identity", scale = 1)      #so scale works only if y takes value between y and y+1, right?

#Density Ridgeline Plots
#The geom geom_density_ridges calculates density estimates from the provided data 
#and then plots those, using the ridgeline visualization. 
#The height aesthetic does not need to be specified in this case.

ggplot(d, aes(x, y, group = y)) + 
  geom_density_ridges(scale = 1)              #also because if you specify the height you can't get distribution of x in y
## Picking joint bandwidth of 0.974

iris <- iris #is this needed?


ggplot(iris, aes(x = Sepal.Length, y = Species)) + geom_density_ridges()  
## Picking joint bandwidth of 0.181

#There is also geom_density_ridges2, which uses closed polygons 
#instead of ridgelines for drawing.

ggplot(iris, aes(x = Sepal.Length, y = Species)) + geom_density_ridges2()   #not working for me?
## Picking joint bandwidth of 0.181

#Trailing tails can be cut off using the rel_min_height aesthetic. 
#This aesthetic sets a percent cutoff relative to the highest point of any of the density curves.
#A value of 0.01 usually works well, but you may have to modify this parameter for different datasets.

ggplot(iris, aes(x = Sepal.Length, y = Species)) + 
  geom_density_ridges(rel_min_height = 0.01)
## Picking joint bandwidth of 0.181

#Set overlap using Scale = 
#A setting of scale=1 means the tallest density curve just touches the baseline 
#of the next higher one. Smaller values create a separation between the curves, 
#and larger values create more overlap.


# scale = 0.9, not quite touching
ggplot(iris, aes(x = Sepal.Length, y = Species)) + geom_density_ridges(scale = 0.9)
## Picking joint bandwidth of 0.181

ggplot(iris, aes(x = Sepal.Length, y = Species)) + geom_density_ridges(scale = 0.3)
## Picking joint bandwidth of 0.181

ggplot(iris, aes(x = Sepal.Length, y = Species)) + geom_density_ridges(scale = 5)
## Picking joint bandwidth of 0.181

#Varying fill colors along the x axis
#Create gradients using geom_ridgeline_gradient and geom_density_ridges_gradient

d <- data.frame(
  x = rep(1:5, 3) + c(rep(0, 5), rep(0.3, 5), rep(0.6, 5)),
  y = c(rep(0, 5), rep(1, 5), rep(3, 5)),
  height = c(0, 1, 3, 4, 0, 1, 2, 3, 5, 4, 0, 5, 4, 4, 1))

ggplot(d, aes(x, y, height = height, group = y, fill = factor(x+y))) +
  geom_ridgeline_gradient() +
  scale_fill_viridis_d(direction = -1, guide = "none") #direction of the color gradient? Guide? fill = factor(x+y)? I don't think this colours are well displayed :(

#Creating gradients for density plot

lincoln_weather <- lincoln_weather

ggplot(lincoln_weather, aes(x = `Mean Temperature [F]`, y = Month, fill = stat(x))) +
  geom_density_ridges_gradient(scale = 3,rel_min_height = 0.01) 
## Picking joint bandwidth of 3.37

#Add specific type of gradient colors, legend title, and main title
ggplot(lincoln_weather, aes(x = `Mean Temperature [F]`, y = Month, fill = stat(x))) +
  geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  scale_fill_viridis_c(name = "Temp. [F]", option = "C") +           #where is the colour stated here? fill, and then option?
  labs(title = 'Temperatures in Lincoln NE in 2016')
## Picking joint bandwidth of 3.37

#Adding stats
#The ggridges package provides a stat_density_ridges.

#By setting the option quantile_lines = TRUE, 
#we can make stat_density_ridges calculate the position of lines indicating quantiles. 
#By default, it breaks distibution into quartles

ggplot(iris, aes(x = Sepal.Length, y = Species)) +
  stat_density_ridges(quantile_lines = TRUE)
## Picking joint bandwidth of 0.181

#Change the number of quantiles by specifying it via the quantiles option. 
#Note that quantiles = 2 implies one line (the median) at the boundary between the two quantiles.

ggplot(iris, aes(x = Sepal.Length, y = Species)) +
  stat_density_ridges(quantile_lines = TRUE, quantiles = 2)
## Picking joint bandwidth of 0.181

#We can also specify quantiles by cut points rather than number. 
#E.g., we can indicate the 2.5% and 97.5% tails.

ggplot(iris, aes(x = Sepal.Length, y = Species)) +
  stat_density_ridges(quantile_lines = TRUE, quantiles = c(0.025, 0.975), alpha = 0.7)
## Picking joint bandwidth of 0.181

#Using the geom geom_density_ridges_gradient we can also color by quantile, 
#via the calculated stat(quantile) aesthetic. 
#Note that this aesthetic is only calculated if calc_ecdf = TRUE.

ggplot(iris, aes(x=Sepal.Length, y=Species, fill = factor(stat(quantile)))) +
  stat_density_ridges(
    geom = "density_ridges_gradient", calc_ecdf = TRUE,
    quantiles = 4, quantile_lines = TRUE
  ) +
  scale_fill_viridis_d(name = "Quartiles")
## Picking joint bandwidth of 0.181

#Adding Jitterinb Points

#The stat stat_density_ridges also provides the option to visualize 
#the original data points from which the distributions are generated. 
#This can be done by setting jittered_points = TRUE, 
#either in stat_density_ridges or in geom_density_ridges:

ggplot(iris, aes(x = Sepal.Length, y = Species)) +
  geom_density_ridges(jittered_points = TRUE)
## Picking joint bandwidth of 0.181

#Where the points are shown can be controlled with position options, 
#e.g. 'raincloud' for the raincloud effect:

ggplot(iris, aes(x = Sepal.Length, y = Species)) +
  geom_density_ridges(
    jittered_points = TRUE, position = "raincloud",
    alpha = 0.7, scale = 0.9
  )
## Picking joint bandwidth of 0.181

#Style the jiggered points 
#scale_discrete_manual() which can be used to make arbitrary discrete scales 
#for arbitrary aesthetics.
#various point aesthetic scales, such as scale_point_color_hue()

ggplot(iris, aes(x = Sepal.Length, y = Species, fill = Species)) +
  geom_density_ridges(
    aes(point_color = Species, point_fill = Species, point_shape = Species),
    alpha = .2, point_alpha = 1, jittered_points = TRUE
  ) +
  scale_point_color_hue(l = 40) +
  scale_discrete_manual(aesthetics = "point_shape", values = c(21, 22, 23))
## Picking joint bandwidth of 0.181

#Themed ridges

#Cyclical scales to draw ridelines with alternating colors
#scale_fill_cyclical(values = c("blue", "green"))

diamonds <- diamonds

ggplot(diamonds, aes(x = price, y = cut, fill = cut)) + 
  geom_density_ridges(scale = 4) + 
  scale_fill_cyclical(values = c("blue", "green"))
## Picking joint bandwidth of 458

#Add a legend by using guide = "legend"

ggplot(diamonds, aes(x = price, y = cut, fill = cut)) + 
  geom_density_ridges(scale = 4) + 
  scale_fill_cyclical(values = c("blue", "green",'red','yellow'), guide = "legend") #isn't this just displaying two values?
## Picking joint bandwidth of 458

library(readxl)
## Warning: package 'readxl' was built under R version 4.0.2
EMG_DATA_R_2 <- read_excel("~/MATLAB/EMG_DATA_R_2.xls", 
                           col_names = FALSE)
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
#View(EMG_DATA_R_2)


normalized_emg = as.data.frame(EMG_DATA_R_2)
colnames(normalized_emg) = c('RTA','LTA','RMG','LMG','RRF','LRF','RBF','LBF')

#plot(normalized_emg[,1],type='l')



library(ggridges)
library(ggplot2)



our_plot = matrix(NA,length(normalized_emg[,1])*8,3)
colnames(our_plot) = c('Muscles', 'Hz', 'EMG')


our_plot[,1] = NA

our_plot[1:3360,1] = 1#'RTA'
our_plot[3360:(3360*2),1] = 2#'LTA'
our_plot[(3360*2):(3360*3),1] =3 #'RMG'
our_plot[(3360*3):(3360*4),1] = 4#'LMG'
our_plot[(3360*4):(3360*5),1] = 5#'RRF'
our_plot[(3360*5):(3360*6),1] = 6#'LRF'
our_plot[(3360*6):(3360*7),1] = 7#'RBF'
our_plot[(3360*7):(3360*8),1] = 8#'LBF'


Time<-c(1:3360)
our_plot[,2] = rep(Time, 8)



our_plot[,3] = round(c(normalized_emg$RTA,normalized_emg$LTA,normalized_emg$RMG,
                 normalized_emg$LMG,normalized_emg$RRF,normalized_emg$LRF,
                 normalized_emg$RBF,normalized_emg$LBF),4)


our_plot = as.data.frame(our_plot)


ggplot(our_plot,aes(x=Hz,y=Muscles,height=EMG,group=Muscles)) + geom_ridgeline(fill=('blue'),scale=.1,alpha=0.5) # + scale_fill_cyclical(values = c('blue','green'))

# I can't manage to put the muscles names in the 8 different emg signals...