knitr::opts_chunk$set(echo = TRUE)
library(readr)
## Warning: package 'readr' was built under R version 4.0.2
library(plotrix)
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.0.2
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.2
library(plyr)
## Warning: package 'plyr' was built under R version 4.0.2
library(gplots)
## Warning: package 'gplots' was built under R version 4.0.2
##
## Attaching package: 'gplots'
## The following object is masked from 'package:plotrix':
##
## plotCI
## The following object is masked from 'package:stats':
##
## lowess
library(RColorBrewer)
library("colorRamps")
census_data_Italy <- read_csv("C:/Users/Andrea/Desktop/PSY5210/R files/census data Italy.csv", col_names = TRUE,skip = 1)
## Parsed with column specification:
## cols(
## Year = col_double(),
## Age = col_character(),
## `Both Sexes Population` = col_double(),
## `Male Population` = col_double(),
## `Female Population` = col_double(),
## `Percent Both Sexes` = col_double(),
## `Percent Male` = col_double(),
## `Percent Female` = col_double(),
## `Sex Ratio` = col_double()
## )
census_data_China <- read_csv("C:/Users/Andrea/Desktop/PSY5210/R files/census data China.csv", col_names = TRUE,skip = 1)
## Parsed with column specification:
## cols(
## Year = col_double(),
## Age = col_character(),
## `Both Sexes Population` = col_double(),
## `Male Population` = col_double(),
## `Female Population` = col_double(),
## `Percent Both Sexes` = col_double(),
## `Percent Male` = col_double(),
## `Percent Female` = col_double(),
## `Sex Ratio` = col_double()
## )
census_data_United_States <- read_csv("C:/Users/Andrea/Desktop/PSY5210/R files/census data United States.csv", col_names = TRUE,skip = 1)
## Parsed with column specification:
## cols(
## Year = col_double(),
## Age = col_character(),
## `Both Sexes Population` = col_double(),
## `Male Population` = col_double(),
## `Female Population` = col_double(),
## `Percent Both Sexes` = col_double(),
## `Percent Male` = col_double(),
## `Percent Female` = col_double(),
## `Sex Ratio` = col_double()
## )
census_data_Nigeria <- read_csv("C:/Users/Andrea/Desktop/PSY5210/R files/census data Nigeria.csv", col_names = TRUE,skip = 1)
## Parsed with column specification:
## cols(
## Year = col_double(),
## Age = col_character(),
## `Both Sexes Population` = col_double(),
## `Male Population` = col_double(),
## `Female Population` = col_double(),
## `Percent Both Sexes` = col_double(),
## `Percent Male` = col_double(),
## `Percent Female` = col_double(),
## `Sex Ratio` = col_double()
## )
census_data_Italy = census_data_Italy[-1,]
census_data_China = census_data_China[-1,]
census_data_United_States = census_data_United_States[-1,] #deleting first row
census_data_Nigeria = census_data_Nigeria[-1,]
# 1st set of colors
from_pink_to_red = colorpanel(length(census_data_Italy$Age),"pink","red")
from_lightgreen_to_green = colorpanel(length(census_data_Italy$Age),"lightgreen","green")
# 2nd set of colors
palette = brewer.pal(11,"Spectral")
col2=rich.colors(length(census_data_Italy$Age),palette="temperature")
# 3rd set of colors
random_col = floor(runif(25)*11+1)
col3 = rev(brewer.pal(11,"Spectral"))
col4 = blue2green2red(length(census_data_Italy$Age))
#4rd set of colors
col5 = 'darkgreen'
col6 = 'navy'
par(mfrow=c(1,3))
pyramid.plot(census_data_Italy$`Percent Female`,census_data_Italy$`Percent Male`, labels=census_data_Italy$Age,main='ITALY',space = 0.2,gap = 2.5,rxcol = from_pink_to_red,lxcol = from_lightgreen_to_green,show.values = T,labelcex = 0.8,xlim = c(8.5,9)) #
## 8.5 9
## [1] 5.1 4.1 4.1 2.1
#pyramid.plot(census_data_China$`Percent Female`,census_data_China$`Percent Male`, #labels=census_data_China$Age,main = 'CHINA',space = 0.2,gap = 2.5,rxcol = palette
#,lxcol = rev(col2),show.values = T,labelcex = 0.8,xlim = c(8.5,9))
#I opted to take China out because it was very similar to the USA
pyramid.plot(census_data_United_States$`Percent Female`,census_data_United_States$`Percent Male`, labels=census_data_United_States$Age,main = 'USA',space = 0.2,gap = 2.5,rxcol = col3[random_col],lxcol = col4,show.values = T,labelcex = 0.8,xlim = c(8.5,9))
## 8.5 9
## [1] 4 2 4 2
pyramid.plot(census_data_Nigeria$`Percent Female`,census_data_Nigeria$`Percent Male`, labels=census_data_Nigeria$Age,main='NIGERIA',space = 0.2,gap = 2.5,rxcol = col5,lxcol = col6,show.values = T,labelcex = 0.8,xlim = c(15.1,15.6))
## 15.1 15.6
## [1] 4 2 4 2
#here is data from population by 5 years and as you can see Italy shows higher percentage of people between 40-64 and as many youngsters as old people. USA seems to follow a more scattered spread and it's kind of difficult to draw conclusions by it. On the other hand, Nigeria show a significant majority of people between 0 and 25 years old. This can be due to the fact that Nigeria is a developing country and people are more likely to die younger due to healthcare problems and poverty. Furthermore, due to poverty conditions in Nigeria, having more kids may be a way of supporting the famiy's livelihood by running errands like collecting water, gardening, field work, etc...
#second try
italy = data.frame(age=census_data_Italy$Age,male=census_data_Italy$`Male Population`,female=census_data_Italy$`Female Population`)
italy$age = factor(census_data_Italy$Age, levels = census_data_Italy$Age, labels = census_data_Italy$Age)
df.melt = melt(italy,
value.name='Population',
variable.name = 'Gender',
id.vars='age' )
df.melt = df.melt[-c(1, 23), ] #remove the rows with total population
#palette('default')
italy.barplot <- ggplot(df.melt, aes(x = age, y = Population, fill = Gender)) +
geom_bar(stat = "identity") +
geom_bar(stat = "identity") +
scale_y_continuous(breaks = seq(-15000000, 15000000, 5000000),
labels = paste0(as.character(c(seq(15, 0, -5), seq(5, 15, 5))), "m")) +
coord_flip() +
scale_fill_brewer(palette = "Dark2") +
theme_bw()
italy.barplot
#although this kind of looks good it is not really a pyramid plot and I have tried to adapt it from this resource https://rpubs.com/walkerke/pyramids_ggplot2
#2. Demonstration of a new Graphics Function or Package
#Install and load ggplot2 and ggridges library
library(ggplot2)
library(ggridges)
## Warning: package 'ggridges' was built under R version 4.0.2
#Ridgeline plots are partially overlapping line plots that create the #impression of a mountain range. #They can be quite useful for visualizing changes in distributions over time or space.
#The ggridges package provides two main geoms, geom_ridgeline and geom_density_ridges. #geom_ridgline: takes height values directly to draw ridgelines, #goem_denisty_ridges: first estimates data densities and then draws those using ridgelines.
#The geom geom_ridgeline can be used to draw lines with a filled area underneath.
data <- data.frame(x = 1:5, y=floor(runif(5)*10), height = c(0, 1, 3, 4, 2)) #y = rep(1, 5)
ggplot(data, aes(x, y, height = height)) + geom_ridgeline() #here the y does not really do anything
#Plot side-by-side with negative values
library(patchwork) # for side-by-side plotting
## Warning: package 'patchwork' was built under R version 4.0.2
data <- data.frame(x = 1:5, y=floor(runif(5)*10), height = c(0, 1, -1, 3, 2)) #y = rep(1, 5)
ggplot(data, aes(x, y, height = height)) + geom_ridgeline()
ggplot(data, aes(x, y, height = height)) + geom_ridgeline(min_height = -2)
#Plot multiple ridgelines
#The group aesthetic must be specified so that the geom knows
#which parts of the data belong to which ridgeline.
d <- data.frame(
x = rep(1:5, 3),
y = c(rep(0, 5), rep(1, 5), rep(2, 5)),
height = c(0, 1, 3, 4, 0, 1, 2, 3, 5, 4, 0, 5, 4, 4, 1)
)
ggplot(d, aes(x, y, height = height, group = y)) + #so y is like an index or grouping variable
geom_ridgeline(fill = "lightblue")
#Drawing ridgelines with geom_density_ridges if we set stat = "identity".
#In this case, the heights are automatically scaled such
#that the highest ridgeline just touches the one above at scale = 1.
ggplot(d, aes(x, y, height = height, group = y)) +
geom_density_ridges(stat = "identity", scale = 1) #so scale works only if y takes value between y and y+1, right?
#Density Ridgeline Plots
#The geom geom_density_ridges calculates density estimates from the provided data
#and then plots those, using the ridgeline visualization.
#The height aesthetic does not need to be specified in this case.
ggplot(d, aes(x, y, group = y)) +
geom_density_ridges(scale = 1) #also because if you specify the height you can't get distribution of x in y
## Picking joint bandwidth of 0.974
iris <- iris #is this needed?
ggplot(iris, aes(x = Sepal.Length, y = Species)) + geom_density_ridges()
## Picking joint bandwidth of 0.181
#There is also geom_density_ridges2, which uses closed polygons
#instead of ridgelines for drawing.
ggplot(iris, aes(x = Sepal.Length, y = Species)) + geom_density_ridges2() #not working for me?
## Picking joint bandwidth of 0.181
#Trailing tails can be cut off using the rel_min_height aesthetic.
#This aesthetic sets a percent cutoff relative to the highest point of any of the density curves.
#A value of 0.01 usually works well, but you may have to modify this parameter for different datasets.
ggplot(iris, aes(x = Sepal.Length, y = Species)) +
geom_density_ridges(rel_min_height = 0.01)
## Picking joint bandwidth of 0.181
#Set overlap using Scale =
#A setting of scale=1 means the tallest density curve just touches the baseline
#of the next higher one. Smaller values create a separation between the curves,
#and larger values create more overlap.
# scale = 0.9, not quite touching
ggplot(iris, aes(x = Sepal.Length, y = Species)) + geom_density_ridges(scale = 0.9)
## Picking joint bandwidth of 0.181
ggplot(iris, aes(x = Sepal.Length, y = Species)) + geom_density_ridges(scale = 0.3)
## Picking joint bandwidth of 0.181
ggplot(iris, aes(x = Sepal.Length, y = Species)) + geom_density_ridges(scale = 5)
## Picking joint bandwidth of 0.181
#Varying fill colors along the x axis
#Create gradients using geom_ridgeline_gradient and geom_density_ridges_gradient
d <- data.frame(
x = rep(1:5, 3) + c(rep(0, 5), rep(0.3, 5), rep(0.6, 5)),
y = c(rep(0, 5), rep(1, 5), rep(3, 5)),
height = c(0, 1, 3, 4, 0, 1, 2, 3, 5, 4, 0, 5, 4, 4, 1))
ggplot(d, aes(x, y, height = height, group = y, fill = factor(x+y))) +
geom_ridgeline_gradient() +
scale_fill_viridis_d(direction = -1, guide = "none") #direction of the color gradient? Guide? fill = factor(x+y)? I don't think this colours are well displayed :(
#Creating gradients for density plot
lincoln_weather <- lincoln_weather
ggplot(lincoln_weather, aes(x = `Mean Temperature [F]`, y = Month, fill = stat(x))) +
geom_density_ridges_gradient(scale = 3,rel_min_height = 0.01)
## Picking joint bandwidth of 3.37
#Add specific type of gradient colors, legend title, and main title
ggplot(lincoln_weather, aes(x = `Mean Temperature [F]`, y = Month, fill = stat(x))) +
geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
scale_fill_viridis_c(name = "Temp. [F]", option = "C") + #where is the colour stated here? fill, and then option?
labs(title = 'Temperatures in Lincoln NE in 2016')
## Picking joint bandwidth of 3.37
#Adding stats
#The ggridges package provides a stat_density_ridges.
#By setting the option quantile_lines = TRUE,
#we can make stat_density_ridges calculate the position of lines indicating quantiles.
#By default, it breaks distibution into quartles
ggplot(iris, aes(x = Sepal.Length, y = Species)) +
stat_density_ridges(quantile_lines = TRUE)
## Picking joint bandwidth of 0.181
#Change the number of quantiles by specifying it via the quantiles option.
#Note that quantiles = 2 implies one line (the median) at the boundary between the two quantiles.
ggplot(iris, aes(x = Sepal.Length, y = Species)) +
stat_density_ridges(quantile_lines = TRUE, quantiles = 2)
## Picking joint bandwidth of 0.181
#We can also specify quantiles by cut points rather than number.
#E.g., we can indicate the 2.5% and 97.5% tails.
ggplot(iris, aes(x = Sepal.Length, y = Species)) +
stat_density_ridges(quantile_lines = TRUE, quantiles = c(0.025, 0.975), alpha = 0.7)
## Picking joint bandwidth of 0.181
#Using the geom geom_density_ridges_gradient we can also color by quantile,
#via the calculated stat(quantile) aesthetic.
#Note that this aesthetic is only calculated if calc_ecdf = TRUE.
ggplot(iris, aes(x=Sepal.Length, y=Species, fill = factor(stat(quantile)))) +
stat_density_ridges(
geom = "density_ridges_gradient", calc_ecdf = TRUE,
quantiles = 4, quantile_lines = TRUE
) +
scale_fill_viridis_d(name = "Quartiles")
## Picking joint bandwidth of 0.181
#Adding Jitterinb Points
#The stat stat_density_ridges also provides the option to visualize
#the original data points from which the distributions are generated.
#This can be done by setting jittered_points = TRUE,
#either in stat_density_ridges or in geom_density_ridges:
ggplot(iris, aes(x = Sepal.Length, y = Species)) +
geom_density_ridges(jittered_points = TRUE)
## Picking joint bandwidth of 0.181
#Where the points are shown can be controlled with position options,
#e.g. 'raincloud' for the raincloud effect:
ggplot(iris, aes(x = Sepal.Length, y = Species)) +
geom_density_ridges(
jittered_points = TRUE, position = "raincloud",
alpha = 0.7, scale = 0.9
)
## Picking joint bandwidth of 0.181
#Style the jiggered points
#scale_discrete_manual() which can be used to make arbitrary discrete scales
#for arbitrary aesthetics.
#various point aesthetic scales, such as scale_point_color_hue()
ggplot(iris, aes(x = Sepal.Length, y = Species, fill = Species)) +
geom_density_ridges(
aes(point_color = Species, point_fill = Species, point_shape = Species),
alpha = .2, point_alpha = 1, jittered_points = TRUE
) +
scale_point_color_hue(l = 40) +
scale_discrete_manual(aesthetics = "point_shape", values = c(21, 22, 23))
## Picking joint bandwidth of 0.181
#Themed ridges
#Cyclical scales to draw ridelines with alternating colors
#scale_fill_cyclical(values = c("blue", "green"))
diamonds <- diamonds
ggplot(diamonds, aes(x = price, y = cut, fill = cut)) +
geom_density_ridges(scale = 4) +
scale_fill_cyclical(values = c("blue", "green"))
## Picking joint bandwidth of 458
#Add a legend by using guide = "legend"
ggplot(diamonds, aes(x = price, y = cut, fill = cut)) +
geom_density_ridges(scale = 4) +
scale_fill_cyclical(values = c("blue", "green",'red','yellow'), guide = "legend") #isn't this just displaying two values?
## Picking joint bandwidth of 458
library(readxl)
## Warning: package 'readxl' was built under R version 4.0.2
EMG_DATA_R_2 <- read_excel("~/MATLAB/EMG_DATA_R_2.xls",
col_names = FALSE)
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
#View(EMG_DATA_R_2)
normalized_emg = as.data.frame(EMG_DATA_R_2)
colnames(normalized_emg) = c('RTA','LTA','RMG','LMG','RRF','LRF','RBF','LBF')
#plot(normalized_emg[,1],type='l')
library(ggridges)
library(ggplot2)
our_plot = matrix(NA,length(normalized_emg[,1])*8,3)
colnames(our_plot) = c('Muscles', 'Hz', 'EMG')
our_plot[,1] = NA
our_plot[1:3360,1] = 1#'RTA'
our_plot[3360:(3360*2),1] = 2#'LTA'
our_plot[(3360*2):(3360*3),1] =3 #'RMG'
our_plot[(3360*3):(3360*4),1] = 4#'LMG'
our_plot[(3360*4):(3360*5),1] = 5#'RRF'
our_plot[(3360*5):(3360*6),1] = 6#'LRF'
our_plot[(3360*6):(3360*7),1] = 7#'RBF'
our_plot[(3360*7):(3360*8),1] = 8#'LBF'
Time<-c(1:3360)
our_plot[,2] = rep(Time, 8)
our_plot[,3] = round(c(normalized_emg$RTA,normalized_emg$LTA,normalized_emg$RMG,
normalized_emg$LMG,normalized_emg$RRF,normalized_emg$LRF,
normalized_emg$RBF,normalized_emg$LBF),4)
our_plot = as.data.frame(our_plot)
ggplot(our_plot,aes(x=Hz,y=Muscles,height=EMG,group=Muscles)) + geom_ridgeline(fill=('blue'),scale=.1,alpha=0.5) # + scale_fill_cyclical(values = c('blue','green'))
# I can't manage to put the muscles names in the 8 different emg signals...