suppressMessages(library(tidyverse))
singer <- readRDS("Cleveland_singer.rds")
bass1<- singer[171:209,]
quartiles <- quantile(bass1$height, probs=c(.25, .75), na.rm = FALSE)
IQR <- IQR(bass1$height)
Lower <- quartiles[1] - 1.5*IQR
Upper <- quartiles[2] + 1.5*IQR
quartiles[1]
## 25%
## 69
quartiles[2]
## 75%
## 72
Lower
## 25%
## 64.5
Upper
## 75%
## 76.5
p<-ggplot(bass1, aes(x=voice.part, y=height)) +
geom_boxplot()+
geom_hline(yintercept = Lower, col="red", linetype="dotted", size=1) +
geom_hline(yintercept = Upper, col="red", linetype="dotted", size=1) +
geom_hline(yintercept = quartiles[1], col="green", linetype="dotted", size=1)+
geom_hline(yintercept = quartiles[2], col="green", linetype="dotted", size=1) +
geom_hline(yintercept = median(bass1$height), col="blue", linetype="dotted", size=1)
q<- ggplot(bass1, aes(sample = height)) +
geom_qq()
library("gridExtra")
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
grid.arrange(p, q, nrow = 1)
soporano1<- singer[1:66,]
quartiles1 <- quantile(soporano1$height, probs=c(.25, .75), na.rm = FALSE)
IQR1 <- IQR(soporano1$height)
Lower1 <- quartiles[1] - 1.5*IQR
Upper1 <- quartiles[2] + 1.5*IQR
quartiles1[1]
## 25%
## 62
quartiles1[2]
## 75%
## 65
Lower1
## 25%
## 64.5
Upper1
## 75%
## 76.5
p1<-ggplot(soporano1, aes(x=voice.part, y=height)) +
geom_boxplot()+
geom_hline(yintercept = Lower1, col="red", linetype="dotted", size=1) +
geom_hline(yintercept = Upper1, col="red", linetype="dotted", size=1) +
geom_hline(yintercept = quartiles1[1], col="green", linetype="dotted", size=1)+
geom_hline(yintercept = quartiles1[2], col="green", linetype="dotted", size=1) +
geom_hline(yintercept = median(soporano1$height), col="blue", linetype="dotted", size=1)
q1<- ggplot(soporano1, aes(sample = height)) +
geom_qq()
library("gridExtra")
grid.arrange(p1, q1, nrow = 1)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
#p1 <- plot_ly(bass1) %>% add_histogram(x = ~height)
h1 <-
ggplot(bass1, aes(x = height)) +
geom_histogram(bins=10)
h0 <-
ggplot(bass1, aes(x = height)) +
geom_histogram(bins=50)
q1<- ggplot(bass1, aes(sample = height)) +
geom_qq()
library("gridExtra")
grid.arrange(h1, q1, nrow = 1)
grid.arrange(h0, q1, nrow = 1)
## Q.3
hist(bass1$height)
qqnorm(bass1$height)
## Q. 4
alto1<- singer[67:101,]
## First Bass
h1 <-
ggplot(bass1, aes(x = height)) +
geom_histogram(bins=10) + labs(title=" First Bass")
q1<- ggplot(bass1, aes(sample = height)) +
geom_qq()+ labs(title="First Bass")
## First Alto
h3<-
ggplot(alto1, aes(x = height)) +
geom_histogram(bins=10) + labs(title=" First Alto")
q3<- ggplot(alto1, aes(sample = height)) +
geom_qq()+ labs(title=" First Alto")
####
x1 <- filter(singer, voice.part == "Bass 1" | voice.part=="Alto 1") %>%
mutate(voice.part = droplevels(voice.part))
a1<- ggplot(x1, aes(y=height, x=voice.part, col=voice.part)) +
geom_jitter(position = position_jitter(width = .3, height=0))
a2<-ggplot(x1) + aes(x = voice.part, y = height) + geom_boxplot()
grid.arrange(h1, h3, nrow = 1)
grid.arrange(q1, q3, nrow = 1)
grid.arrange(a1, a2, nrow = 1)
summary(bass1)
## height voice.part
## Min. :66.00 Bass 1 :39
## 1st Qu.:69.00 Bass 2 : 0
## Median :71.00 Tenor 2: 0
## Mean :70.72 Tenor 1: 0
## 3rd Qu.:72.00 Alto 2 : 0
## Max. :75.00 Alto 1 : 0
## (Other): 0
summary(alto1)
## height voice.part
## Min. :60.00 Alto 1 :35
## 1st Qu.:63.00 Bass 2 : 0
## Median :65.00 Bass 1 : 0
## Mean :64.89 Tenor 2: 0
## 3rd Qu.:66.50 Tenor 1: 0
## Max. :72.00 Alto 2 : 0
## (Other): 0
# F-Vaues for 35 dataset
i <- 1:35
f <- (i - 0.5) / 35
round(f,2)
## [1] 0.01 0.04 0.07 0.10 0.13 0.16 0.19 0.21 0.24 0.27 0.30 0.33 0.36 0.39 0.41
## [16] 0.44 0.47 0.50 0.53 0.56 0.59 0.61 0.64 0.67 0.70 0.73 0.76 0.79 0.81 0.84
## [31] 0.87 0.90 0.93 0.96 0.99
# Interpolation
q <- x1 %>%
group_by(voice.part) %>%
arrange(height) %>%
mutate( f.val = (row_number() - 0.5 ) / n())
qbass <- q %>% filter( voice.part == "Bass 1")
qalto <- q %>% filter( voice.part == "Alto 1")
interp <- approx(qbass$f.val, qbass$height, f)
interp
## $x
## [1] 0.01428571 0.04285714 0.07142857 0.10000000 0.12857143 0.15714286
## [7] 0.18571429 0.21428571 0.24285714 0.27142857 0.30000000 0.32857143
## [13] 0.35714286 0.38571429 0.41428571 0.44285714 0.47142857 0.50000000
## [19] 0.52857143 0.55714286 0.58571429 0.61428571 0.64285714 0.67142857
## [25] 0.70000000 0.72857143 0.75714286 0.78571429 0.81428571 0.84285714
## [31] 0.87142857 0.90000000 0.92857143 0.95714286 0.98571429
##
## $y
## [1] 66.00000 66.34286 68.00000 68.00000 68.00000 68.00000 68.00000 68.85714
## [9] 69.00000 69.08571 70.00000 70.00000 70.00000 70.00000 70.00000 70.00000
## [17] 70.00000 71.00000 71.00000 71.00000 71.00000 71.00000 71.57143 72.00000
## [25] 72.00000 72.00000 72.00000 72.14286 73.00000 73.00000 73.48571 74.60000
## [33] 75.00000 75.00000 75.00000
s.qq <- qalto %>% mutate(`Bass 1` = interp$y) %>%
rename(`Alto 1` = height)
ggplot(s.qq, aes( x= `Alto 1`, y = `Bass 1`)) + geom_point() + geom_abline( intercept=0, slope=1)
## Q. 6
grid.arrange(a1, a2, nrow = 1)