suppressMessages(library(tidyverse))
singer <- readRDS("Cleveland_singer.rds")

bass1<- singer[171:209,]
quartiles <- quantile(bass1$height, probs=c(.25, .75), na.rm = FALSE)
IQR <- IQR(bass1$height)

Lower <- quartiles[1] - 1.5*IQR
Upper <- quartiles[2] + 1.5*IQR


quartiles[1]
## 25% 
##  69
quartiles[2]
## 75% 
##  72
Lower
##  25% 
## 64.5
Upper
##  75% 
## 76.5
p<-ggplot(bass1, aes(x=voice.part, y=height)) +
  geom_boxplot()+
  geom_hline(yintercept = Lower, col="red", linetype="dotted", size=1) +
  geom_hline(yintercept = Upper, col="red", linetype="dotted", size=1) +
  geom_hline(yintercept = quartiles[1], col="green", linetype="dotted", size=1)+
  geom_hline(yintercept = quartiles[2], col="green", linetype="dotted", size=1) +
    geom_hline(yintercept = median(bass1$height), col="blue", linetype="dotted", size=1)

 q<- ggplot(bass1, aes(sample = height)) +
  geom_qq()
 
 library("gridExtra")
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
grid.arrange(p, q, nrow = 1)

By looking at QQ plot we can see all the data points; whereas a box plot provides extra detail like quartiles.

QQ-plot tells us about the type of distribution, in our case it looks like normal.

soporano1<- singer[1:66,]

quartiles1 <- quantile(soporano1$height, probs=c(.25, .75), na.rm = FALSE)
IQR1 <- IQR(soporano1$height)

Lower1 <- quartiles[1] - 1.5*IQR
Upper1 <- quartiles[2] + 1.5*IQR


quartiles1[1]
## 25% 
##  62
quartiles1[2]
## 75% 
##  65
Lower1
##  25% 
## 64.5
Upper1
##  75% 
## 76.5
p1<-ggplot(soporano1, aes(x=voice.part, y=height)) +
  geom_boxplot()+
  geom_hline(yintercept = Lower1, col="red", linetype="dotted", size=1) +
  geom_hline(yintercept = Upper1, col="red", linetype="dotted", size=1) +
  geom_hline(yintercept = quartiles1[1], col="green", linetype="dotted", size=1)+
  geom_hline(yintercept = quartiles1[2], col="green", linetype="dotted", size=1) +
    geom_hline(yintercept = median(soporano1$height), col="blue", linetype="dotted", size=1)

 q1<- ggplot(soporano1, aes(sample = height)) +
  geom_qq()

 library("gridExtra")

grid.arrange(p1, q1, nrow = 1) 

The distribution for First Soporano looks left Skewed.

Q2

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
#p1 <- plot_ly(bass1) %>% add_histogram(x = ~height)


h1 <- 
  ggplot(bass1, aes(x = height)) +
  geom_histogram(bins=10)


h0 <- 
  ggplot(bass1, aes(x = height)) +
  geom_histogram(bins=50)


 q1<- ggplot(bass1, aes(sample = height)) +
  geom_qq()

 library("gridExtra")

grid.arrange(h1, q1, nrow = 1) 

grid.arrange(h0, q1, nrow = 1) 

## Q.3

By looking at QQ plot we can see all the data points; whereas a box plot provides extra detail like quartiles.

QQ-plot tells us about the type of distribution, in our case it looks like normal.

hist(bass1$height)

 qqnorm(bass1$height)

## Q. 4

Comparision of distributions: First Bass, First Soporano, First Alto

alto1<- singer[67:101,]


## First Bass 
h1 <- 
  ggplot(bass1, aes(x = height)) +
  geom_histogram(bins=10) + labs(title=" First Bass")

 q1<- ggplot(bass1, aes(sample = height)) +
  geom_qq()+ labs(title="First Bass")


 
 ## First Alto
h3<- 
  ggplot(alto1, aes(x = height)) +
  geom_histogram(bins=10) + labs(title=" First Alto")

 q3<- ggplot(alto1, aes(sample = height)) +
  geom_qq()+ labs(title=" First Alto")

 
####
   
   x1 <- filter(singer, voice.part == "Bass 1" |  voice.part=="Alto 1") %>% 
       mutate(voice.part = droplevels(voice.part))
   

   
a1<- ggplot(x1, aes(y=height, x=voice.part, col=voice.part)) + 
            geom_jitter(position = position_jitter(width = .3, height=0))

a2<-ggplot(x1) + aes(x = voice.part, y = height) + geom_boxplot()

grid.arrange(h1, h3, nrow = 1) 

   grid.arrange(q1, q3, nrow = 1)  

 grid.arrange(a1, a2, nrow = 1)

Q.5

Manual Q-Q Plot

summary(bass1)
##      height        voice.part
##  Min.   :66.00   Bass 1 :39  
##  1st Qu.:69.00   Bass 2 : 0  
##  Median :71.00   Tenor 2: 0  
##  Mean   :70.72   Tenor 1: 0  
##  3rd Qu.:72.00   Alto 2 : 0  
##  Max.   :75.00   Alto 1 : 0  
##                  (Other): 0
summary(alto1)
##      height        voice.part
##  Min.   :60.00   Alto 1 :35  
##  1st Qu.:63.00   Bass 2 : 0  
##  Median :65.00   Bass 1 : 0  
##  Mean   :64.89   Tenor 2: 0  
##  3rd Qu.:66.50   Tenor 1: 0  
##  Max.   :72.00   Alto 2 : 0  
##                  (Other): 0
# F-Vaues for 35 dataset
i  <- 1:35
f <- (i - 0.5) / 35

round(f,2)
##  [1] 0.01 0.04 0.07 0.10 0.13 0.16 0.19 0.21 0.24 0.27 0.30 0.33 0.36 0.39 0.41
## [16] 0.44 0.47 0.50 0.53 0.56 0.59 0.61 0.64 0.67 0.70 0.73 0.76 0.79 0.81 0.84
## [31] 0.87 0.90 0.93 0.96 0.99
# Interpolation

q <- x1 %>% 
         group_by(voice.part) %>%
         arrange(height) %>%
         mutate( f.val = (row_number() - 0.5 ) / n())

qbass  <- q %>%  filter( voice.part == "Bass 1") 
qalto <- q %>%  filter( voice.part == "Alto 1") 

interp <- approx(qbass$f.val, qbass$height, f)
interp
## $x
##  [1] 0.01428571 0.04285714 0.07142857 0.10000000 0.12857143 0.15714286
##  [7] 0.18571429 0.21428571 0.24285714 0.27142857 0.30000000 0.32857143
## [13] 0.35714286 0.38571429 0.41428571 0.44285714 0.47142857 0.50000000
## [19] 0.52857143 0.55714286 0.58571429 0.61428571 0.64285714 0.67142857
## [25] 0.70000000 0.72857143 0.75714286 0.78571429 0.81428571 0.84285714
## [31] 0.87142857 0.90000000 0.92857143 0.95714286 0.98571429
## 
## $y
##  [1] 66.00000 66.34286 68.00000 68.00000 68.00000 68.00000 68.00000 68.85714
##  [9] 69.00000 69.08571 70.00000 70.00000 70.00000 70.00000 70.00000 70.00000
## [17] 70.00000 71.00000 71.00000 71.00000 71.00000 71.00000 71.57143 72.00000
## [25] 72.00000 72.00000 72.00000 72.14286 73.00000 73.00000 73.48571 74.60000
## [33] 75.00000 75.00000 75.00000
s.qq <- qalto %>% mutate(`Bass 1`  = interp$y) %>% 
                     rename(`Alto 1` = height) 

ggplot(s.qq, aes( x= `Alto 1`, y = `Bass 1`)) + geom_point() + geom_abline( intercept=0, slope=1)

## Q. 6

The q-q plot gives the relationship between two data sets(bass1 and alto1). If the two datasets are identical all dataponits fall on line paaing through origin.

I would say the follwoing plot gives more information about individual distribution as well as you can compare them simultaniously.

 grid.arrange(a1, a2, nrow = 1)