HW_9_697V

suppressMessages(library(tidyverse))
# Load the singer data; it has a numeric `height` column and a factor
# `voice.part` column (see the summary() output further down).
singer <- readRDS("Cleveland_singer.rds")

# First Bass singers, selected by voice part instead of a hard-coded row
# range, so the subset stays correct even if the rows are reordered.
# which() also discards rows whose voice.part is NA.
bass1 <- singer[which(singer$voice.part == "Bass 1"), ]

# First and third quartiles of the First Bass heights.
quartiles <- quantile(bass1$height, probs = c(0.25, 0.75), na.rm = FALSE)
# NOTE: this variable shares its name with stats::IQR(). Calls such as
# IQR(x) still resolve to the function; the name is kept so that any later
# code reading `IQR` keeps working.
IQR <- IQR(bass1$height)

# Outlier fences: 1.5 * IQR below the first / above the third quartile.
# Indexing with [1] / [2] keeps the "25%" / "75%" names seen in the output.
Lower <- quartiles[1] - 1.5 * IQR
Upper <- quartiles[2] + 1.5 * IQR


quartiles[1]

## 25% 
##  69

quartiles[2]

## 75% 
##  72

Lower

##  25% 
## 64.5

Upper

##  75% 
## 76.5

# Dotted horizontal reference line at height `y`, drawn in `colour`.
ref_line <- function(y, colour) {
  geom_hline(yintercept = y, colour = colour, linetype = "dotted", size = 1)
}

# Box plot of the First Bass heights with reference lines overlaid:
# red = outlier fences, green = quartiles, blue = median.
p <- ggplot(bass1, aes(x = voice.part, y = height)) +
  geom_boxplot() +
  ref_line(Lower, "red") +
  ref_line(Upper, "red") +
  ref_line(quartiles[1], "green") +
  ref_line(quartiles[2], "green") +
  ref_line(median(bass1$height), "blue")

# Normal Q-Q plot of the same heights.
q <- ggplot(bass1, aes(sample = height)) +
  geom_qq()

library("gridExtra")

## 
## Attaching package: 'gridExtra'

## The following object is masked from 'package:dplyr':
## 
##     combine

# Box plot and Q-Q plot side by side.
grid.arrange(p, q, nrow = 1)

A Q-Q plot shows every individual data point, whereas a box plot summarizes the data and adds detail such as the quartiles.

The Q-Q plot also tells us about the shape of the distribution; in our case it looks approximately normal.

# Rows 1-66 of `singer`, treated in this report as the First Soprano group.
# NOTE(review): 66 rows presumably span both soprano voice parts, not only
# "Soprano 1" -- confirm with table(soporano1$voice.part) before relying on
# the "First Soprano" label.
soporano1 <- singer[1:66, ]

# First and third quartiles and the IQR of the soprano heights.
quartiles1 <- quantile(soporano1$height, probs = c(0.25, 0.75), na.rm = FALSE)
IQR1 <- IQR(soporano1$height)

# Outlier fences built from the soprano quartiles and IQR. These used to be
# computed from the bass `quartiles` and `IQR`, which put the fences at the
# bass values (64.5 / 76.5) instead of the soprano ones.
Lower1 <- quartiles1[1] - 1.5 * IQR1
Upper1 <- quartiles1[2] + 1.5 * IQR1


quartiles1[1]

## 25% 
##  62

quartiles1[2]

## 75% 
##  65

Lower1

##  25% 
## 57.5

Upper1

##  75% 
## 69.5

# Dotted reference lines for the soprano box plot, collected as a list of
# layers: red = outlier fences, green = quartiles, blue = median.
soprano_ref_lines <- list(
  geom_hline(yintercept = Lower1, colour = "red", linetype = "dotted", size = 1),
  geom_hline(yintercept = Upper1, colour = "red", linetype = "dotted", size = 1),
  geom_hline(yintercept = quartiles1[1], colour = "green", linetype = "dotted", size = 1),
  geom_hline(yintercept = quartiles1[2], colour = "green", linetype = "dotted", size = 1),
  geom_hline(yintercept = median(soporano1$height), colour = "blue", linetype = "dotted", size = 1)
)

# Box plot of the soprano heights with the reference lines on top.
p1 <- ggplot(soporano1, aes(x = voice.part, y = height)) +
  geom_boxplot() +
  soprano_ref_lines

# Normal Q-Q plot of the same heights.
q1 <- ggplot(soporano1, aes(sample = height)) +
  geom_qq()

library("gridExtra")

# Box plot and Q-Q plot side by side.
grid.arrange(p1, q1, nrow = 1)

The distribution for First Soprano looks left-skewed.

Q2

library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

#p1 <- plot_ly(bass1) %>% add_histogram(x = ~height)


# Shared starting point for both histograms of the bass1 heights.
bass_height_base <- ggplot(bass1, aes(x = height))

# Coarse (10 bins) and fine (50 bins) histograms of the same data.
h1 <- bass_height_base + geom_histogram(bins = 10)
h0 <- bass_height_base + geom_histogram(bins = 50)

# Normal Q-Q plot of the bass1 heights, shown next to each histogram.
q1 <- ggplot(bass1, aes(sample = height)) +
  geom_qq()

library("gridExtra")

# One figure per histogram, each paired with the Q-Q plot:
# first the 10-bin version, then the 50-bin version.
for (hist_plot in list(h1, h0)) {
  grid.arrange(hist_plot, q1, nrow = 1)
}

## Q.3

A Q-Q plot shows every individual data point, whereas a box plot summarizes the data and adds detail such as the quartiles.

The Q-Q plot also tells us about the shape of the distribution; in our case it looks approximately normal.

# Base-graphics histogram of the bass1 heights. hist() builds its default
# title and x-axis label from the argument expression as written.
hist(bass1$height)

 # Base-graphics normal Q-Q plot of the same bass1 heights.
 qqnorm(bass1$height)

## Q. 4

Comparison of distributions: First Bass, First Soprano, First Alto

# First Alto singers, selected by voice part instead of a hard-coded row
# range, so the subset stays correct even if the rows are reordered.
# which() also discards rows whose voice.part is NA.
alto1 <- singer[which(singer$voice.part == "Alto 1"), ]


## First Bass
# 10-bin histogram and normal Q-Q plot of the bass1 heights.
h1 <-
  ggplot(bass1, aes(x = height)) +
  geom_histogram(bins = 10) +
  labs(title = " First Bass")

q1 <- ggplot(bass1, aes(sample = height)) +
  geom_qq() +
  labs(title = "First Bass")


## First Alto
# The same two plots for the alto1 heights.
h3 <-
  ggplot(alto1, aes(x = height)) +
  geom_histogram(bins = 10) +
  labs(title = " First Alto")

q3 <- ggplot(alto1, aes(sample = height)) +
  geom_qq() +
  labs(title = " First Alto")

 
####

# Keep only the Bass 1 and Alto 1 singers and drop the voice.part levels
# that are no longer used.
x1 <- singer %>%
  filter(voice.part %in% c("Bass 1", "Alto 1")) %>%
  mutate(voice.part = droplevels(voice.part))

# Heights per voice part, jittered horizontally only so that the heights
# themselves are not altered.
a1 <- ggplot(x1, aes(x = voice.part, y = height, colour = voice.part)) +
  geom_jitter(width = 0.3, height = 0)

# Box plots of the same two groups.
a2 <- ggplot(x1, aes(x = voice.part, y = height)) +
  geom_boxplot()

# Side-by-side comparisons: histograms, Q-Q plots, then jitter vs. box plot.
grid.arrange(h1, h3, nrow = 1)

grid.arrange(q1, q3, nrow = 1)

grid.arrange(a1, a2, nrow = 1)

Q.5 Manual Q-Q Plot

# Five-number summaries and voice.part counts for the two groups.
summary(bass1)

##      height        voice.part
##  Min.   :66.00   Bass 1 :39  
##  1st Qu.:69.00   Bass 2 : 0  
##  Median :71.00   Tenor 2: 0  
##  Mean   :70.72   Tenor 1: 0  
##  3rd Qu.:72.00   Alto 2 : 0  
##  Max.   :75.00   Alto 1 : 0  
##                  (Other): 0

summary(alto1)

##      height        voice.part
##  Min.   :60.00   Alto 1 :35  
##  1st Qu.:63.00   Bass 2 : 0  
##  Median :65.00   Bass 1 : 0  
##  Mean   :64.89   Tenor 2: 0  
##  3rd Qu.:66.50   Tenor 1: 0  
##  Max.   :72.00   Alto 2 : 0  
##                  (Other): 0

# F-values for the smaller (alto1) data set: f_i = (i - 0.5) / n.
# n is read from the data instead of being hard-coded as 35, and seq_len()
# stays correct even for an empty data set.
n_alto <- nrow(alto1)
i <- seq_len(n_alto)
f <- (i - 0.5) / n_alto

round(f, 2)

##  [1] 0.01 0.04 0.07 0.10 0.13 0.16 0.19 0.21 0.24 0.27 0.30 0.33 0.36 0.39 0.41
## [16] 0.44 0.47 0.50 0.53 0.56 0.59 0.61 0.64 0.67 0.70 0.73 0.76 0.79 0.81 0.84
## [31] 0.87 0.90 0.93 0.96 0.99

# Interpolation

# Sort by height and give every row its within-group f-value,
# (rank - 0.5) / group size. ungroup() afterwards so the grouping does not
# leak into the later filter()/mutate() steps.
q <- x1 %>%
  group_by(voice.part) %>%
  arrange(height) %>%
  mutate(f.val = (row_number() - 0.5) / n()) %>%
  ungroup()

qbass <- q %>% filter(voice.part == "Bass 1")
qalto <- q %>% filter(voice.part == "Alto 1")

# Linearly interpolate the Bass 1 heights at the f-values in `f`, so that
# both groups are evaluated at the same quantile positions.
interp <- approx(x = qbass$f.val, y = qbass$height, xout = f)
interp

## $x
##  [1] 0.01428571 0.04285714 0.07142857 0.10000000 0.12857143 0.15714286
##  [7] 0.18571429 0.21428571 0.24285714 0.27142857 0.30000000 0.32857143
## [13] 0.35714286 0.38571429 0.41428571 0.44285714 0.47142857 0.50000000
## [19] 0.52857143 0.55714286 0.58571429 0.61428571 0.64285714 0.67142857
## [25] 0.70000000 0.72857143 0.75714286 0.78571429 0.81428571 0.84285714
## [31] 0.87142857 0.90000000 0.92857143 0.95714286 0.98571429
## 
## $y
##  [1] 66.00000 66.34286 68.00000 68.00000 68.00000 68.00000 68.00000 68.85714
##  [9] 69.00000 69.08571 70.00000 70.00000 70.00000 70.00000 70.00000 70.00000
## [17] 70.00000 71.00000 71.00000 71.00000 71.00000 71.00000 71.57143 72.00000
## [25] 72.00000 72.00000 72.00000 72.14286 73.00000 73.00000 73.48571 74.60000
## [33] 75.00000 75.00000 75.00000

# approx() returns NA for any xout outside the range of the Bass 1 f-values;
# fail loudly instead of letting the plot silently drop those points.
stopifnot(!anyNA(interp$y))

# Pair each sorted Alto 1 height with the Bass 1 height interpolated at the
# same f-value.
s.qq <- qalto %>%
  mutate(`Bass 1` = interp$y) %>%
  rename(`Alto 1` = height)

# Manual Q-Q plot; points on the y = x line mean the quantiles are equal.
ggplot(s.qq, aes(x = `Alto 1`, y = `Bass 1`)) +
  geom_point() +
  geom_abline(intercept = 0, slope = 1)

## Q. 6

The Q-Q plot shows the relationship between the quantiles of two data sets (bass1 and alto1). If the two distributions are identical, all data points fall on the line y = x, which passes through the origin with slope 1.

I would say the following plot gives more information about each individual distribution, and it also lets you compare them simultaneously.

 # Jittered heights (a1) next to the box plots (a2) in a single row.
 grid.arrange(grobs = list(a1, a2), nrow = 1)

HW_9_697V

Shubham Suryawanshi

4/12/2022

A Q-Q plot shows every individual data point, whereas a box plot summarizes the data and adds detail such as the quartiles.

The Q-Q plot also tells us about the shape of the distribution; in our case it looks approximately normal.

The distribution for First Soprano looks left-skewed.

Q2

A Q-Q plot shows every individual data point, whereas a box plot summarizes the data and adds detail such as the quartiles.

The Q-Q plot also tells us about the shape of the distribution; in our case it looks approximately normal.

Comparison of distributions: First Bass, First Soprano, First Alto

Q.5

Manual Q-Q Plot

The Q-Q plot shows the relationship between the quantiles of two data sets (bass1 and alto1). If the two distributions are identical, all data points fall on the line y = x, which passes through the origin with slope 1.

I would say the following plot gives more information about each individual distribution, and it also lets you compare them simultaneously.