# Assignment 2

library(HistData)
library(vcd)
## Loading required package: grid
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Load Arbuthnot's London christening records. The package is named
# explicitly (as is done for the Saxony data further down) so the lookup does
# not depend on the search path or on a same-named dataset elsewhere.
data("Arbuthnot", package = "HistData")
# Print the structure of the data frame (column names, types, first values).
str(Arbuthnot)
## 'data.frame':    82 obs. of  7 variables:
##  $ Year     : int  1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 ...
##  $ Males    : int  5218 4858 4422 4994 5158 5035 5106 4917 4703 5359 ...
##  $ Females  : int  4683 4457 4102 4590 4839 4820 4928 4605 4457 4952 ...
##  $ Plague   : int  0 1317 274 8 0 1 0 10400 3082 363 ...
##  $ Mortality: int  8771 10554 8562 9535 8393 10400 10651 23359 11763 13624 ...
##  $ Ratio    : num  1.11 1.09 1.08 1.09 1.07 ...
##  $ Total    : num  9.9 9.31 8.52 9.58 10 ...
# Male-to-female christening ratio per year, with horizontal reference lines
# at the maximum, minimum and mean of the ratio.
#
# The summary values are stored as ratio_max / ratio_min / ratio_mean rather
# than `max`, `min` and `mean`, so the base functions of the same names are
# not shadowed by numeric variables.
ratio_max <- max(Arbuthnot$Ratio)
ratio_min <- min(Arbuthnot$Ratio)
ratio_mean <- mean(Arbuthnot$Ratio)

plot_ly(
  Arbuthnot,
  x = ~Year, y = ~Ratio,
  name = "Sex: Male vs. Female",
  type = "scatter", mode = "lines",
  line = list(color = "Black", width = 2)
) %>%
  # Each reference line reuses the x values of the base trace; the formulas
  # pick up the scalar summaries defined above.
  add_trace(
    y = ~ratio_max, name = "Maximum", mode = "lines",
    line = list(color = "red", width = 2)
  ) %>%
  add_trace(
    y = ~ratio_min, name = "Minimum", mode = "lines",
    line = list(color = "green", width = 2)
  ) %>%
  add_trace(
    y = ~ratio_mean, name = "Mean", mode = "lines",
    line = list(color = "blue", width = 2)
  )
# The graph shows that more boys than girls were born in London in every year from 1629 to 1710. The ratio of newborn boys to girls peaks in 1659 and drops to its lowest value in 1703.
# Total number of christenings per year (Total column, in thousands), with
# horizontal reference lines at the series maximum and minimum.
maximum <- max(Arbuthnot$Total)
minimum <- min(Arbuthnot$Total)

# The scatter mode must be "lines". With "line", plotly emits
# "A line object has been specified, but lines is not in the mode" once per
# trace, because the invalid mode is inherited by the added traces as well.
plot_ly(
  Arbuthnot,
  x = ~Year, y = ~Total,
  name = "Total Number of Christenings (in 000s)",
  type = "scatter", mode = "lines",
  line = list(color = "blue", width = 1.5)
) %>%
  add_trace(
    y = ~maximum, name = "Max Christening",
    line = list(color = "green", width = 2)
  ) %>%
  add_trace(
    y = ~minimum, name = "Min Christening",
    line = list(color = "red", width = 2)
  ) %>%
  layout(
    xaxis = list(title = "Year"),
    yaxis = list(title = "Total Number of Christenings (in 000s)")
  )
# A significant drop in the number of christenings is noticeable between 1648 and 1658, and again in 1704, which suggests that further research is needed to understand the reasons behind it. The number of christenings increased after 1660 and reached its peak in 1698. In 1704 there was a sudden drop, after which the numbers climbed back towards their peak level.
# Load the WomenQueue frequency table and inspect its structure.
data("WomenQueue")
str(WomenQueue)
##  table [1:11(1d)] 1 3 4 23 25 19 18 5 1 1 ...
##  - attr(*, "dimnames")=List of 1
##   ..$ nWomen: chr [1:11] "0" "1" "2" "3" ...
# Binomial fit with size fixed at 10; no prob is supplied to goodfit() here.
gf <- goodfit(
  WomenQueue,
  type = "binomial",
  par = list(size = 10)
)
# Hanging rootogram of the fit, with shading switched on.
plot(gf, type = "hanging", shade = TRUE)

# Diagnostic distribution plot for the binomial model with a 95% confidence
# level. The size is passed explicitly (10, the same value used in the
# goodfit() calls); without it distplot() warns "size was not given, taken as
# maximum count" and guesses the size from the data.
distplot(WomenQueue, type = "binomial", size = 10, conf_level = 0.95)

# Binomial fit with both parameters fixed: size 10 and prob 1/2.
gf_bino <- goodfit(
  WomenQueue,
  type = "binomial",
  par = list(size = 10, prob = 1 / 2)
)
# Hanging rootogram of the fully specified fit.
# NOTE(review): the recycling warnings below appear to come from the shading
# computation; confirm whether shade = TRUE is appropriate for this fit.
plot(gf_bino, type = "hanging", shade = TRUE)
## Warning in max.c * pmax(pmin(interpolate(abs(res)), 1), 0): longer object
## length is not a multiple of shorter object length
## Warning in max.c * pmax(pmin(interpolate(abs(res)), 1), 0): longer object
## length is not a multiple of shorter object length

# Load the Saxony table from vcd and fit a binomial with both parameters
# fixed (prob 0.5, size 12), then print the goodness-of-fit tests.
data("Saxony", package = "vcd")
gf_Sax <- goodfit(
  Saxony,
  type = "binomial",
  par = list(prob = 0.5, size = 12)
)
summary(gf_Sax)
## Warning in summary.goodfit(gf_Sax): Chi-squared approximation may be
## incorrect
## 
##   Goodness-of-fit test for binomial distribution
## 
##                       X^2 df     P(> X^2)
## Pearson          249.1954 12 2.013281e-46
## Likelihood Ratio 205.4060 12 2.493625e-37
# Second Saxony fit: only size (12) is fixed; no prob is passed to goodfit().
gf_Sax1 <- goodfit(
  Saxony,
  type = "binomial",
  par = list(size = 12)
)
summary(gf_Sax1)
## 
##   Goodness-of-fit test for binomial distribution
## 
##                      X^2 df     P(> X^2)
## Likelihood Ratio 97.0065 11 6.978187e-16
# Plot the fixed-probability Saxony fit (gf_Sax), labelled as panel "I".
plot(
  gf_Sax,
  main = "I",
  xlab = "Number of Males I"
)