library(LearnEDAfunctions)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Loading required package: ggplot2
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ lubridate 1.9.4     ✔ tibble    3.3.0
## ✔ purrr     1.1.0     ✔ tidyr     1.3.1
## ✔ readr     2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(vcd)
## Warning: package 'vcd' was built under R version 4.5.2
## Loading required package: grid
data("football")
football_df <- as.data.frame(football)
names(football_df) <- c("team1", "team2")
football_df$team1 <- as.numeric(football_df$team1)
football_df$team2 <- as.numeric(football_df$team2)
bins <- seq(-0.5, max(football_df$team1) + 6.5, by = 7)
bin.mids <- (bins[-1] + bins[-length(bins)]) / 2
ggplot(football_df, aes(team1)) +
  geom_histogram(breaks = bins,
                 fill = "white",
                 color = "red") +
  labs(title = "Histogram of Winning Team Scores",
       x = "team1 score",
       y = "count")

FL <- fivenum(football_df$team1)[2]
FU <- fivenum(football_df$team1)[4]

m <- (FL + FU) / 2
s <- (FU - FL) / 1.349

m; s
## [1] 30
## [1] 13.34322
p <- ggplot(football_df, aes(team1)) +
  geom_histogram(breaks = bins)
out <- ggplot_build(p)$data[[1]] %>%
  transmute(count,
            x = (xmin + xmax)/2)
out <- mutate(out, ROOT = sqrt(count))
ggplot(out, aes(x, ROOT)) +
  geom_col() +
  labs(title = "Rootogram of team1 Scores",
       x = "Score midpoint",
       y = "sqrt(count)")

fit <- fit.gaussian(football_df$team1, bins, m, s)
rootogram(fit$counts, fit$expected, type = "hanging")

rootogram(fit$counts, fit$expected, type = "deviation")

Question 2

data("studentdata")
women <- subset(studentdata, Gender == "female")
women$Height[is.na(women$Height)] <- median(women$Height, na.rm = TRUE)
bins <- seq(floor(min(women$Height)),
            ceiling(max(women$Height)),
            by = 3)
p <- ggplot(women, aes(Height)) +
  geom_histogram(breaks = bins)
out <- ggplot_build(p)$data[[1]] %>%
  transmute(
    count,
    x = (xmin + xmax) / 2 )
out <- mutate(out, ROOT = sqrt(count))
ggplot(out, aes(x, ROOT)) +
  geom_line() +
  xlab("Bin Mid Points") +
  ylab("Root Counts")

out <- mutate(out,
              Smooth.Root = as.vector(smooth(ROOT, twiceit = TRUE)))
ggplot(out, aes(x, ROOT)) +
  geom_line() +
  geom_line(aes(x, Smooth.Root), color = "red") +
  xlab("Bin Mid Points") +
  ylab("Root Counts")

peak <- out$x[which.max(out$Smooth.Root)]
out <- mutate(out, Shift.Sq = round((x - peak)^2, 2))
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
p1 <- ggplot(out, aes(Shift.Sq, power.t(Smooth.Root, 0.5))) + geom_point() + ggtitle("Power = 0.5")
p2 <- ggplot(out, aes(Shift.Sq, power.t(Smooth.Root, 0))) + geom_point() + ggtitle("Power = 0")
p3 <- ggplot(out, aes(Shift.Sq, power.t(Smooth.Root, -0.5))) + geom_point() + ggtitle("Power = -0.5")
p4 <- ggplot(out, aes(Shift.Sq, power.t(Smooth.Root, -1))) + geom_point() + ggtitle("Power = -1")
grid.arrange(p1, p2, p3, p4)

fit <- lm(I(Smooth.Root^(-0.75)) ~ Shift.Sq, data = out)
b <- coef(fit)
g <- b[1] + b[2] * out$Shift.Sq
g[g <= 0] <- NA
out <- mutate(out,
              Final.Smooth = g^(-2 / 0.75),
              Final.S.Root = sqrt(Final.Smooth))
ggplot(out, aes(x, ROOT)) +
  geom_line() +
  geom_line(aes(x, Final.S.Root), color = "red") +
  xlab("Bin Mid Points") +
  ylab("Root Counts")

out <- mutate(out, Residual = ROOT - Final.S.Root)
ggplot(out, aes(x, Residual)) +
  geom_point() +
  geom_hline(yintercept = 0) +
  xlab("Bin Mid Points") +
  ylab("Residuals")