# Load the TSA applicant data; the relative path is resolved against the
# current working directory.
tsadata <- read.csv('tsadata.csv')
# Install tidyverse only when it is missing, so re-running the script does not
# re-download the package (or fail when no network/CRAN mirror is available).
if (!requireNamespace('tidyverse', quietly = TRUE)) {
  install.packages('tidyverse')
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library('tidyverse')
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library('broom')

# Histogram of the raw TSA scores, using bins one score point wide.
ggplot(tsadata, aes(x = TSA)) +
  geom_histogram(binwidth = 1) +
  labs(
    title = 'Distribution of TSA scores',
    x = 'TSA Score',
    y = 'Frequency'
  )

# Sample mean of the TSA scores.
tsadata$TSA |> mean()
## [1] 61.05055
# Sample standard deviation of the TSA scores.
tsadata$TSA |> sd()
## [1] 9.278309
# Histogram of TSA scores drawn on the density scale, with a normal curve
# (parameterised by the sample mean and sd) overlaid on the same axis.
tsadata |>
  ggplot(aes(x = TSA)) +
  # after_stat(density) replaces the `..density..` dot-dot notation, which was
  # deprecated in ggplot2 3.4.0.
  geom_histogram(aes(y = after_stat(density)), binwidth = 1) +
  stat_function(
    fun = dnorm,
    args = list(mean = mean(tsadata$TSA), sd = sd(tsadata$TSA))
  ) +
  labs(title = 'Distribution of TSA scores') +
  xlab('TSA Score') +
  # The bars and the curve are both densities, so the axis is not a count.
  ylab('Density')

# Normal density curves fitted to the TSA scores of male ('M') and female
# ('F') applicants, each using that group's sample mean and sd.
tsadata |>
  ggplot(aes(x = TSA)) +
  stat_function(
    aes(color = 'Male'),
    fun = dnorm,
    args = list(
      mean = mean(tsadata$TSA[tsadata$Gender == 'M']),
      sd = sd(tsadata$TSA[tsadata$Gender == 'M'])
    )
  ) +
  stat_function(
    aes(color = 'Female'),
    fun = dnorm,
    args = list(
      mean = mean(tsadata$TSA[tsadata$Gender == 'F']),
      sd = sd(tsadata$TSA[tsadata$Gender == 'F'])
    )
  ) +
  # Name the legend explicitly instead of relying on the default title.
  labs(
    title = 'Distribution of TSA Scores for Males and Females',
    color = 'Gender'
  ) +
  xlab('TSA Score') +
  # dnorm() returns a probability density, so the y-axis is not a frequency.
  ylab('Density')

# Normal density curves fitted to the TSA scores of applicants from
# independent ('I'), state ('S') and overseas ('O') schools, each using that
# group's sample mean and sd.
# The plot-level `color = SchoolType` mapping is dropped: every layer supplies
# its own constant colour aesthetic, which overrides it.
tsadata |>
  ggplot(aes(x = TSA)) +
  stat_function(
    aes(color = 'Independent'),
    fun = dnorm,
    args = list(
      mean = mean(tsadata$TSA[tsadata$SchoolType == 'I']),
      sd = sd(tsadata$TSA[tsadata$SchoolType == 'I'])
    )
  ) +
  stat_function(
    aes(color = 'State'),
    fun = dnorm,
    args = list(
      mean = mean(tsadata$TSA[tsadata$SchoolType == 'S']),
      sd = sd(tsadata$TSA[tsadata$SchoolType == 'S'])
    )
  ) +
  stat_function(
    aes(color = 'Overseas'),
    fun = dnorm,
    args = list(
      mean = mean(tsadata$TSA[tsadata$SchoolType == 'O']),
      sd = sd(tsadata$TSA[tsadata$SchoolType == 'O'])
    )
  ) +
  # Name the legend explicitly now that no plot-level colour mapping exists.
  labs(
    title = 'Distribution of TSA Scores Based on School Type',
    color = 'School Type'
  ) +
  xlab('TSA Score') +
  # dnorm() returns a probability density, so the y-axis is not a frequency.
  ylab('Density')

# Normal density curves fitted to the TSA scores of admitted (Admit == '1')
# and rejected (Admit == '0') applicants, each using that group's sample mean
# and sd.
tsadata |>
  ggplot(aes(x = TSA)) +
  stat_function(
    aes(color = 'Admitted'),
    fun = dnorm,
    args = list(
      mean = mean(tsadata$TSA[tsadata$Admit == '1']),
      sd = sd(tsadata$TSA[tsadata$Admit == '1'])
    )
  ) +
  stat_function(
    aes(color = 'Rejected'),
    fun = dnorm,
    args = list(
      mean = mean(tsadata$TSA[tsadata$Admit == '0']),
      sd = sd(tsadata$TSA[tsadata$Admit == '0'])
    )
  ) +
  # Name the legend explicitly instead of relying on the default title.
  labs(
    title = 'Distribution of TSA Scores of Successful and Unsuccessful Applicants',
    color = 'Outcome'
  ) +
  xlab('TSA Score') +
  # dnorm() returns a probability density, so the y-axis is not a frequency.
  ylab('Density')

# Difference in mean TSA score between male ('M') and female ('F') applicants,
# evaluated against a normal distribution centred on zero.
tsa_male <- tsadata$TSA[tsadata$Gender == 'M']
tsa_female <- tsadata$TSA[tsadata$Gender == 'F']

a <- mean(tsa_male) - mean(tsa_female) # observed difference in means (M - F)
b <- var(tsa_male)                     # sample variance, males
c <- var(tsa_female)                   # sample variance, females
d <- length(tsa_male)                  # group size, males
e <- length(tsa_female)                # group size, females
# Standard error of the difference between two independent sample means:
# sqrt(var1 / n1 + var2 / n2). The earlier expression sqrt(b/c + d/e) divided
# variance by variance and count by count, which is not a standard error.
f <- sqrt(b / d + c / e)

# P(X <= a) for X ~ Normal(mean = 0, sd = f).
pnorm(a, 0, f)
# NOTE(review): the output previously recorded here (0.9875576) was computed
# with the incorrect standard error; re-run to obtain the corrected value.
# Difference in mean TSA score between independent ('I') and state ('S')
# school applicants, evaluated against a normal distribution centred on zero.
tsa_independent <- tsadata$TSA[tsadata$SchoolType == 'I']
tsa_state <- tsadata$TSA[tsadata$SchoolType == 'S']

a <- mean(tsa_independent) - mean(tsa_state) # observed difference (I - S)
b <- var(tsa_independent)                    # sample variance, independent
c <- var(tsa_state)                          # sample variance, state
d <- length(tsa_independent)                 # group size, independent
e <- length(tsa_state)                       # group size, state
# Standard error of the difference between two independent sample means:
# sqrt(var1 / n1 + var2 / n2). The earlier expression sqrt(b/c + d/e) divided
# variance by variance and count by count, which is not a standard error.
f <- sqrt(b / d + c / e)

# P(X <= a) for X ~ Normal(mean = 0, sd = f).
pnorm(a, 0, f)
# NOTE(review): the output previously recorded here (0.5611756) was computed
# with the incorrect standard error; re-run to obtain the corrected value.
# Difference in mean TSA score between admitted (Admit == '1') and rejected
# (Admit == '0') applicants, evaluated against a normal distribution centred
# on zero.
tsa_admitted <- tsadata$TSA[tsadata$Admit == '1']
tsa_rejected <- tsadata$TSA[tsadata$Admit == '0']

a <- mean(tsa_admitted) - mean(tsa_rejected) # observed difference (1 - 0)
b <- var(tsa_admitted)                       # sample variance, admitted
c <- var(tsa_rejected)                       # sample variance, rejected
d <- length(tsa_admitted)                    # group size, admitted
e <- length(tsa_rejected)                    # group size, rejected
# Standard error of the difference between two independent sample means:
# sqrt(var1 / n1 + var2 / n2). The earlier expression sqrt(b/c + d/e) divided
# variance by variance and count by count, which is not a standard error.
f <- sqrt(b / d + c / e)

# P(X <= a) for X ~ Normal(mean = 0, sd = f).
pnorm(a, 0, f)
# NOTE(review): the output previously recorded here (1) was computed with the
# incorrect standard error; re-run to obtain the corrected value.