# Load the TSA data set; the relative path is resolved against the current
# working directory.
tsadata <- read.csv('tsadata.csv')
# Install tidyverse only when it is not already available, so re-running the
# script does not re-download the package on every execution.
if (!requireNamespace('tidyverse', quietly = TRUE)) {
  install.packages('tidyverse')
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library('tidyverse')
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library('broom')
# Histogram of TSA scores (bar height = number of rows per one-point bin).
ggplot(data = tsadata, mapping = aes(x = TSA)) +
  geom_histogram(binwidth = 1) +
  labs(
    title = "Distribution of TSA scores",
    x = "TSA Score",
    y = "Frequency"
  )

# Sample mean and standard deviation of the TSA column.
with(tsadata, mean(TSA))
## [1] 61.05055
with(tsadata, sd(TSA))
## [1] 9.278309
# Density-scaled histogram of TSA scores with a normal curve overlaid. The
# curve uses the sample mean and standard deviation of TSA as its parameters.
tsadata |>
  ggplot(aes(x = TSA)) +
  # after_stat(density) replaces the `..density..` notation, which was
  # deprecated in ggplot2 3.4.0. Bars are scaled to density so they share a
  # y scale with the dnorm() curve.
  geom_histogram(aes(y = after_stat(density)), binwidth = 1) +
  stat_function(
    fun = dnorm,
    args = list(mean = mean(tsadata$TSA), sd = sd(tsadata$TSA))
  ) +
  labs(title = 'Distribution of TSA scores') +
  xlab('TSA Score') +
  # The y axis is a density here, not a count.
  ylab('Density')

# Normal curves for TSA scores by gender. Each curve is dnorm() evaluated with
# the sample mean and standard deviation of the rows where Gender is 'M' or
# 'F' respectively.
tsadata |>
  ggplot(aes(x = TSA)) +
  stat_function(
    aes(color = 'Male'),
    fun = dnorm,
    args = list(
      mean = mean(tsadata$TSA[tsadata$Gender == 'M']),
      sd = sd(tsadata$TSA[tsadata$Gender == 'M'])
    )
  ) +
  stat_function(
    aes(color = 'Female'),
    fun = dnorm,
    args = list(
      mean = mean(tsadata$TSA[tsadata$Gender == 'F']),
      sd = sd(tsadata$TSA[tsadata$Gender == 'F'])
    )
  ) +
  labs(
    title = 'Distribution of TSA Scores for Males and Females',
    x = 'TSA Score',
    # dnorm() returns a probability density, so the axis is labelled as such.
    y = 'Density',
    # Name the legend explicitly instead of relying on the default title.
    color = 'Gender'
  )

# Normal curves for TSA scores by school type. Each curve is dnorm() evaluated
# with the sample mean and standard deviation of the rows where SchoolType is
# 'I', 'S' or 'O' respectively.
# Colour is mapped per layer to a constant label, so no plot-level colour
# mapping is needed; the legend title is set explicitly in labs().
tsadata |>
  ggplot(aes(x = TSA)) +
  stat_function(
    aes(color = 'Independent'),
    fun = dnorm,
    args = list(
      mean = mean(tsadata$TSA[tsadata$SchoolType == 'I']),
      sd = sd(tsadata$TSA[tsadata$SchoolType == 'I'])
    )
  ) +
  stat_function(
    aes(color = 'State'),
    fun = dnorm,
    args = list(
      mean = mean(tsadata$TSA[tsadata$SchoolType == 'S']),
      sd = sd(tsadata$TSA[tsadata$SchoolType == 'S'])
    )
  ) +
  stat_function(
    aes(color = 'Overseas'),
    fun = dnorm,
    args = list(
      mean = mean(tsadata$TSA[tsadata$SchoolType == 'O']),
      sd = sd(tsadata$TSA[tsadata$SchoolType == 'O'])
    )
  ) +
  labs(
    title = 'Distribution of TSA Scores Based on School Type',
    x = 'TSA Score',
    # dnorm() returns a probability density, so the axis is labelled as such.
    y = 'Density',
    color = 'School Type'
  )

# Normal curves for TSA scores by admission outcome. Each curve is dnorm()
# evaluated with the sample mean and standard deviation of the rows where
# Admit is '1' (labelled Admitted) or '0' (labelled Rejected).
tsadata |>
  ggplot(aes(x = TSA)) +
  stat_function(
    aes(color = 'Admitted'),
    fun = dnorm,
    args = list(
      mean = mean(tsadata$TSA[tsadata$Admit == '1']),
      sd = sd(tsadata$TSA[tsadata$Admit == '1'])
    )
  ) +
  stat_function(
    aes(color = 'Rejected'),
    fun = dnorm,
    args = list(
      mean = mean(tsadata$TSA[tsadata$Admit == '0']),
      sd = sd(tsadata$TSA[tsadata$Admit == '0'])
    )
  ) +
  labs(
    title = 'Distribution of TSA Scores of Successful and Unsuccessful Applicants',
    x = 'TSA Score',
    # dnorm() returns a probability density, so the axis is labelled as such.
    y = 'Density',
    # Name the legend explicitly instead of relying on the default title.
    color = 'Outcome'
  )

# Difference in mean TSA score, male minus female, compared against a
# zero-centred normal distribution whose standard deviation is the standard
# error of that difference.
a <- mean(tsadata$TSA[tsadata$Gender == 'M'])-mean(tsadata$TSA[tsadata$Gender == 'F'])
b <- var(tsadata$TSA[tsadata$Gender == 'M'])     # variance, male group
c <- var(tsadata$TSA[tsadata$Gender == 'F'])     # variance, female group
d <- length(tsadata$TSA[tsadata$Gender == 'M'])  # group size, male
e <- length(tsadata$TSA[tsadata$Gender == 'F'])  # group size, female
# Standard error of a difference of two means: each group's variance is
# divided by that group's own size (var1/n1 + var2/n2), not variance by
# variance and size by size.
f <- sqrt(b/d + c/e)
# P(X <= a) for X ~ Normal(0, f): values near 1 indicate the first group's
# mean is well above the second's, values near 0 the reverse.
pnorm(a, 0 ,f)
## (recorded output removed: the earlier value 0.9875576 was produced with an
## incorrect standard error; re-run to regenerate)
# Difference in mean TSA score, independent ('I') minus state ('S') schools,
# compared against a zero-centred normal distribution whose standard deviation
# is the standard error of that difference.
a <- mean(tsadata$TSA[tsadata$SchoolType == 'I'])-mean(tsadata$TSA[tsadata$SchoolType == 'S'])
b <- var(tsadata$TSA[tsadata$SchoolType == 'I'])     # variance, 'I' group
c <- var(tsadata$TSA[tsadata$SchoolType == 'S'])     # variance, 'S' group
d <- length(tsadata$TSA[tsadata$SchoolType == 'I'])  # group size, 'I'
e <- length(tsadata$TSA[tsadata$SchoolType == 'S'])  # group size, 'S'
# Standard error of a difference of two means: each group's variance is
# divided by that group's own size (var1/n1 + var2/n2), not variance by
# variance and size by size.
f <- sqrt(b/d + c/e)
# P(X <= a) for X ~ Normal(0, f): values near 1 indicate the first group's
# mean is well above the second's, values near 0 the reverse.
pnorm(a, 0 ,f)
## (recorded output removed: the earlier value 0.5611756 was produced with an
## incorrect standard error; re-run to regenerate)
# Difference in mean TSA score, Admit == '1' minus Admit == '0', compared
# against a zero-centred normal distribution whose standard deviation is the
# standard error of that difference.
a <- mean(tsadata$TSA[tsadata$Admit == '1'])-mean(tsadata$TSA[tsadata$Admit == '0'])
b <- var(tsadata$TSA[tsadata$Admit == '1'])     # variance, Admit == '1' group
c <- var(tsadata$TSA[tsadata$Admit == '0'])     # variance, Admit == '0' group
d <- length(tsadata$TSA[tsadata$Admit == '1'])  # group size, Admit == '1'
e <- length(tsadata$TSA[tsadata$Admit == '0'])  # group size, Admit == '0'
# Standard error of a difference of two means: each group's variance is
# divided by that group's own size (var1/n1 + var2/n2), not variance by
# variance and size by size.
f <- sqrt(b/d + c/e)
# P(X <= a) for X ~ Normal(0, f): values near 1 indicate the first group's
# mean is well above the second's, values near 0 the reverse.
pnorm(a, 0 ,f)
## (recorded output removed: the earlier value 1 was produced with an
## incorrect standard error; re-run to regenerate)