Data Import

# Load the merged, wrangled and cleaned dataset from the working directory.
data <- read.csv(file = "merge_wrangled_cleaned.csv")

Vector Data Transformations

# Keep only female respondents whose age value is not Inf.
# NOTE(review): presumably missing ages are coded as Inf upstream -- confirm.
# filter() also drops rows where a condition evaluates to NA, so genuine NAs
# in either column are removed here as well.
forced_penetration_women <- data %>%
  filter(
    ag_forced_penetration_age_num != Inf,
    gender == "Female"
  )

# Pull the age column out as a plain vector for the analyses that follow.
var <- forced_penetration_women[["ag_forced_penetration_age_num"]]

Central Tendency and Distribution

### Central tendency and distribution
# Coerce to numeric first and only then drop missing values. as.character()
# guards against the column having been read as a factor; dropping NAs after
# the conversion also removes entries that failed to parse, which would
# otherwise make mean(), var() and sd() return NA.
var <- as.numeric(as.character(var))
var <- var[!is.na(var)]
mean(var) # 16.3004
## [1] 16.3004
# Sample variance. The call still resolves to stats::var() even though the
# data vector is also named `var`, because R skips non-function objects when
# looking up a name in call position.
var(var) # 38.74274
## [1] 38.74274
sd(var) # 6.224366
## [1] 6.224366

Testing for Normality (Informal Analytical Approach)

### Testing for Normality (Informal Analytical Approach)
set.seed(3000)
# Reference sample from a normal distribution with the same size, mean and
# standard deviation as the observed data. Matching these parameters puts both
# samples on the same scale, so the histograms are comparable and the
# 45-degree line on the two-sample QQ plot below is a meaningful reference.
data_normal <- rnorm(length(var), mean = mean(var), sd = sd(var))

hist(data_normal)

hist(var) # visually, looks largely comparable; rougher than normal partly because of small n.

# QQ plots
qqnorm(data_normal)

qqnorm(var)

# Two-sample QQ plot of the observed data against the matched normal reference;
# points on the identity line indicate agreement with normality.
# Original reading: the sample has a higher degree of kurtosis than the normal
# distribution (too peaked in the middle) -- re-check after re-running.
qqplot(data_normal, var)
abline(0, 1)

# Skewness and kurtosis
library(moments)
skewness(var) # 0.02147941 # takeaway - data is very nearly symmetrical
## [1] 0.02147941
skewness(data_normal) # expected to be close to 0; exact value depends on the draw
# moments::kurtosis() reports Pearson kurtosis, for which a normal
# distribution has the value 3 (not excess kurtosis).
kurtosis(var) # 4.097 # takeaway - sample has a higher degree of kurtosis than a normal distribution
## [1] 4.097042
kurtosis(data_normal) # expected to be close to 3; exact value depends on the draw
# informal tests point toward a leptokurtic distribution (too heavily peaked in the middle)

Formal Tests for Normality

# Fit a normal distribution to the sample (maximum likelihood, as shown by the
# "mle" label in the gofstat output below).
fit.normal <- fitdist(var, distr = "norm")
# Log-likelihood of the fitted model, read from the existing fit object rather
# than fitting the same model a second time.
fit.normal$loglik
## [1] -821.0938
# Diagnostic plots for the fitted distribution.
plot(fit.normal)

# Goodness-of-fit statistics and information criteria for the normal fit.
gofstat(fit.normal)
## Goodness-of-fit statistics
##                              1-mle-norm
## Kolmogorov-Smirnov statistic  0.1601767
## Cramer-von Mises statistic    1.4162856
## Anderson-Darling statistic    6.9532536
## 
## Goodness-of-fit criteria
##                                1-mle-norm
## Akaike's Information Criterion   1646.188
## Bayesian Information Criterion   1653.254