library(readxl)
library(moments)
## Warning: package 'moments' was built under R version 4.1.3
library(car)
## Warning: package 'car' was built under R version 4.1.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.1.3
#NOTE(review): this absolute path is machine-specific; every read.csv() call
#in this script resolves its file name relative to it.
setwd("C:/Users/rabdo/OneDrive/Desktop/HU/510 51")
#load the kids' calorie data (read.csv() treats the first row as the header
#by default)
kidscalories <- read.csv(file = "kidscalories.csv")
#store the grouping column as a factor so it is treated as categorical
kidscalories[["helpedinprep"]] <- factor(kidscalories[["helpedinprep"]])
#Research question
#Do children who help make dinner end up eating more?
#Hypothesis testing
#H0: Children who help with cooking eat the same amount as, or less than, those who don't
#Ha: Children who help with cooking eat more than those who don't
#skewness check on calorie intake (D'Agostino test from the moments package)
moments::agostino.test(kidscalories$calorieintake)
##
## D'Agostino skewness test
##
## data: kidscalories$calorieintake
## skew = -0.011821, z = -0.037082, p-value = 0.9704
## alternative hypothesis: data have a skewness
#normality check on calorie intake (Shapiro-Wilk test)
stats::shapiro.test(kidscalories$calorieintake)
##
## Shapiro-Wilk normality test
##
## data: kidscalories$calorieintake
## W = 0.97936, p-value = 0.5663
#histogram as a visual check of the distribution
graphics::hist(kidscalories$calorieintake)

#The data appear normal and not skewed: the D'Agostino skewness estimate is close to zero (-0.01, p = 0.97), and the p-value of the Shapiro-Wilk test (0.57) is greater than .05, meaning the data are not significantly different from normal.
#homogeneity of variance between the two helpedinprep groups (Levene's test)
leveneTest(calorieintake ~ helpedinprep, data = kidscalories)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.0264 0.8716
## 45
#Levene's test is not significant, so use the equal-variance (pooled) t-test;
#one-sided alternative: mean of the first factor level greater than the second
t.test(
  calorieintake ~ helpedinprep,
  data = kidscalories,
  alternative = "greater",
  var.equal = TRUE
)
##
## Two Sample t-test
##
## data: calorieintake by helpedinprep
## t = 2.8137, df = 45, p-value = 0.003618
## alternative hypothesis: true difference in means between group 1 and group 2 is greater than 0
## 95 percent confidence interval:
## 34.10513 Inf
## sample estimates:
## mean in group 1 mean in group 2
## 431.3996 346.7991
#standard deviation of calorie intake within each group
with(kidscalories, tapply(calorieintake, helpedinprep, sd))
## 1 2
## 105.70124 99.50114
#SUMMARY
#In the current study, we examined whether children who helped with cooking ate more than children who did not. An independent-samples t-test (equal variances assumed, one-tailed) showed a significant difference: group 1 (M = 431.3996; SD = 105.70124) had a higher calorie intake than group 2 (M = 346.7991; SD = 99.50114), t(45) = 2.8137, p = 0.003618.
#load the cholesterol data
CholestoralData <- read.csv(file = "CholestoralData.csv")
#Research question
#Does consuming one brand of margarine help lower cholesterol level more than the other?
#Hypothesis testing
#H0: There is no difference in cholesterol level when using brand A or B margarine
#Ha: One brand helps lower cholesterol level more than the other
#treat the margarine brand as a categorical factor
CholestoralData[["Margarine"]] <- factor(CholestoralData[["Margarine"]])
#per-row change in cholesterol: After minus Before (negative = reduction)
CholestoralData[["difference"]] <- with(CholestoralData, After - Before)
#check homogeneity of variance of the change scores across the two brands
leveneTest(difference ~ Margarine, data = CholestoralData)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 15.478 0.0003431 ***
## 38
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Levene's test is significant, so compare the brands with Welch's
#unequal-variance t-test
t.test(
  difference ~ Margarine,
  data = CholestoralData,
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: difference by Margarine
## t = -3.9902, df = 19.854, p-value = 0.0007285
## alternative hypothesis: true difference in means between group A and group B is not equal to 0
## 95 percent confidence interval:
## -5.281831 -1.654169
## sample estimates:
## mean in group A mean in group B
## -3.7805 -0.3125
#rank-based (non-parametric) comparison of the same two groups
wilcox.test(CholestoralData$difference ~ CholestoralData$Margarine)
##
## Wilcoxon rank sum exact test
##
## data: CholestoralData$difference by CholestoralData$Margarine
## W = 86, p-value = 0.001593
## alternative hypothesis: true location shift is not equal to 0
#paired t-test within brand A only: Before vs After on the same rows
#(Before - After, so a positive mean difference is a reduction)
t.test(
  CholestoralData$Before[CholestoralData$Margarine == "A"],
  CholestoralData$After[CholestoralData$Margarine == "A"],
  paired = TRUE
)
##
## Paired t-test
##
## data: CholestoralData$Before[CholestoralData$Margarine == "A"] and CholestoralData$After[CholestoralData$Margarine == "A"]
## t = 4.3984, df = 19, p-value = 0.0003089
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1.981502 5.579498
## sample estimates:
## mean of the differences
## 3.7805
#mean change (After - Before) per brand
with(CholestoralData, tapply(difference, Margarine, mean))
## A B
## -3.7805 -0.3125
#standard deviation of the change per brand
with(CholestoralData, tapply(difference, Margarine, sd))
## A B
## 3.8438953 0.5764125
#SUMMARY
#When comparing the change in cholesterol levels for individuals eating margarine A or B using an independent t-test (for unequal variances), we find margarine A (M = -3.78; SD = 3.84) led to a greater reduction than B (M = -.31; SD = .58), t(19.85) = -3.99, p < .001. A follow-up paired t-test showed that those in the A group had a significant reduction (mean difference = 3.78, 95% confidence interval [1.98, 5.58]), t(19) = 4.40, p < .001. We can conclude that margarine A appears to lower cholesterol levels significantly more than B.
#load the school-priorities data (first row is the header, the read.csv()
#default)
PrioritiesData <- read.csv(file = "PrioritiesData.csv")
#Research question
#Do school priorities differ due to their location (rural, suburban or urban)?
#Hypothesis testing
#H0: The proportions of kids' priorities are equal across locations
#Ha: The proportions of kids' priorities differ across locations
#Chi Square test of independence
#look at proportions: share of each priority within a location, one column
#per location. Dividing by the column totals keeps the values in the data's
#row order. Wrapping the division in table(), as was done before, only
#counted how often each proportion value occurred and sorted the values,
#which detached them from their priority.
location_cols <- c("Rural", "Suburban", "Urban")
priority_counts <- as.matrix(PrioritiesData[, location_cols])
prop.table(priority_counts, margin = 2)
#goodness-of-fit test per location: are the priorities equally frequent
#within that location?
chisq.test(PrioritiesData$Rural)
##
## Chi-squared test for given probabilities
##
## data: PrioritiesData$Rural
## X-squared = 1.52, df = 2, p-value = 0.4677
chisq.test(PrioritiesData$Suburban)
##
## Chi-squared test for given probabilities
##
## data: PrioritiesData$Suburban
## X-squared = 30.32, df = 2, p-value = 2.607e-07
chisq.test(PrioritiesData$Urban)
##
## Chi-squared test for given probabilities
##
## data: PrioritiesData$Urban
## X-squared = 57.38, df = 2, p-value = 3.468e-13
#test of independence across ALL three locations. The earlier call used
#PrioritiesData[, 2:3], which passes only two columns (its pasted result was
#X-squared = 9.414, df = 2, p-value = 0.009032), so one location was left
#out of the comparison. Selecting the columns by name avoids depending on
#their position. With three priorities and three locations the test has
#df = 4; re-run to refresh the printed output.
chisq.test(priority_counts)
#SUMMARY
#To see whether students’ school priorities differ across locations (rural, suburban or urban), we performed a Chi-Square test of independence. The test revealed a significant difference in the proportions of kids’ priorities across communities. A closer examination revealed that for kids in rural schools, priorities are fairly evenly spread (38%, 28%, and 34%). For suburban (58%, 14%, 28%) and urban (69%, 14%, and 17%) schools, priorities differ significantly.
#load the voting data
#NOTE(review): VotingData is not referenced again in the visible part of this
#script; the analysis uses the vote counts typed in by hand.
VotingData <- read.csv(file = "VotingData.csv")
#vote counts, one vector per ballot listing order.
#NOTE(review): the row order appears to be Democrat, Republican, Libertarian
#(310 / 378 = 82% matches the Republican share quoted in the summary) -
#confirm against VotingData.
DRL <- c(63, 310, 5)
RLD <- c(69, 302, 5)
LDR <- c(61, 308, 5)
#Research question
#Does listing order impact voting?
#Hypothesis testing
#H0: There is no difference in voting across listings
#Ha: There is a difference in voting across listings
#Chi Square test of independence
df <- data.frame(DRL, RLD, LDR)
listing_test <- chisq.test(df)
listing_test
## Warning in chisq.test(df): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: df
## X-squared = 0.63124, df = 4, p-value = 0.9595
#chisq.test() raises the warning above when an expected cell count is below
#5, so inspect the expected counts and confirm the result with a Monte Carlo
#p-value, which does not rely on the chi-squared approximation. The seed
#makes the simulated p-value reproducible.
listing_test$expected
set.seed(510)
chisq.test(df, simulate.p.value = TRUE, B = 10000)
#proportions: share of the votes within each listing order, kept in row
#order. Wrapping the division in table(), as was done before, sorted the
#values and only counted how often each one occurred, so the shares were no
#longer tied to a candidate.
vote_share <- prop.table(as.matrix(df), margin = 2)
rownames(vote_share) <- c("Democrat", "Republican", "Libertarian")
vote_share
#SUMMARY
#We checked whether the order in which candidates are listed influenced the votes a candidate received. A Chi-Square test of independence revealed no significant difference in the proportion of votes for the different candidates by candidate ordering, Chi-square = 0.63124, df = 4, p-value = 0.9595. Republican votes are the highest (82%), followed by Democrats (17%) and Libertarians (1%).