# Install tidyverse when it cannot be loaded.
# require() attaches the package if it is present and returns FALSE otherwise,
# in which case the package is installed.
if (!require("tidyverse")) {
  install.packages("tidyverse")
}
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Attach the tidyverse (ggplot2, dplyr, and the pipe are used below)
library(tidyverse)

# Download the dorm temperature data and preview the first ten rows
mydata <- read.csv(
  "https://raw.githubusercontent.com/drkblake/Data/main/DormTemps.csv"
)
head(mydata, n = 10)
## DormID RoomTemp Range
## 1 1 61.0 Out of range
## 2 2 72.9 In range
## 3 3 67.0 In range
## 4 4 64.2 Out of range
## 5 5 62.2 Out of range
## 6 6 70.4 In range
## 7 7 62.7 Out of range
## 8 8 62.3 Out of range
## 9 9 62.2 Out of range
## 10 10 64.2 Out of range
# Specify the variable to analyse and the hypothesised mean to test it against.
# The room temperature is copied to the generic name V1 used by the code below.
mydata[["V1"]] <- mydata[["RoomTemp"]]
test_value <- 70
# Histogram of V1 with a vertical reference line at the sample mean
ggplot(data = mydata, mapping = aes(x = V1)) +
  geom_histogram(fill = "#1f78b4", color = "black") +
  geom_vline(mapping = aes(xintercept = mean(V1)))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Descriptive statistics for V1, reported together with the Shapiro-Wilk
# normality test (W statistic and its p-value).
mydata %>%
  select(V1) %>%
  summarise(
    count = n(),
    mean = mean(V1, na.rm = TRUE),
    sd = sd(V1, na.rm = TRUE),
    min = min(V1, na.rm = TRUE),
    max = max(V1, na.rm = TRUE),
    `W Statistic` = shapiro.test(V1)$statistic,
    `p-value` = shapiro.test(V1)$p.value
  )
## count mean sd min max W Statistic p-value
## 1 175 62.73486 3.957923 53 76 0.9884747 0.1650714
# One-sample t-test: does the mean of V1 differ from test_value?
t.test(x = mydata$V1, mu = test_value)
##
## One Sample t-test
##
## data: mydata$V1
## t = -24.283, df = 174, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 70
## 95 percent confidence interval:
## 62.14435 63.32537
## sample estimates:
## mean of x
## 62.73486
# Install any required packages that are missing.
# require() attaches a package that is already installed and returns FALSE
# otherwise, in which case the package is installed.
if (!require("tidyverse")) {
  install.packages("tidyverse")
}
if (!require("gmodels")) {
  install.packages("gmodels")
}

# install.packages() does not attach a package, so attach both explicitly.
# Without this, CrossTable() is unavailable on a machine where gmodels had to
# be installed just above. These calls are no-ops when already attached.
library(tidyverse)
library(gmodels)
## Loading required package: gmodels
# Read the dorm temperature data again for this analysis and show ten rows
mydata <- read.csv(
  file = "https://raw.githubusercontent.com/drkblake/Data/main/DormTemps.csv"
)
head(x = mydata, n = 10)
## DormID RoomTemp Range
## 1 1 61.0 Out of range
## 2 2 72.9 In range
## 3 3 67.0 In range
## 4 4 64.2 Out of range
## 5 5 62.2 Out of range
## 6 6 70.4 In range
## 7 7 62.7 Out of range
## 8 8 62.3 Out of range
## 9 9 62.2 Out of range
## 10 10 64.2 Out of range
# Specify V1: copy the categorical Range column to the generic name V1
# so the plotting and testing code below can refer to V1.
mydata[["V1"]] <- mydata[["Range"]]
# Look at V1: bar chart of the number of observations in each category
ggplot(data = mydata, mapping = aes(x = V1)) +
  geom_bar(fill = "royalblue")
# Make the crosstab table: a one-way table of V1, with the row, table, and
# chi-square-contribution proportions switched off.
CrossTable(
  mydata$V1,
  prop.r = FALSE,
  prop.t = FALSE,
  prop.chisq = FALSE
)
##
##
## Cell Contents
## |-------------------------|
## | N |
## |-------------------------|
##
##
## Total Observations in Table: 175
##
##
## | In range | Out of range |
## |--------------|--------------|
## | 26 | 149 |
## | 0.149 | 0.851 |
## |--------------|--------------|
##
##
##
##
# Run the chi-squared goodness-of-fit test against the claimed split.
# chisq.test() assumes equal proportions for every category unless `p` is
# supplied, so the claim being evaluated (60% in range, 40% out of range)
# is passed explicitly.
# `p` follows the order of the table's categories: "In range", "Out of range".
claimed_props <- c(0.60, 0.40)
test <- chisq.test(table(mydata$V1), p = claimed_props)
test
##
## Chi-squared test for given probabilities
##
## data: table(mydata$V1)
## X-squared = 148.6, df = 1, p-value < 2.2e-16
Conclusion 1
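The sample mean room temperature was about 62.7 degrees (M = 62.73, SD = 3.96), which is far lower than what the university stated. The lowest value of the stated range was 67, and the sample mean is more than 4 degrees below it, so the data do not reflect the statement. The one-sample t-test confirms that the mean differs significantly from the test value of 70, t(174) = -24.28, p < .001. Therefore, the university's claims are incorrect.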
Conclusion 2
These data showed that 149 of the 175 observations were out of range, while only 26 were in range. Only about 15% (26 of 175) were actually in range, which is wildly different from what the university was claiming. Therefore, the claim that 60% were in range was inaccurate.