# Make sure tidyverse is available: try to attach it, install it on failure
if (!require("tidyverse")) {
  install.packages("tidyverse")
}
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Attach tidyverse for the session
library(tidyverse)

# Read the dorm temperature CSV from GitHub and preview the first 10 rows
mydata <- read.csv(
  file = "https://raw.githubusercontent.com/drkblake/Data/main/DormTemps.csv"
)
head(mydata, n = 10)
##    DormID RoomTemp        Range
## 1       1     61.0 Out of range
## 2       2     72.9     In range
## 3       3     67.0     In range
## 4       4     64.2 Out of range
## 5       5     62.2 Out of range
## 6       6     70.4     In range
## 7       7     62.7 Out of range
## 8       8     62.3 Out of range
## 9       9     62.2 Out of range
## 10     10     64.2 Out of range
# Specify the variable and test value
mydata$V1 <- mydata$RoomTemp
test_value <- 70

# Histogram of V1 with a vertical reference line at the sample mean.
# `bins = 30` spells out the value ggplot2 was already defaulting to, which
# keeps the same plot while silencing the stat_bin() "Pick better value"
# message. The mean is computed once, outside aes(), so a single reference
# line is drawn instead of one identical line per row, and na.rm = TRUE keeps
# it consistent with the summary statistics computed further down.
ggplot(mydata, aes(x = V1)) +
  geom_histogram(bins = 30, color = "black", fill = "#1f78b4") +
  geom_vline(xintercept = mean(mydata$V1, na.rm = TRUE))

# Descriptive statistics for V1 plus a Shapiro-Wilk normality check.
# The Shapiro-Wilk test is run once and its result reused for both the
# W statistic and the p-value columns (previously it was computed twice).
shapiro_result <- shapiro.test(mydata$V1)

# Wrap a summary function so it is applied to V1 with missing values removed
v1_stat <- function(fn) {
  fn(mydata$V1, na.rm = TRUE)
}

# summarise() only emits the columns named here, so no select() is needed;
# count is the number of rows, including any with a missing V1
mydata %>%
  summarise(
    count = n(),
    mean = v1_stat(mean),
    sd = v1_stat(sd),
    min = v1_stat(min),
    max = v1_stat(max),
    `W Statistic` = shapiro_result$statistic,
    `p-value` = shapiro_result$p.value
  )
##   count     mean       sd min max W Statistic   p-value
## 1   175 62.73486 3.957923  53  76   0.9884747 0.1650714
# One-sample, two-sided t-test: does the mean of V1 differ from test_value?
t.test(
  x = mydata$V1,
  mu = test_value,
  alternative = "two.sided",
  conf.level = 0.95
)
## 
##  One Sample t-test
## 
## data:  mydata$V1
## t = -24.283, df = 174, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 70
## 95 percent confidence interval:
##  62.14435 63.32537
## sample estimates:
## mean of x 
##  62.73486
# Install any missing packages, then attach them explicitly.
# require() only attaches a package that is already installed; after a fresh
# install.packages() nothing is attached, so without the library() calls
# CrossTable() (from gmodels) would not be found on a first run.
if (!require("tidyverse")) {
  install.packages("tidyverse")
}
if (!require("gmodels")) {
  install.packages("gmodels")
}
library(tidyverse)
library(gmodels)
## Loading required package: gmodels
# Reload a fresh copy of the dorm temperature data and show its first 10 rows
mydata <- read.csv(
  file = "https://raw.githubusercontent.com/drkblake/Data/main/DormTemps.csv"
)
head(mydata, n = 10)
##    DormID RoomTemp        Range
## 1       1     61.0 Out of range
## 2       2     72.9     In range
## 3       3     67.0     In range
## 4       4     64.2 Out of range
## 5       5     62.2 Out of range
## 6       6     70.4     In range
## 7       7     62.7 Out of range
## 8       8     62.3 Out of range
## 9       9     62.2 Out of range
## 10     10     64.2 Out of range
# Use the Range column (in range / out of range) as the analysis variable V1
mydata[["V1"]] <- mydata[["Range"]]

# Bar chart of how many rows fall in each V1 category
ggplot(data = mydata, mapping = aes(x = V1)) +
  geom_bar(fill = "royalblue")

# One-way frequency table of V1, with the row-proportion, table-proportion,
# and chi-square-contribution cells switched off
CrossTable(
  x = mydata$V1,
  prop.r = FALSE,
  prop.t = FALSE,
  prop.chisq = FALSE
)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |-------------------------|
## 
##  
## Total Observations in Table:  175 
## 
##  
##              |     In range | Out of range | 
##              |--------------|--------------|
##              |           26 |          149 | 
##              |        0.149 |        0.851 | 
##              |--------------|--------------|
## 
## 
## 
## 
# Run the chi-squared goodness-of-fit test.
# The claim under test is that 60% of rooms are in range, so the expected
# proportions are passed explicitly; by default chisq.test() would compare
# the counts against equal (50/50) proportions instead.
claimed_props <- c("In range" = 0.60, "Out of range" = 0.40)
observed <- table(mydata$V1)
# Index by name so each expected proportion lines up with its category
test <- chisq.test(observed, p = claimed_props[names(observed)])
test
## 
##  Chi-squared test for given probabilities
## 
## data:  observed
## X-squared = 148.6, df = 1, p-value < 2.2e-16
              

Conclusion 1

  1. The sample mean for this dataset was about 62.7 (SD ≈ 4.0, n = 175), which is far below the test value of 70 (one-sample t-test: t = -24.28, df = 174, p < 2.2e-16). It is also more than 4 degrees below 67, the lowest value in the stated range, so the data do not reflect the university's statement. Therefore, the university's claim is not supported.

    Conclusion 2

  2. These data showed that 149 of the 175 observations were out of range, while only 26 were in range. That is, only about 15% (26/175 = 0.149) were actually in range, which is wildly different from what the university was claiming. Therefore, the claim that 60% were in range is not supported by the data.