week 12 lab

# Install tidyverse if it is missing. requireNamespace() only checks that the
# package is available; it is attached by the library() call further down.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}

## Loading required package: tidyverse

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(tidyverse)

# Read the dorm temperature data set from GitHub
mydata <- read.csv("https://raw.githubusercontent.com/drkblake/Data/main/DormTemps.csv")

# Specify the variable to analyse (copied into V1, which the code below uses)
# and the value its mean will be tested against
mydata$V1 <- mydata$RoomTemp
test_value <- 70

# Histogram of V1 with a vertical reference line at the sample mean.
# binwidth is set explicitly (one unit per bar) instead of relying on the
# default of 30 bins, which ggplot2 itself asks to be overridden.
ggplot(mydata, aes(x = V1)) +
  geom_histogram(binwidth = 1, color = "black", fill = "#1f78b4") +
  # A constant xintercept draws a single line; mapping it inside aes() would
  # draw one identical line per data row. na.rm keeps the mean defined if V1
  # contains missing values.
  geom_vline(xintercept = mean(mydata$V1, na.rm = TRUE))

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Descriptive statistics for V1 plus a Shapiro-Wilk normality check.
# The test is run once and its result reused for both the W statistic and the
# p-value, rather than calling shapiro.test() twice on the same data.
shapiro_result <- shapiro.test(mydata$V1)

mydata %>%
  select(V1) %>%
  summarise(
    count = n(), # number of rows, including any with a missing V1
    mean = mean(V1, na.rm = TRUE),
    sd = sd(V1, na.rm = TRUE),
    min = min(V1, na.rm = TRUE),
    max = max(V1, na.rm = TRUE),
    `W Statistic` = shapiro_result$statistic,
    `p-value` = shapiro_result$p.value
  )

##   count     mean       sd min max W Statistic   p-value
## 1   175 62.73486 3.957923  53  76   0.9884747 0.1650714

# One-sample t-test: compares the mean of V1 with test_value
t.test(mydata$V1, mu = test_value)

## 
##  One Sample t-test
## 
## data:  mydata$V1
## t = -24.283, df = 174, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 70
## 95 percent confidence interval:
##  62.14435 63.32537
## sample estimates:
## mean of x 
##  62.73486

# Install any required package that is missing, then attach it.
# Attaching explicitly matters: a package that had to be installed just now
# would otherwise never be loaded, and CrossTable() (gmodels) would not be
# found on the first run.
for (pkg in c("tidyverse", "gmodels")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}

## Loading required package: gmodels

# Reload the data set and point V1 at the categorical Range column
mydata <- read.csv(
  "https://raw.githubusercontent.com/drkblake/Data/main/DormTemps.csv"
)
mydata$V1 <- mydata$Range

# Bar chart showing the number of observations in each V1 category
mydata %>%
  ggplot(aes(x = V1)) +
  geom_bar(fill = "royalblue")

# One-way frequency table of V1: counts per category, with the row, table,
# and chi-square-contribution proportions switched off
CrossTable(mydata$V1, prop.r = FALSE, prop.t = FALSE, prop.chisq = FALSE)

## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |-------------------------|
## 
##  
## Total Observations in Table:  175 
## 
##  
##              |     In range | Out of range | 
##              |--------------|--------------|
##              |           26 |          149 | 
##              |        0.149 |        0.851 | 
##              |--------------|--------------|
## 
## 
## 
##

# Chi-squared test on the category counts of V1. No expected proportions are
# supplied, so the counts are compared against equal shares per category.
# The result is stored in `test` and then printed.
test <- chisq.test(table(mydata$V1))
test

## 
##  Chi-squared test for given probabilities
## 
## data:  table(mydata$V1)
## X-squared = 86.451, df = 1, p-value < 2.2e-16

About 15% (26 of 175) are in range. The lowest appears to be 2 and the highest is 20.

week 12 lab

Tiffany Haynes

2024-04-05