library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the daily bike-sharing data set.
# The path is relative to the working directory (the project / document
# folder) rather than set via setwd() to one user's absolute Desktop path,
# so the script also runs on other machines. Keep day.csv next to this file.
day <- read.csv("day.csv")

# Quick structural check: column names, types and the first few values.
str(day)
## 'data.frame': 731 obs. of 16 variables:
## $ instant : int 1 2 3 4 5 6 7 8 9 10 ...
## $ dteday : chr "2011-01-01" "2011-01-02" "2011-01-03" "2011-01-04" ...
## $ season : int 1 1 1 1 1 1 1 1 1 1 ...
## $ yr : int 0 0 0 0 0 0 0 0 0 0 ...
## $ mnth : int 1 1 1 1 1 1 1 1 1 1 ...
## $ holiday : int 0 0 0 0 0 0 0 0 0 0 ...
## $ weekday : int 6 0 1 2 3 4 5 6 0 1 ...
## $ workingday: int 0 0 1 1 1 1 1 0 0 1 ...
## $ weathersit: int 2 2 1 1 1 1 2 2 1 1 ...
## $ temp : num 0.344 0.363 0.196 0.2 0.227 ...
## $ atemp : num 0.364 0.354 0.189 0.212 0.229 ...
## $ hum : num 0.806 0.696 0.437 0.59 0.437 ...
## $ windspeed : num 0.16 0.249 0.248 0.16 0.187 ...
## $ casual : int 331 131 120 108 82 88 148 68 54 41 ...
## $ registered: int 654 670 1229 1454 1518 1518 1362 891 768 1280 ...
## $ cnt : int 985 801 1349 1562 1600 1606 1510 959 822 1321 ...
# Rescale the two columns by a factor of 100. x and y are kept as global
# vectors because the plotting and diagnostic code further down uses them.
x <- day$temp * 100
y <- day$hum * 100

# Regress scaled temperature (x) on scaled humidity (y) and print the summary.
# x and y are not columns of `day`, so piping `day` in as `data` had no effect:
# lm() silently fell back to the global vectors. Pipe a data frame that
# actually contains the model variables so `data = .` is what gets used.
# NOTE(review): this fits x ~ y, whereas the diagnostic model further down is
# y ~ x -- confirm which direction is intended.
data.frame(x = x, y = y) %>%
  lm(x ~ y, data = .) %>%
  summary()
##
## Call:
## lm(formula = x ~ y, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.907 -15.685 0.253 15.205 38.844
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.29289 3.03982 12.926 < 2e-16 ***
## y 0.16317 0.04722 3.456 0.00058 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.17 on 729 degrees of freedom
## Multiple R-squared: 0.01612, Adjusted R-squared: 0.01477
## F-statistic: 11.94 on 1 and 729 DF, p-value: 0.0005801
# Question 6
# Scatter plot of the raw columns: hum on the vertical axis, temp on the
# horizontal axis.
plot(hum ~ temp, data = day)

# Same relationship on the rescaled vectors, with the least-squares line added.
# Base graphics draw by side effect and return NULL, so layers are added with
# separate calls; chaining them with `+` (ggplot2 style) only evaluated
# NULL + NULL and printed a stray integer(0).
plot(x, y,
  main = "Temperature vs Humidity",
  xlab = "Temperature", ylab = "Humidity",
  frame.plot = FALSE
)
abline(lm(y ~ x))
# Residual diagnostics for the regression of y on x.
# x and y are free-standing vectors, not columns of `day`, so no `data`
# argument is passed; lm() resolves them from the calling environment.
model <- lm(y ~ x)
res <- resid(model)

# Residuals against fitted values, with a horizontal reference line at zero.
plot(fitted(model), res)
abline(h = 0)

# Normal Q-Q plot of the residuals, with the reference line through the
# quartiles.
qqnorm(res)
qqline(res)

# Kernel density estimate of the residuals.
plot(density(res))