library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the 2019 NBA standings (one row per team: Team, Wins, Losses, WinPct,
# PtsFor, PtsAgainst).
# The folder is joined to the file name with file.path() instead of calling
# setwd(), so reading the data does not change the session's working
# directory as a side effect.
data_dir <- "C:/Users/eyong/Downloads/New folder - Copy"
nba <- read_csv(file.path(data_dir, "NBAStandings2019.csv"))
## Rows: 30 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Team
## dbl (5): Wins, Losses, WinPct, PtsFor, PtsAgainst
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Two-predictor linear model: winning percentage regressed on points scored
# (PtsFor) and points allowed (PtsAgainst).
model_1 <- lm(
  formula = WinPct ~ PtsFor + PtsAgainst,
  data = nba
)
# Show coefficient estimates, their t-tests, and the overall fit statistics.
summary(model_1)
##
## Call:
## lm(formula = WinPct ~ PtsFor + PtsAgainst, data = nba)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.065838 -0.017353 0.000247 0.019217 0.055061
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.302999 0.198357 1.528 0.138
## PtsFor 0.030244 0.001405 21.522 <2e-16 ***
## PtsAgainst -0.028470 0.001494 -19.050 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03003 on 27 degrees of freedom
## Multiple R-squared: 0.961, Adjusted R-squared: 0.9581
## F-statistic: 332.5 on 2 and 27 DF, p-value: < 2.2e-16
Equation: WinPct = 0.303 + 0.0302 * PtsFor - 0.0285 * PtsAgainst
The eventual NBA playoff champion Toronto Raptors won 58 games in the regular season while losing only 24 games (WinPCT = 0.707). They scored an average of 114.4 points per game while giving up an average of 108.4 points against. Find the predicted winning percentage for the Raptors using this model and compute the residual.
Comment on the effectiveness of each predictor in this model. (p-values) Both p-values are significant (p < 2e-16 for each predictor); therefore both offensive (PtsFor) and defensive (PtsAgainst) strategies are important for winning games.
Do we do much better by including both predictors in a model? Choose some measure to compare the effectiveness of a simple linear model based on either PtsFor or PtsAgainst to this two-predictor model ## Yes. Comparing adjusted R-squared, the two-predictor model (0.9581) is much better than the single-predictor model based on PtsFor (0.4165).
# One-sided two-sample test of proportions: is the first proportion
# (84200 out of 144790) greater than the second (102598 out of 211693)?
prop.test(
  x = c(84200, 102598),
  n = c(144790, 211693),
  alternative = "greater"
)
##
## 2-sample test for equality of proportions with continuity correction
##
## data: c(84200, 102598) out of c(144790, 211693)
## X-squared = 3234.9, df = 1, p-value < 2.2e-16
## alternative hypothesis: greater
## 95 percent confidence interval:
## 0.09408942 1.00000000
## sample estimates:
## prop 1 prop 2
## 0.5815319 0.4846547
WinPct = 0.303 + 0.0302 * PtsFor - 0.0285 * PtsAgainst. Using the unrounded coefficients from the model summary: WinPct = 0.3030 + 0.030244 * 114.4 - 0.028470 * 108.4 ≈ 0.677 = 67.7%
Residual: 0.707 - 0.677 = 0.030
C/ Effectiveness of each predictor in this model :
# Store the model summary so that individual components can be extracted.
info <- summary(model_1)
# Coefficient table at full precision: estimate, standard error, t value and
# p-value for the intercept and each predictor.
info[["coefficients"]]
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.30299945 0.198357432 1.527543 1.382574e-01
## PtsFor 0.03024363 0.001405245 21.521964 1.574358e-18
## PtsAgainst -0.02846984 0.001494489 -19.049885 3.485649e-17
With both predictors, R-squared = 0.961. With one predictor (PtsFor only):
# Single-predictor comparison model: WinPct regressed on PtsFor alone.
# NOTE(review): this rebinds `model_1`, which until now held the two-predictor
# fit; consider a distinct name if that earlier model is needed again.
model_1 <- lm(
  formula = WinPct ~ PtsFor,
  data = nba
)
summary(model_1)
##
## Call:
## lm(formula = WinPct ~ PtsFor, data = nba)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.19736 -0.08943 0.02367 0.08390 0.17095
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.133765 0.565723 -3.772 0.000772 ***
## PtsFor 0.023684 0.005084 4.659 7.05e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1121 on 28 degrees of freedom
## Multiple R-squared: 0.4366, Adjusted R-squared: 0.4165
## F-statistic: 21.7 on 1 and 28 DF, p-value: 7.051e-05
# Fit a single-predictor model and store it as `model_2`, then summarise it.
# NOTE(review): the formula is WinPct ~ PtsFor, identical to the model fitted
# immediately before this one, so the two summaries match. If a
# PtsAgainst-only comparison was intended, the formula needs changing; confirm.
model_2 <- lm(
  formula = WinPct ~ PtsFor,
  data = nba
)
summary(model_2)
##
## Call:
## lm(formula = WinPct ~ PtsFor, data = nba)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.19736 -0.08943 0.02367 0.08390 0.17095
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.133765 0.565723 -3.772 0.000772 ***
## PtsFor 0.023684 0.005084 4.659 7.05e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1121 on 28 degrees of freedom
## Multiple R-squared: 0.4366, Adjusted R-squared: 0.4165
## F-statistic: 21.7 on 1 and 28 DF, p-value: 7.051e-05
Question: Is there evidence of a difference in the mean number of hours spent watching television per week between males and females?
library(tidyverse)
# Load the student survey data (includes the TV and Sex columns used below).
# The folder is joined to the file name with file.path() instead of calling
# setwd(), so reading the data does not change the session's working
# directory as a side effect.
data_dir <- "C:/Users/eyong/Downloads/New folder - Copy"
student_survey <- read_csv(file.path(data_dir, "StudentSurvey.csv"))
## Rows: 79 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Year, Sex, Smoke, Award, HigherSAT
## dbl (12): Exercise, TV, Height, Weight, Siblings, BirthOrder, VerbalSAT, Mat...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Two-sample t-test comparing mean TV hours between the two Sex groups.
tv_hours_test <- t.test(
  TV ~ Sex,
  data = student_survey
)
# Report only the p-value of the test.
tv_hours_test[["p.value"]]
## [1] 0.001271132
The p-value is 0.001271132, which is less than 0.05, so we reject the null hypothesis. There is significant evidence of a difference in mean TV-watching hours between males and females.
# Load the ICU admissions data (includes the Sex and Infection columns used
# below).
# The folder is joined to the file name with file.path() instead of calling
# setwd(), so reading the data does not change the session's working
# directory as a side effect.
data_dir <- "C:/Users/eyong/Downloads/New folder - Copy"
icu_data <- read_csv(file.path(data_dir, "ICUAdmissions.csv"))
## Rows: 200 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (21): ID, Status, Age, Sex, Race, Service, Cancer, Renal, Infection, CPR...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Two-sample test of equal infection proportions across the Sex groups.
#   x: per-Sex counts from the second column of the Sex-by-Infection table
#      (presumably Infection == 1; confirm the coding of the column).
#   n: number of patients in each Sex group.
infection_prop_test <- prop.test(
  x = table(icu_data$Sex, icu_data$Infection)[, 2],
  n = table(icu_data$Sex)
)
# Report only the p-value of the test.
infection_prop_test[["p.value"]]
## [1] 0.8640718
The p-value is 0.8640718, which is greater than 0.05, so we fail to reject the null hypothesis. There is not enough evidence to conclude a difference in infection proportions between males and females.
H0: No association between sex and Black Friday shopping plans H1: There is an association between sex and Black Friday shopping plans
# Observed counts as a 2 x 2 table: rows are the shopping plan, columns are
# the sex of the respondent.
plan_levels <- c("Shopping", "Not Shopping")
shopping_data <- data.frame(
  row.names = plan_levels,
  Male = c(82, 100),
  Female = c(433, 400)
)

# Chi-squared test of association between sex and shopping plans; only the
# p-value is reported.
chi_squared_test <- chisq.test(shopping_data)
chi_squared_test[["p.value"]]
## [1] 0.1071273
The p-value is 0.1071273, which is greater than 0.05.
Fail to reject the null hypothesis. There is not enough evidence to conclude an association between sex and Black Friday shopping plans.
H0: Mean hang hours are the same across all school pressure levels H1: At least one school pressure level has a different mean of hang hours
# Load the PA seniors survey data (includes the HangHours and SchoolPressure
# columns used below).
# The folder is joined to the file name with file.path() instead of calling
# setwd(), so reading the data does not change the session's working
# directory as a side effect.
data_dir <- "C:/Users/eyong/Downloads/New folder - Copy"
pa_seniors <- read_csv(file.path(data_dir, "PASeniors.csv"))
## Rows: 457 Columns: 36
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (16): Gender, Hand, GetToSchool, Activity, Music, BirthMonth, Season, Al...
## dbl (20): Year, Age, Height, Foot, Armspan, Languages, TravelTime, ReactionT...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# One-way ANOVA: does mean HangHours differ across SchoolPressure levels?
anova_model <- aov(
  formula = HangHours ~ SchoolPressure,
  data = pa_seniors
)
# Auto-print the fitted object (sums of squares and degrees of freedom).
anova_model
## Call:
## aov(formula = HangHours ~ SchoolPressure, data = pa_seniors)
##
## Terms:
## SchoolPressure Residuals
## Sum of Squares 1563.48 48557.41
## Deg. of Freedom 3 443
##
## Residual standard error: 10.4695
## Estimated effects may be unbalanced
## 10 observations deleted due to missingness
# ANOVA table with the F statistic and p-value for SchoolPressure.
summary(object = anova_model)
## Df Sum Sq Mean Sq F value Pr(>F)
## SchoolPressure 3 1563 521.2 4.755 0.00283 **
## Residuals 443 48557 109.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 10 observations deleted due to missingness
The p-value is 0.00283, which is less than 0.05, so we reject
the null hypothesis.
There is significant evidence that the amount of school pressure is
related to time spent hanging out with friends.