#Install the LearnBayes package
#Keep in mind that R is case-sensitive
#install.packages('LearnBayes')
#You only need to install the package once; after that, simply load it
#with library(LearnBayes) at the start of each new R session
# Load the LearnBayes package
library("LearnBayes")
# Load the studentdata data set that ships with LearnBayes
data("studentdata", package = "LearnBayes")
# Put the columns of studentdata on the search path so that they can be
# referred to by bare name (e.g. Dvds, Height)
attach(studentdata)
# Preview the first six rows
head(studentdata, n = 6L)
## Student Height Gender Shoes Number Dvds ToSleep WakeUp Haircut Job Drink
## 1 1 67 female 10 5 10 -2.5 5.5 60 30.0 water
## 2 2 64 female 20 7 5 1.5 8.0 0 20.0 pop
## 3 3 61 female 12 2 6 -1.5 7.5 48 0.0 milk
## 4 4 61 female 3 6 40 2.0 8.5 10 0.0 water
## 5 5 70 male 4 5 6 0.0 9.0 15 17.5 pop
## 6 6 63 female NA 3 5 1.0 8.5 25 0.0 water
# Histogram of Dvds drawn on the density scale (freq = FALSE) rather than
# as raw counts
hist(studentdata$Dvds, freq = FALSE)
# Summary statistics for every column of studentdata
summary(studentdata)
## Student Height Gender Shoes Number
## Min. : 1 Min. :54.0 female:435 Min. : 0.00 Min. : 1.00
## 1st Qu.:165 1st Qu.:64.0 male :222 1st Qu.: 6.00 1st Qu.: 4.00
## Median :329 Median :66.0 Median : 12.00 Median : 6.00
## Mean :329 Mean :66.7 Mean : 15.42 Mean : 5.67
## 3rd Qu.:493 3rd Qu.:70.0 3rd Qu.: 20.00 3rd Qu.: 7.00
## Max. :657 Max. :84.0 Max. :164.00 Max. :10.00
## NA's :10 NA's :22 NA's :2
## Dvds ToSleep WakeUp Haircut
## Min. : 0.00 Min. :-2.500 Min. : 1.000 Min. : 0.00
## 1st Qu.: 10.00 1st Qu.: 0.000 1st Qu.: 7.500 1st Qu.: 10.00
## Median : 20.00 Median : 1.000 Median : 8.500 Median : 16.00
## Mean : 30.93 Mean : 1.001 Mean : 8.383 Mean : 25.91
## 3rd Qu.: 30.00 3rd Qu.: 2.000 3rd Qu.: 9.000 3rd Qu.: 30.00
## Max. :1000.00 Max. : 6.000 Max. :13.000 Max. :180.00
## NA's :16 NA's :3 NA's :2 NA's :20
## Job Drink
## Min. : 0.00 milk :113
## 1st Qu.: 0.00 pop :178
## Median :10.50 water:355
## Mean :11.45 NA's : 11
## 3rd Qu.:17.50
## Max. :80.00
## NA's :32
# Barplot of how often each distinct Dvds value occurs.
# The column is taken from studentdata explicitly so the result cannot be
# affected by another object named Dvds on the search path.
barplot(table(studentdata$Dvds), col = "red")
We observe from the barplot of Dvds (number of movie DVDs owned) that the most popular response values are 10 and 20. Is there any explanation for these popular values for the number of DVDs owned?
A: The students likely estimated the number of DVDs they own to the nearest 10.
# Side-by-side boxplots of Height for each Gender
boxplot(Height ~ Gender, data = studentdata)
# Draw the boxplot again, this time keeping the list that boxplot() returns
# (stats, n, conf, out, group, names) in a variable named output
output <- boxplot(Height ~ Gender, data = studentdata)
print(output)
## $stats
## [,1] [,2]
## [1,] 57.75 65
## [2,] 63.00 69
## [3,] 64.50 71
## [4,] 67.00 72
## [5,] 73.00 76
##
## $n
## [1] 428 219
##
## $conf
## [,1] [,2]
## [1,] 64.19451 70.6797
## [2,] 64.80549 71.3203
##
## $out
## [1] 56 76 55 56 76 54 54 84 78 77 56 63 77 79 62 62 61 79 59 61 78 62
##
## $group
## [1] 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
##
## $names
## [1] "female" "male"
# Method: using aggregate() to compute the mean Height for each Gender
group_means <- aggregate(Height ~ Gender, data = studentdata, FUN = mean)
print(group_means)
## Gender Height
## 1 female 64.75701
## 2 male 70.50767
# Calculate the mean difference of heights between male and female students
# using the results from aggregate(). Rows are looked up by their Gender
# label rather than by position, so the sign of the difference does not
# depend on the order of the rows.
mean_diff <- with(
  group_means,
  Height[Gender == "male"] - Height[Gender == "female"]
)
print(mean_diff) # Output: 5.750657
## [1] 5.750657
On average, male students are about 5.75 inches taller than female students (mean difference: 5.750657 inches).
If you want to learn about R Markdown, please refer to https://bookdown.org/yihui/rmarkdown/.
# Scatterplot of WakeUp against ToSleep
plot(WakeUp ~ ToSleep, data = studentdata)
# Least-squares fit of WakeUp on ToSleep. Passing data = studentdata records
# the data source in the fitted model instead of relying on attached columns.
fit <- lm(WakeUp ~ ToSleep, data = studentdata)
summary(fit)
##
## Call:
## lm(formula = WakeUp ~ ToSleep, data = studentdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4010 -0.9628 -0.0998 0.8249 4.6125
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.96276 0.06180 128.85 <2e-16 ***
## ToSleep 0.42472 0.03595 11.81 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.282 on 651 degrees of freedom
## (4 observations deleted due to missingness)
## Multiple R-squared: 0.1765, Adjusted R-squared: 0.1753
## F-statistic: 139.5 on 1 and 651 DF, p-value: < 2.2e-16
# Redraw the scatterplot and overlay the fitted least-squares line
plot(WakeUp ~ ToSleep, data = studentdata)
abline(fit, col = "blue", lwd = 2)