# Exercise 0

library(LearnBayes)

# Load the studentdata data set that ships with the LearnBayes package.
data("studentdata", package = "LearnBayes")

# NOTE(review): attach() puts the data frame's columns on the search path;
# statements further down that use bare column names depend on this call.
attach(studentdata)

# Preview the first six rows (the head() default, spelled out).
head(studentdata, n = 6L)

##   Student Height Gender Shoes Number Dvds ToSleep WakeUp Haircut  Job Drink
## 1       1     67 female    10      5   10    -2.5    5.5      60 30.0 water
## 2       2     64 female    20      7    5     1.5    8.0       0 20.0   pop
## 3       3     61 female    12      2    6    -1.5    7.5      48  0.0  milk
## 4       4     61 female     3      6   40     2.0    8.5      10  0.0 water
## 5       5     70   male     4      5    6     0.0    9.0      15 17.5   pop
## 6       6     63 female    NA      3    5     1.0    8.5      25  0.0 water

# Density-scaled histogram of the Dvds column.
# `probability = TRUE` is spelled out in full: `T` is an ordinary variable that
# can be reassigned, and `prob` only worked through partial argument matching.
hist(studentdata$Dvds, probability = TRUE)

# Per-column summary of the whole data frame (quartiles, factor counts, NAs).
summary(studentdata)

##     Student        Height        Gender        Shoes            Number     
##  Min.   :  1   Min.   :54.0   female:435   Min.   :  0.00   Min.   : 1.00  
##  1st Qu.:165   1st Qu.:64.0   male  :222   1st Qu.:  6.00   1st Qu.: 4.00  
##  Median :329   Median :66.0                Median : 12.00   Median : 6.00  
##  Mean   :329   Mean   :66.7                Mean   : 15.42   Mean   : 5.67  
##  3rd Qu.:493   3rd Qu.:70.0                3rd Qu.: 20.00   3rd Qu.: 7.00  
##  Max.   :657   Max.   :84.0                Max.   :164.00   Max.   :10.00  
##                NA's   :10                  NA's   :22       NA's   :2      
##       Dvds            ToSleep           WakeUp          Haircut      
##  Min.   :   0.00   Min.   :-2.500   Min.   : 1.000   Min.   :  0.00  
##  1st Qu.:  10.00   1st Qu.: 0.000   1st Qu.: 7.500   1st Qu.: 10.00  
##  Median :  20.00   Median : 1.000   Median : 8.500   Median : 16.00  
##  Mean   :  30.93   Mean   : 1.001   Mean   : 8.383   Mean   : 25.91  
##  3rd Qu.:  30.00   3rd Qu.: 2.000   3rd Qu.: 9.000   3rd Qu.: 30.00  
##  Max.   :1000.00   Max.   : 6.000   Max.   :13.000   Max.   :180.00  
##  NA's   :16        NA's   :3        NA's   :2        NA's   :20      
##       Job          Drink    
##  Min.   : 0.00   milk :113  
##  1st Qu.: 0.00   pop  :178  
##  Median :10.50   water:355  
##  Mean   :11.45   NA's : 11  
##  3rd Qu.:17.50              
##  Max.   :80.00              
##  NA's   :32

# Bar chart of how often each distinct Dvds value occurs. The column is taken
# explicitly from studentdata so the result does not depend on attach() or on
# a same-named variable in the workspace masking it.
barplot(table(studentdata$Dvds), col = "red")

# The popular values of 10 and 20 could perhaps indicate that there are about 10 popular movies that most people enjoy.
# If someone owns more than 10, there might be 15 (suggested by the small frequency spike) or 20 movies that are far more popular, so people end up owning only those.
#
# Most likely, though, it indicates that most people don't know the exact number of DVDs they own, so they round their guess to the nearest 5 or 10.
# This makes the most sense, as 10-20 DVDs is what most people would have. After that, there are spikes at every multiple of 5.

# Side-by-side box plots of Height for each Gender level. boxplot() draws the
# figure and also returns (invisibly) the statistics behind it, so one call
# serves both purposes. `data = studentdata` makes the column lookup explicit
# instead of relying on attach().
output <- boxplot(Height ~ Gender, data = studentdata)

# Show the returned components: $stats, $n, $conf, $out, $group and $names.
print(output)

## $stats
##       [,1] [,2]
## [1,] 57.75   65
## [2,] 63.00   69
## [3,] 64.50   71
## [4,] 67.00   72
## [5,] 73.00   76
## 
## $n
## [1] 428 219
## 
## $conf
##          [,1]    [,2]
## [1,] 64.19451 70.6797
## [2,] 64.80549 71.3203
## 
## $out
##  [1] 56 76 55 56 76 54 54 84 78 77 56 63 77 79 62 62 61 79 59 61 78 62
## 
## $group
##  [1] 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
## 
## $names
## [1] "female" "male"

# Mean Height within each Gender level, returned as a data frame with one row
# per level and the columns Gender and Height.
group_means <- aggregate(
  Height ~ Gender,
  data = studentdata,
  FUN = mean
)
print(group_means)

##   Gender   Height
## 1 female 64.75701
## 2   male 70.50767

# Difference in mean height, male minus female. Rows are selected by their
# Gender label rather than by position, so the result does not silently flip
# sign or pick the wrong group if the factor level order ever changes.
male_mean <- group_means$Height[group_means$Gender == "male"]
female_mean <- group_means$Height[group_means$Gender == "female"]
mean_diff <- male_mean - female_mean
print(mean_diff)

## [1] 5.750657

# On average male students are 5.750657 inches taller than female students

# Scatter plot of WakeUp against ToSleep. The formula interface with `data =`
# looks the columns up in studentdata directly (no reliance on attach()) and
# keeps the axis labels "ToSleep" / "WakeUp".
plot(WakeUp ~ ToSleep, data = studentdata)

# Least-squares regression of WakeUp on ToSleep; rows with a missing value in
# either column are dropped by lm() (see the "deleted due to missingness" note
# in the recorded output below).
fit <- lm(WakeUp ~ ToSleep, data = studentdata)
summary(fit)

## 
## Call:
## lm(formula = WakeUp ~ ToSleep, data = studentdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4010 -0.9628 -0.0998  0.8249  4.6125 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.96276    0.06180  128.85   <2e-16 ***
## ToSleep      0.42472    0.03595   11.81   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.282 on 651 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.1765, Adjusted R-squared:  0.1753 
## F-statistic: 139.5 on 1 and 651 DF,  p-value: < 2.2e-16

# Overlay the fitted regression line on the scatter plot drawn above.
abline(fit, col = "blue", lwd = 2)