R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Per-column summary statistics for the cars data set (columns speed and dist)
summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

# Attach LearnBayes, then load the studentdata data set that it ships with
library("LearnBayes")
data("studentdata", package = "LearnBayes")

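The chunk above loads the LearnBayes package and its studentdata data set, which the rest of this document analyses. Note that adding the echo = FALSE parameter to a code chunk prevents printing of the R code that generated its output.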

# studentdata is already in the workspace from the earlier data() call,
# so it is not reloaded here; preview its first rows and column layout.
head(studentdata)
##   Student Height Gender Shoes Number Dvds ToSleep WakeUp Haircut  Job Drink
## 1       1     67 female    10      5   10    -2.5    5.5      60 30.0 water
## 2       2     64 female    20      7    5     1.5    8.0       0 20.0   pop
## 3       3     61 female    12      2    6    -1.5    7.5      48  0.0  milk
## 4       4     61 female     3      6   40     2.0    8.5      10  0.0 water
## 5       5     70   male     4      5    6     0.0    9.0      15 17.5   pop
## 6       6     63 female    NA      3    5     1.0    8.5      25  0.0 water
# Distribution of the number of DVDs each student reports owning
with(
  studentdata,
  hist(
    Dvds,
    col = "lightblue",
    xlab = "Number of DVDs",
    main = "Histogram of Number of DVDs"
  )
)

(b)

# Numeric summary of the DVD counts; missing answers are reported under NA's
summary(studentdata[["Dvds"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00   10.00   20.00   30.93   30.00 1000.00      16
# Tabulate the distinct answers once and reuse the counts for the bar plot
dvd_counts <- table(studentdata[["Dvds"]])
dvd_counts
## 
##    0    1    2  2.5    3    4    5    6    7    8    9   10   11   12   13   14 
##   26   10   13    1   18    9   27   14   12   12    7   78    3   20    7    4 
##   15   16   17 17.5   18   20   21   22 22.5   23   24   25 27.5   28   29   30 
##   46    1    3    1    4   83    3    3    1    3    2   31    3    1    1   45 
##   31   33   35   36   37   40   41   42   45   46   48   50   52   53   55   60 
##    1    1   12    4    1   26    1    1    5    1    2   26    1    2    1    7 
##   62   65   67   70   73   75   80   83   85   90   97  100  120  122  130  137 
##    1    2    1    4    1    3    4    1    1    1    1   10    2    1    2    1 
##  150  152  157  175  200  250  500  900 1000 
##    6    1    1    1    8    1    1    1    1
# One bar per distinct answer, bar height = number of students giving it
barplot(
  dvd_counts,
  main = "Frequency of DVDs",
  xlab = "Number of DVDs",
  ylab = "Frequency",
  col = "red"
)

Some values (for example 10, 20, 50 and 100) appear far more often than their neighbours because students tend to report rounded, easy-to-remember numbers rather than exact counts.

# Parallel box plots comparing the height distributions of the two genders
boxplot(
  Height ~ Gender,
  data = studentdata,
  ylab = "Height (inches)",
  xlab = "Gender",
  main = "Height by Gender"
)

# Capture the statistics behind the box plots (quartiles, group sizes, notch
# limits, outliers). plot = FALSE returns the same list without drawing a
# second, unlabelled copy of the figure produced above.
output <- boxplot(Height ~ Gender, data = studentdata, plot = FALSE)

output
## $stats
##       [,1] [,2]
## [1,] 57.75   65
## [2,] 63.00   69
## [3,] 64.50   71
## [4,] 67.00   72
## [5,] 73.00   76
## 
## $n
## [1] 428 219
## 
## $conf
##          [,1]    [,2]
## [1,] 64.19451 70.6797
## [2,] 64.80549 71.3203
## 
## $out
##  [1] 56 76 55 56 76 54 54 84 78 77 56 63 77 79 62 62 61 79 59 61 78 62
## 
## $group
##  [1] 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
## 
## $names
## [1] "female" "male"
# Mean height within each gender, ignoring missing heights
mean_height <- tapply(
  studentdata$Height,
  studentdata$Gender,
  mean,
  na.rm = TRUE
)
mean_male <- mean_height[["male"]]
mean_female <- mean_height[["female"]]

mean_male
## [1] 70.50767
mean_female
## [1] 64.75701
# Difference in mean height (male minus female)
mean_male - mean_female
## [1] 5.750657

On average, male students are about 5.75 inches taller than female students (70.51 vs. 64.76 inches).

# Simple linear regression of wake-up time on bedtime
fit <- lm(WakeUp ~ ToSleep, data = studentdata)

# Scatter plot of the raw observations with the fitted line drawn on top
with(
  studentdata,
  plot(
    ToSleep,
    WakeUp,
    main = "Sleep Time vs Wake Up Time",
    xlab = "Time to Sleep (hours past midnight)",
    ylab = "Wake Up Time"
  )
)
abline(fit, lwd = 2, col = "blue")

# `fit` is the WakeUp ~ ToSleep model already fitted for the scatter plot,
# so it is summarised directly instead of being refitted.
summary(fit)
## 
## Call:
## lm(formula = WakeUp ~ ToSleep, data = studentdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4010 -0.9628 -0.0998  0.8249  4.6125 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.96276    0.06180  128.85   <2e-16 ***
## ToSleep      0.42472    0.03595   11.81   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.282 on 651 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.1765, Adjusted R-squared:  0.1753 
## F-statistic: 139.5 on 1 and 651 DF,  p-value: < 2.2e-16