The first think we need to do when working with a data set is make sure the data is in the same place as our code.

click “session”

“set working directory” > “change directory”

Then choose the folder in which you saved the data set.

You are now ready to import the data and take a look at the variables contained in the set.

 sleep=read.csv("SleepStudy.csv", header=TRUE)
head(sleep)

Next, let’s make sure we have the tools we need to analyze the data.

We will start with a tool to help us better work with the data set.

#only need to run this once
install.packages("dplyr", dependencies=TRUE)

If you have already installed dplyr before, please just use the following.

library(dplyr)

Attaching package: <U+393C><U+3E31>dplyr<U+393C><U+3E32>

The following objects are masked from <U+393C><U+3E31>package:stats<U+393C><U+3E32>:

    filter, lag

The following objects are masked from <U+393C><U+3E31>package:base<U+393C><U+3E32>:

    intersect, setdiff, setequal, union

We will use ggplot2 for graphics. Run the following command to load the ggplot package (we have already installed it).

#only need to run this once
install.packages("ggplot2")

If you have already installed ggplot2, please use the following.

library(ggplot2)

Exploratory Analysis: We will take a look at the data using graphical and numerical tools.

Categorical Data Analysis

LarkOwl vs. Alcohol use, for example, will give us a sense of whether alcohol use is related to when students are awake/asleep. We can begin this exploration with a bar graph.

ggplot(sleep, aes(AlcoholUse, fill=LarkOwl))+geom_bar(position = "dodge")

We can use the following command to group

sleep %>%
  group_by(AlcoholUse,LarkOwl) %>%
  summarize(n())

**Optional: Put the results into a two way table:

table=matrix(c(6,20,8,2,10,4,19,59,5,14,74,32),nrow=4,ncol=3, byrow=TRUE)
rownames(table)=c("Abstain","Heavy","Light","Moderate")
colnames(table)=c("Lark","Neither","Owl")
table
         Lark Neither Owl
Abstain     6      20   8
Heavy       2      10   4
Light      19      59   5
Moderate   14      74  32

Quantitative Data Analysis

Histogram of Sleep Quality

qplot(sleep$PoorSleepQuality,
      geom="histogram", 
      binwidth = 1,
      fill=I("tomato3"))

Boxplot of Sleep Quality

ggplot(sleep, aes(x="",y = PoorSleepQuality)) +
        geom_boxplot()+
  ggtitle("Sleep Quality Score")

Summary of Sleep Quality

summary(sleep$PoorSleepQuality)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  1.000   4.000   6.000   6.257   8.000  18.000 

Standard Deviation of Sleep Quality

sd(sleep$PoorSleepQuality)
[1] 2.919761

Histogram of Anxiety

qplot(sleep$AnxietyScore,
      geom="histogram", 
      binwidth = 1,
      fill=I("khaki1"))

Boxplot of Anxiety

ggplot(sleep, aes(x="",y = AnxietyScore)) +
        geom_boxplot()+
  ggtitle("Anxiety Score College Students")

Summary of Anxiety

summary(sleep$AnxietyScore)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  0.000   1.000   4.000   5.372   8.000  26.000 

Standard Deviation of Anxiety

sd(sleep$AnxietyScore)
[1] 5.197327

**Optional: Multivariable Plots

Quality of Sleep by type Lark, Neither, Owl

ggplot(sleep, aes(x=LarkOwl,y = PoorSleepQuality)) +
        geom_boxplot()+
  ggtitle("Poor Sleep Quality of College Students")

**Optional: Anxiety based on Alcohol use

ggplot(sleep, aes(x=AlcoholUse,y = AnxietyScore)) +
        geom_boxplot()+
  ggtitle("Anxiety Score College Students")

Two Variable Analysis

Here we will look at the relationship between two quantitative variables

ggplot(sleep, aes(x=PoorSleepQuality, y=AnxietyScore)) +
  geom_point(size=2)+
  ggtitle("Anxiety vs. Poor Sleep Score")

Calculate the Correlation

cor(sleep$PoorSleepQuality,sleep$AnxietyScore)
[1] 0.4513103

Fit a line to the data and add it to the plot

ggplot(sleep, aes(x=PoorSleepQuality, y=AnxietyScore)) +
  geom_point(size=2)+
  stat_smooth(method="lm",col="red",se=FALSE)+
  ggtitle("Anxiety vs. Poor Sleep Score")

Find the equation of the best fit line

#Fit the linear model
sleep.lm=lm(sleep$AnxietyScore~sleep$PoorSleepQuality)
coefficients(sleep.lm) 
           (Intercept) sleep$PoorSleepQuality 
             0.3450103              0.8033559 

The slope is 0.803 and the intercept is 0.345

So the model predicting Anxiety score from sleep quality is:

Anxiety = 0.803*PoorSleep + 0.345


**Optional: Color code by a category

ggplot(sleep, aes(x=PoorSleepQuality, y=AnxietyScore,   color=AlcoholUse)) +
  geom_point(size=2)+
  ggtitle("Anxiety vs. Poor Sleep by Alcohol Use")

LS0tDQp0aXRsZTogIlRvcGljcyBGaW5hbCBQcm9qZWN0OiBEYXRhIFNjaWVuY2Ugd2l0aCBSLVN0dWRpbyINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQpUaGUgZmlyc3QgdGhpbmsgd2UgbmVlZCB0byBkbyB3aGVuIHdvcmtpbmcgd2l0aCBhIGRhdGEgc2V0IGlzIG1ha2Ugc3VyZSB0aGUgZGF0YSBpcyBpbiB0aGUgc2FtZSBwbGFjZSBhcyBvdXIgY29kZS4gIA0KDQpjbGljayAic2Vzc2lvbiINCg0KInNldCB3b3JraW5nIGRpcmVjdG9yeSIgPiAiY2hhbmdlIGRpcmVjdG9yeSINCg0KVGhlbiBjaG9vc2UgdGhlIGZvbGRlciBpbiB3aGljaCB5b3Ugc2F2ZWQgdGhlIGRhdGEgc2V0Lg0KDQpZb3UgYXJlIG5vdyByZWFkeSB0byBpbXBvcnQgdGhlIGRhdGEgYW5kIHRha2UgYSBsb29rIGF0IHRoZSB2YXJpYWJsZXMgY29udGFpbmVkIGluIHRoZSBzZXQuDQoNCmBgYHtyfQ0KIHNsZWVwPXJlYWQuY3N2KCJTbGVlcFN0dWR5LmNzdiIsIGhlYWRlcj1UUlVFKQ0KaGVhZChzbGVlcCkNCmBgYA0KDQpOZXh0LCBsZXQncyBtYWtlIHN1cmUgd2UgaGF2ZSB0aGUgdG9vbHMgd2UgbmVlZCB0byBhbmFseXplIHRoZSBkYXRhLiANCg0KV2Ugd2lsbCBzdGFydCB3aXRoIGEgdG9vbCB0byBoZWxwIHVzIGJldHRlciB3b3JrIHdpdGggdGhlIGRhdGEgc2V0LiAgDQoNCmBgYHtyfQ0KI29ubHkgbmVlZCB0byBydW4gdGhpcyBvbmNlDQppbnN0YWxsLnBhY2thZ2VzKCJkcGx5ciIsIGRlcGVuZGVuY2llcz1UUlVFKQ0KYGBgDQoNCklmIHlvdSBoYXZlIGFscmVhZHkgaW5zdGFsbGVkIGRwbHlyIGJlZm9yZSwgcGxlYXNlIGp1c3QgdXNlIHRoZSBmb2xsb3dpbmcuDQoNCmBgYHtyfQ0KbGlicmFyeShkcGx5cikNCmBgYA0KDQoNCldlIHdpbGwgdXNlIGdncGxvdDIgZm9yIGdyYXBoaWNzLiAgUnVuIHRoZSBmb2xsb3dpbmcgY29tbWFuZCB0byBsb2FkIHRoZSBnZ3Bsb3QgcGFja2FnZSAod2UgaGF2ZSBhbHJlYWR5IGluc3RhbGxlZCBpdCkuDQoNCmBgYHtyfQ0KI29ubHkgbmVlZCB0byBydW4gdGhpcyBvbmNlDQppbnN0YWxsLnBhY2thZ2VzKCJnZ3Bsb3QyIikNCmBgYA0KDQpJZiB5b3UgaGF2ZSBhbHJlYWR5IGluc3RhbGxlZCBnZ3Bsb3QyLCBwbGVhc2UgdXNlIHRoZSBmb2xsb3dpbmcuDQoNCmBgYHtyfQ0KbGlicmFyeShnZ3Bsb3QyKQ0KYGBgDQoNCg0KRXhwbG9yYXRvcnkgQW5hbHlzaXM6ICBXZSB3aWxsIHRha2UgYSBsb29rIGF0IHRoZSBkYXRhIHVzaW5nIGdyYXBoaWNhbCBhbmQgbnVtZXJpY2FsIHRvb2xzLg0KDQpDYXRlZ29yaWNhbCBEYXRhIEFuYWx5c2lzDQoNCkxhcmtPd2wgdnMuIEFsY29ob2wgdXNlLCBmb3IgZXhhbXBsZSwgd2lsbCBnaXZlIHVzIGEgc2Vuc2Ugb2Ygd2hldGhlciBhbGNvaG9sIHVzZSBpcyByZWxhdGVkIHRvIHdoZW4gc3R1ZGVudHMgYXJlIGF3YWtlL2FzbGVlcC4gIFdlIGNhbiBiZWdpbiB0aGlzIGV4cGxvcmF0aW9uIHdpdGggYSBiYXIgZ3JhcGguIA0KDQpgYGB7cn0NCmdncGxvdChzbGVlcCwgYWVzKEFsY29ob2xVc2UsIGZpbGw9TGFya093bCkpK2dlb21fYmFyKHBvc2l0aW9uID0gImRvZGdlIikNCmBgYA0KDQpXZSBjYW4gdXNlIHRoZSBmb2xsb3dpbmcgY29tbWFuZCB0byBncm91cCANCg0KYGBge3J9DQpzbGVlcCAlPiUNCiAgZ3JvdXBfYnkoQWxjb2hvbFVzZSxMYXJrT3dsKSAlPiUNCiAgc3VtbWFyaXplKG4oKSkNCmBgYA0KDQoqKk9wdGlvbmFsOiAgUHV0IHRoZSByZXN1bHRzIGludG8gYSB0d28gd2F5IHRhYmxlOg0KDQpgYGB7cn0NCnRhYmxlPW1hdHJpeChjKDYsMjAsOCwyLDEwLDQsMTksNTksNSwxNCw3NCwzMiksbnJvdz00LG5jb2w9MywgYnlyb3c9VFJVRSkNCnJvd25hbWVzKHRhYmxlKT1jKCJBYnN0YWluIiwiSGVhdnkiLCJMaWdodCIsIk1vZGVyYXRlIikNCmNvbG5hbWVzKHRhYmxlKT1jKCJMYXJrIiwiTmVpdGhlciIsIk93bCIpDQp0YWJsZQ0KYGBgDQoNClF1YW50aXRhdGl2ZSBEYXRhIEFuYWx5c2lzDQoNCkhpc3RvZ3JhbSBvZiBTbGVlcCBRdWFsaXR5DQpgYGB7cn0NCnFwbG90KHNsZWVwJFBvb3JTbGVlcFF1YWxpdHksDQogICAgICBnZW9tPSJoaXN0b2dyYW0iLCANCiAgICAgIGJpbndpZHRoID0gMSwNCiAgICAgIGZpbGw9SSgidG9tYXRvMyIpKQ0KYGBgDQoNCkJveHBsb3Qgb2YgU2xlZXAgUXVhbGl0eQ0KYGBge3J9DQpnZ3Bsb3Qoc2xlZXAsIGFlcyh4PSIiLHkgPSBQb29yU2xlZXBRdWFsaXR5KSkgKw0KICAgICAgICBnZW9tX2JveHBsb3QoKSsNCiAgZ2d0aXRsZSgiU2xlZXAgUXVhbGl0eSBTY29yZSIpDQpgYGANCg0KU3VtbWFyeSBvZiBTbGVlcCBRdWFsaXR5DQpgYGB7cn0NCnN1bW1hcnkoc2xlZXAkUG9vclNsZWVwUXVhbGl0eSkNCmBgYA0KDQpTdGFuZGFyZCBEZXZpYXRpb24gb2YgU2xlZXAgUXVhbGl0eQ0KDQpgYGB7cn0NCnNkKHNsZWVwJFBvb3JTbGVlcFF1YWxpdHkpDQpgYGANCg0KSGlzdG9ncmFtIG9mIEFueGlldHkNCmBgYHtyfQ0KcXBsb3Qoc2xlZXAkQW54aWV0eVNjb3JlLA0KICAgICAgZ2VvbT0iaGlzdG9ncmFtIiwgDQogICAgICBiaW53aWR0aCA9IDEsDQogICAgICBmaWxsPUkoImtoYWtpMSIpKQ0KYGBgDQoNCkJveHBsb3Qgb2YgQW54aWV0eQ0KYGBge3J9DQpnZ3Bsb3Qoc2xlZXAsIGFlcyh4PSIiLHkgPSBBbnhpZXR5U2NvcmUpKSArDQogICAgICAgIGdlb21fYm94cGxvdCgpKw0KICBnZ3RpdGxlKCJBbnhpZXR5IFNjb3JlIENvbGxlZ2UgU3R1ZGVudHMiKQ0KYGBgDQoNClN1bW1hcnkgb2YgQW54aWV0eQ0KYGBge3J9DQpzdW1tYXJ5KHNsZWVwJEFueGlldHlTY29yZSkNCmBgYA0KDQpTdGFuZGFyZCBEZXZpYXRpb24gb2YgQW54aWV0eQ0KDQpgYGB7cn0NCnNkKHNsZWVwJEFueGlldHlTY29yZSkNCmBgYA0KDQoqKk9wdGlvbmFsOiBNdWx0aXZhcmlhYmxlIFBsb3RzDQoNClF1YWxpdHkgb2YgU2xlZXAgYnkgdHlwZSBMYXJrLCBOZWl0aGVyLCBPd2wNCmBgYHtyfQ0KZ2dwbG90KHNsZWVwLCBhZXMoeD1MYXJrT3dsLHkgPSBQb29yU2xlZXBRdWFsaXR5KSkgKw0KICAgICAgICBnZW9tX2JveHBsb3QoKSsNCiAgZ2d0aXRsZSgiUG9vciBTbGVlcCBRdWFsaXR5IG9mIENvbGxlZ2UgU3R1ZGVudHMiKQ0KYGBgDQoNCioqT3B0aW9uYWw6ICBBbnhpZXR5IGJhc2VkIG9uIEFsY29ob2wgdXNlDQpgYGB7cn0NCmdncGxvdChzbGVlcCwgYWVzKHg9QWxjb2hvbFVzZSx5ID0gQW54aWV0eVNjb3JlKSkgKw0KICAgICAgICBnZW9tX2JveHBsb3QoKSsNCiAgZ2d0aXRsZSgiQW54aWV0eSBTY29yZSBDb2xsZWdlIFN0dWRlbnRzIikNCmBgYA0KDQoNClR3byBWYXJpYWJsZSBBbmFseXNpcw0KDQpIZXJlIHdlIHdpbGwgbG9vayBhdCB0aGUgcmVsYXRpb25zaGlwIGJldHdlZW4gdHdvIHF1YW50aXRhdGl2ZSB2YXJpYWJsZXMNCg0KYGBge3J9DQpnZ3Bsb3Qoc2xlZXAsIGFlcyh4PVBvb3JTbGVlcFF1YWxpdHksIHk9QW54aWV0eVNjb3JlKSkgKw0KICBnZW9tX3BvaW50KHNpemU9MikrDQogIGdndGl0bGUoIkFueGlldHkgdnMuIFBvb3IgU2xlZXAgU2NvcmUiKQ0KYGBgDQoNCkNhbGN1bGF0ZSB0aGUgQ29ycmVsYXRpb24NCg0KYGBge3J9DQpjb3Ioc2xlZXAkUG9vclNsZWVwUXVhbGl0eSxzbGVlcCRBbnhpZXR5U2NvcmUpDQpgYGANCg0KRml0IGEgbGluZSB0byB0aGUgZGF0YSBhbmQgYWRkIGl0IHRvIHRoZSBwbG90DQoNCmBgYHtyfQ0KZ2dwbG90KHNsZWVwLCBhZXMoeD1Qb29yU2xlZXBRdWFsaXR5LCB5PUFueGlldHlTY29yZSkpICsNCiAgZ2VvbV9wb2ludChzaXplPTIpKw0KICBzdGF0X3Ntb290aChtZXRob2Q9ImxtIixjb2w9InJlZCIsc2U9RkFMU0UpKw0KICBnZ3RpdGxlKCJBbnhpZXR5IHZzLiBQb29yIFNsZWVwIFNjb3JlIikNCg0KYGBgDQoNCkZpbmQgdGhlIGVxdWF0aW9uIG9mIHRoZSBiZXN0IGZpdCBsaW5lDQoNCmBgYHtyfQ0KI0ZpdCB0aGUgbGluZWFyIG1vZGVsDQpzbGVlcC5sbT1sbShzbGVlcCRBbnhpZXR5U2NvcmV+c2xlZXAkUG9vclNsZWVwUXVhbGl0eSkNCmNvZWZmaWNpZW50cyhzbGVlcC5sbSkgDQpgYGANCg0KVGhlIHNsb3BlIGlzIDAuODAzIGFuZCB0aGUgaW50ZXJjZXB0IGlzIDAuMzQ1DQoNClNvIHRoZSBtb2RlbCBwcmVkaWN0aW5nIEFueGlldHkgc2NvcmUgZnJvbSBzbGVlcCBxdWFsaXR5IGlzOg0KDQpBbnhpZXR5ID0gMC44MDMqUG9vclNsZWVwICsgMC4zNDUNCg0KLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0NCioqT3B0aW9uYWw6ICBDb2xvciBjb2RlIGJ5IGEgY2F0ZWdvcnkNCg0KYGBge3J9DQpnZ3Bsb3Qoc2xlZXAsIGFlcyh4PVBvb3JTbGVlcFF1YWxpdHksIHk9QW54aWV0eVNjb3JlLCAgIGNvbG9yPUFsY29ob2xVc2UpKSArDQogIGdlb21fcG9pbnQoc2l6ZT0yKSsNCiAgZ2d0aXRsZSgiQW54aWV0eSB2cy4gUG9vciBTbGVlcCBieSBBbGNvaG9sIFVzZSIpDQpgYGANCg==