R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
# Open the help page for the 'datasets' topic
help("datasets")
# Show the package-level information for the 'datasets' package
library(help = "datasets")
# Load iris into the workspace and preview its first six rows
data("iris")
head(iris, n = 6L)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Build a small example data frame pairing each score with its year
years <- c(1980, 1980, 1985, 1990)
scores <- c(34, 44, 56, 83)
df <- data.frame(years, scores)
# display the first column (years); df[, 1] selects a column, not a row
df[, 1]
## [1] 1980 1980 1985 1990
# display the rows whose score is below 50, keeping the year column
df[df$scores < 50, ]
##   years scores
## 1  1980     34
## 2  1980     44
# display only the scores below 50, without the year
df[df$scores < 50, "scores"]
## [1] 34 44
# Toy patient data set: three parallel vectors, one element per patient
subject_name <- c("John Doe", "Jane Doe", "Steve Graves")
temperature <- c(98.1, 98.6, 101.4)
flu_stats <- c(FALSE, FALSE, TRUE)
# temperature of the second patient
temperature[2]
## [1] 98.6
# temperatures of the second and third patients
temperature[c(2, 3)]
## [1]  98.6 101.4
# temperatures of every patient except the second (a negative index excludes)
temperature[-2]
## [1]  98.1 101.4
# name of the third patient
subject_name[3]
## [1] "Steve Graves"
# Unordered factor; printing it lists the levels, which indicates that it is
# a factor rather than a character vector
gender <- factor(c("MALE", "FEMALE", "MALE"))
gender
## [1] MALE   FEMALE MALE  
## Levels: FEMALE MALE
# Factor with explicitly declared levels, including "B", which does not
# occur in the data
blood <- factor(
  c("O", "AB", "A"),
  levels = c("A", "B", "AB", "O")
)
blood
## [1] O  AB A 
## Levels: A B AB O
# Ordered factor: the levels are ranked MILD < MODERATE < SEVERE
symptoms <- factor(
  c("SEVERE", "MILD", "MODERATE"),
  levels = c("MILD", "MODERATE", "SEVERE"),
  ordered = TRUE
)
symptoms
## [1] SEVERE   MILD     MODERATE
## Levels: MILD < MODERATE < SEVERE
# Which entries rank above MODERATE?
symptoms > "MODERATE"
## [1]  TRUE FALSE FALSE
# Combine the patient vectors and factors into a single data frame
pt_data <- data.frame(
  subject_name, temperature, flu_stats, gender, blood, symptoms,
  stringsAsFactors = FALSE
)
pt_data
##   subject_name temperature flu_stats gender blood symptoms
## 1     John Doe        98.1     FALSE   MALE     O   SEVERE
## 2     Jane Doe        98.6     FALSE FEMALE    AB     MILD
## 3 Steve Graves       101.4      TRUE   MALE     A MODERATE
# rows 1 and 3, columns 2 and 4, selected by position
pt_data[c(1, 3), c(2, 4)]
##   temperature gender
## 1        98.1   MALE
## 3       101.4   MALE
# every column for the second patient
pt_data[2, ]
##   subject_name temperature flu_stats gender blood symptoms
## 2     Jane Doe        98.6     FALSE FEMALE    AB     MILD
# the same selection as above by column name, which is preferable to position
pt_data[c(1, 3), c("temperature", "gender")]
##   temperature gender
## 1        98.1   MALE
## 3       101.4   MALE
# derive a new column from an existing one: (temperature - 32) * (5 / 9)
pt_data$temp_c <- (pt_data$temperature - 32) * (5 / 9)
pt_data
##   subject_name temperature flu_stats gender blood symptoms   temp_c
## 1     John Doe        98.1     FALSE   MALE     O   SEVERE 36.72222
## 2     Jane Doe        98.6     FALSE FEMALE    AB     MILD 37.00000
## 3 Steve Graves       101.4      TRUE   MALE     A MODERATE 38.55556
# show the original and the derived temperature columns side by side
pt_data[c("temperature", "temp_c")]
##   temperature   temp_c
## 1        98.1 36.72222
## 2        98.6 37.00000
## 3       101.4 38.55556
# Open the help page for mpg
help("mpg")
# Print the data set; the output below shows a 234 x 11 tibble
mpg
## # A tibble: 234 × 11
##    manufacturer model      displ  year   cyl trans drv     cty   hwy fl    class
##    <chr>        <chr>      <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
##  1 audi         a4           1.8  1999     4 auto… f        18    29 p     comp…
##  2 audi         a4           1.8  1999     4 manu… f        21    29 p     comp…
##  3 audi         a4           2    2008     4 manu… f        20    31 p     comp…
##  4 audi         a4           2    2008     4 auto… f        21    30 p     comp…
##  5 audi         a4           2.8  1999     6 auto… f        16    26 p     comp…
##  6 audi         a4           2.8  1999     6 manu… f        18    26 p     comp…
##  7 audi         a4           3.1  2008     6 auto… f        18    27 p     comp…
##  8 audi         a4 quattro   1.8  1999     4 manu… 4        18    26 p     comp…
##  9 audi         a4 quattro   1.8  1999     4 auto… 4        16    25 p     comp…
## 10 audi         a4 quattro   2    2008     4 manu… 4        20    28 p     comp…
## # ℹ 224 more rows
# Preview only the first six rows
head(mpg, n = 6L)
## # A tibble: 6 × 11
##   manufacturer model displ  year   cyl trans      drv     cty   hwy fl    class 
##   <chr>        <chr> <dbl> <int> <int> <chr>      <chr> <int> <int> <chr> <chr> 
## 1 audi         a4      1.8  1999     4 auto(l5)   f        18    29 p     compa…
## 2 audi         a4      1.8  1999     4 manual(m5) f        21    29 p     compa…
## 3 audi         a4      2    2008     4 manual(m6) f        20    31 p     compa…
## 4 audi         a4      2    2008     4 auto(av)   f        21    30 p     compa…
## 5 audi         a4      2.8  1999     6 auto(l5)   f        16    26 p     compa…
## 6 audi         a4      2.8  1999     6 manual(m5) f        18    26 p     compa…
# displacement versus highway miles per gallon
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy))

# displacement versus city miles per gallon
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = cty))

# displacement versus number of cylinders
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = cyl))

# map class to point color (inside aes(), so the color varies with the data)
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, color = class))

# map class to point transparency (alpha), not shape; ggplot2 warns because
# alpha is not advised for a discrete variable
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, alpha = class))
## Warning: Using alpha for a discrete variable is not advised.

# fixed color for every point: set outside aes(), so not mapped to the data
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy), color = "blue")

# split the plot into facets, one panel per class, laid out in two rows
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy)) +
  facet_wrap(~class, nrow = 2)