R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Print per-column summary statistics (min, quartiles, median, mean, max)
# for the speed and dist columns of the cars dataset.
summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
# Build a small example data frame of scores by year and show the basic
# ways of pulling columns and rows out of it.
years <- c(1980, 1980, 1985, 1990) # numeric vector of years
scores <- c(34, 44, 56, 83) # numeric vector of scores
df <- data.frame(years, scores) # data frame with the columns years and scores
df[, 1] # values of the first column, years
## [1] 1980 1980 1985 1990
df$years # the same column, accessed by name
## [1] 1980 1980 1985 1990
df[, 2] # values of the second column, scores
## [1] 34 44 56 83
df$scores # the same column, accessed by name
## [1] 34 44 56 83
# The comma matters: with df[rows, ] the logical vector filters rows. With a
# single index and no comma it would be applied to the columns instead.
df[df$scores < 50, ] # rows with scores less than 50, with their years
##   years scores
## 1  1980     34
## 2  1980     44
df[df$scores < 50, "scores"] # only the scores less than 50
## [1] 34 44
# Three parallel vectors: element i of each one describes patient i.
subject_name <- c("John Doe", "Jane Doe", "Steve Graves")
temperature <- c(98.1, 98.6, 101.4)
flu_status <- c(FALSE, FALSE, TRUE)
# A single positive index picks one element; Jane Doe is at position 2.
temperature[2]
## [1] 98.6
# A range of indices picks the 2nd and 3rd patients.
temperature[2:3]
## [1]  98.6 101.4
# A negative index drops that element: everything except the 2nd patient.
temperature[-2]
## [1]  98.1 101.4
# The same indexing rules apply to character vectors.
subject_name[1]
## [1] "John Doe"
subject_name[2]
## [1] "Jane Doe"
subject_name[3]
## [1] "Steve Graves"
subject_name[2:3]
## [1] "Jane Doe"     "Steve Graves"
subject_name[-3]
## [1] "John Doe" "Jane Doe"
# A factor stores categorical data. Its printout ends with a "Levels:" line,
# which shows it is a factor and not a character vector.
gender <- factor(c("MALE", "FEMALE", "MALE"))
gender
## [1] MALE   FEMALE MALE  
## Levels: FEMALE MALE
# Blood type: `levels` lists every possible value, including ones (here "B")
# that do not occur in the data.
blood <- factor(
  c("O", "AB", "A"),
  levels = c("A", "B", "AB", "O")
)
blood
## [1] O  AB A 
## Levels: A B AB O
# Ordered factor: the levels are listed from lowest to highest and
# ordered = TRUE records that order.
# General form: factor(data, levels = <all possible values>, ordered = TRUE)
symptoms <- factor(
  c("SEVERE", "MILD", "MODERATE"),
  levels = c("MILD", "MODERATE", "SEVERE"),
  ordered = TRUE
)
symptoms
## [1] SEVERE   MILD     MODERATE
## Levels: MILD < MODERATE < SEVERE
# Test whether each patient's symptoms are more severe than MODERATE.
symptoms > "MODERATE"
## [1]  TRUE FALSE FALSE
# Combine the patient vectors and factors into one data frame, one row per
# patient.
# stringsAsFactors = FALSE keeps character vectors such as subject_name as
# character instead of converting them to factors; columns that are already
# factors (gender, blood, symptoms) stay factors.
# The argument name must be spelled exactly: data.frame() treats a misspelled
# name as an extra column to add.
pt_data <- data.frame(subject_name, temperature, flu_status, gender, blood,
                      symptoms, stringsAsFactors = FALSE)
pt_data
##   subject_name temperature flu_status gender blood symptoms
## 1     John Doe        98.1      FALSE   MALE     O   SEVERE
## 2     Jane Doe        98.6      FALSE FEMALE    AB     MILD
## 3 Steve Graves       101.4       TRUE   MALE     A MODERATE
pt_data[1, 2] # value in the first row, second column
## [1] 98.1
pt_data[, 1] # all values in the first column
## [1] "John Doe"     "Jane Doe"     "Steve Graves"
pt_data[1, ] # the first row: all of the first patient's data
##   subject_name temperature flu_status gender blood symptoms
## 1     John Doe        98.1      FALSE   MALE     O   SEVERE
# Rows 1 and 3 (first and third patients), temperature and gender columns only.
pt_data[c(1, 3), c("temperature", "gender")]
##   temperature gender
## 1        98.1   MALE
## 3       101.4   MALE
# Add a new column computed from an existing one, using the
# Fahrenheit-to-Celsius conversion formula.
pt_data$temp_c <- (pt_data$temperature - 32) * (5 / 9)
pt_data
##   subject_name temperature flu_status gender blood symptoms   temp_c
## 1     John Doe        98.1      FALSE   MALE     O   SEVERE 36.72222
## 2     Jane Doe        98.6      FALSE FEMALE    AB     MILD 37.00000
## 3 Steve Graves       101.4       TRUE   MALE     A MODERATE 38.55556
# Show the original and the derived column side by side for comparison.
pt_data[c("temperature", "temp_c")]
##   temperature   temp_c
## 1        98.1 36.72222
## 2        98.6 37.00000
## 3       101.4 38.55556