This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars) # per-column summary (min, quartiles, median, mean, max) of the speed and dist columns of cars
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
years <- c(1980, 1980, 1985, 1990) # numeric vector of years
scores <- c(34, 44, 56, 83) # numeric vector of scores, one per entry in years
df <- data.frame(years, scores) # data frame with two columns: years and scores
df[, 1] # first column of the data frame (years), returned as a vector
## [1] 1980 1980 1985 1990
df$years # the same column, accessed by name
## [1] 1980 1980 1985 1990
df[, 2] # second column of the data frame (scores), returned as a vector
## [1] 34 44 56 83
df$scores # the same column, accessed by name
## [1] 34 44 56 83
# Row filtering needs the form df[rows, cols]; without the comma the logical
# vector would select columns instead of rows and the whole table would print.
df[df$scores < 50, ] # rows whose score is less than 50, with the associated years
## years scores
## 1 1980 34
## 2 1980 44
df[df$scores < 50, "scores"] # only the scores that are less than 50
## [1] 34 44
# Three parallel vectors: element i of each one describes the same patient
subject_name <- c("John Doe", "Jane Doe", "Steve Graves")
temperature <- c(98.1, 98.6, 101.4)
flu_status <- c(FALSE, FALSE, TRUE)
temperature[2] # Jane Doe is the second patient, so index 2 is her temperature
## [1] 98.6
temperature[2:3] # a range of indices: temperatures of the 2nd and 3rd patients
## [1] 98.6 101.4
temperature[-2] # a negative index drops that element: everyone except the 2nd patient
## [1] 98.1 101.4
subject_name[1]
## [1] "John Doe"
subject_name[2]
## [1] "Jane Doe"
subject_name[3]
## [1] "Steve Graves"
subject_name[2:3]
## [1] "Jane Doe" "Steve Graves"
subject_name[-3]
## [1] "John Doe" "Jane Doe"
# A factor stores categorical data as a fixed set of levels
gender <- factor(c("MALE", "FEMALE", "MALE"))
gender # the "Levels:" line in the output marks this as a factor rather than a string vector
## [1] MALE FEMALE MALE
## Levels: FEMALE MALE
# Blood type factor; levels lists every possible type, including ones ("B") absent from the data
blood <- factor(c("O", "AB", "A"), levels = c("A", "B", "AB", "O"))
blood
## [1] O AB A
## Levels: A B AB O
# Ordered factor: the levels are listed in ascending order, lowest to highest
# General form: factor(data, levels = all possible values, ordered = TRUE to rank the levels)
symptoms <- factor(
  c("SEVERE", "MILD", "MODERATE"),
  levels = c("MILD", "MODERATE", "SEVERE"),
  ordered = TRUE
)
symptoms
## [1] SEVERE MILD MODERATE
## Levels: MILD < MODERATE < SEVERE
symptoms > "MODERATE" # TRUE for each patient whose symptoms are more severe than moderate
## [1] TRUE FALSE FALSE
# Combine the patient vectors and factors into one data frame, one row per patient.
# stringsAsFactors = FALSE keeps character vectors (subject_name) as character rather than
# converting them to factors; columns already built with factor() remain factors.
# The argument name must be spelled exactly: a misspelled name is treated as data and
# becomes an extra column.
pt_data <- data.frame(subject_name, temperature, flu_status, gender, blood, symptoms,
                      stringsAsFactors = FALSE)
pt_data
## subject_name temperature flu_status gender blood symptoms
## 1 John Doe 98.1 FALSE MALE O SEVERE
## 2 Jane Doe 98.6 FALSE FEMALE AB MILD
## 3 Steve Graves 101.4 TRUE MALE A MODERATE
pt_data[1, 2] # value in the first row, second column
## [1] 98.1
pt_data[, 1] # all values in the first column
## [1] "John Doe" "Jane Doe" "Steve Graves"
pt_data[1, ] # the first row: all of the first patient's data
## subject_name temperature flu_status gender blood symptoms
## 1 John Doe 98.1 FALSE MALE O SEVERE
pt_data[c(1, 3), c("temperature", "gender")] # first and third patients, temperature and gender columns only
## temperature gender
## 1 98.1 MALE
## 3 101.4 MALE
pt_data$temp_c <- (pt_data$temperature - 32) * (5 / 9) # new column in the data frame, computed from an existing column
pt_data
## subject_name temperature flu_status gender blood symptoms temp_c
## 1 John Doe 98.1 FALSE MALE O SEVERE 36.72222
## 2 Jane Doe 98.6 FALSE FEMALE AB MILD 37.00000
## 3 Steve Graves 101.4 TRUE MALE A MODERATE 38.55556
pt_data[c("temperature", "temp_c")] # original value next to the new value; useful for comparing them
## temperature temp_c
## 1 98.1 36.72222
## 2 98.6 37.00000
## 3 101.4 38.55556