This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Per-column summary statistics (min, quartiles, median, mean, max) for `cars`
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
# Open the help page for the `datasets` package
?datasets
# List the contents of the `datasets` package
library(help = "datasets")
# Load the iris measurements and preview the first six rows
data("iris")
head(iris, n = 6L)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# Two parallel vectors combined into a small data frame of yearly scores
years <- c(1980, 1980, 1985, 1990)
scores <- c(34, 44, 56, 83)
df <- data.frame(years, scores)
# Display the first column (years): `df[, 1]` selects a column, not a row,
# and a single selected column is returned as a plain vector
df[, 1]
## [1] 1980 1980 1985 1990
# Display the rows whose score is below 50, keeping the year column
df[df$scores < 50, ]
## years scores
## 1 1980 34
## 2 1980 44
# Display only the scores below 50, without the year
df[df$scores < 50, "scores"]
## [1] 34 44
# Build three parallel vectors describing a toy set of patients
subject_name <- c("John Doe", "Jane Doe", "Steve Graves")
temperature <- c(98.1, 98.6, 101.4)
flu_stats <- c(FALSE, FALSE, TRUE)
# Temperature of the second patient
temperature[2]
## [1] 98.6
# Temperatures of the second and third patients
temperature[c(2, 3)]
## [1] 98.6 101.4
# Temperatures of every patient except the second (negative index drops it)
temperature[-2]
## [1] 98.1 101.4
# Name of the third patient
subject_name[3]
## [1] "Steve Graves"
# Categorical variable; levels are taken from the distinct values present
gender <- factor(c("MALE", "FEMALE", "MALE"))
# Printing shows a "Levels:" line, which indicates a factor rather than a
# character vector
gender
## [1] MALE FEMALE MALE
## Levels: FEMALE MALE
# Levels given explicitly, so "B" is a valid level even though no patient has it
blood <- factor(
  c("O", "AB", "A"),
  levels = c("A", "B", "AB", "O")
)
blood
## [1] O AB A
## Levels: A B AB O
# Ordered factor: levels are ranked MILD < MODERATE < SEVERE
symptoms <- factor(
  c("SEVERE", "MILD", "MODERATE"),
  levels = c("MILD", "MODERATE", "SEVERE"),
  ordered = TRUE
)
symptoms
## [1] SEVERE MILD MODERATE
## Levels: MILD < MODERATE < SEVERE
# Ordered factors support comparisons against a level
symptoms > "MODERATE"
## [1] TRUE FALSE FALSE
# Assemble the patient vectors and factors into one data frame; character
# columns are kept as strings rather than converted to factors
pt_data <- data.frame(
  subject_name,
  temperature,
  flu_stats,
  gender,
  blood,
  symptoms,
  stringsAsFactors = FALSE
)
pt_data
## subject_name temperature flu_stats gender blood symptoms
## 1 John Doe 98.1 FALSE MALE O SEVERE
## 2 Jane Doe 98.6 FALSE FEMALE AB MILD
## 3 Steve Graves 101.4 TRUE MALE A MODERATE
# Rows 1 and 3, columns 2 and 4, selected by position
pt_data[c(1, 3), c(2, 4)]
## temperature gender
## 1 98.1 MALE
## 3 101.4 MALE
# Every column for the second patient
pt_data[2, ]
## subject_name temperature flu_stats gender blood symptoms
## 2 Jane Doe 98.6 FALSE FEMALE AB MILD
# Same rows as before, but with columns chosen by name instead of position
pt_data[c(1, 3), c("temperature", "gender")]
## temperature gender
## 1 98.1 MALE
## 3 101.4 MALE
# Derive a new column from an existing one (Fahrenheit to Celsius)
pt_data$temp_c <- (pt_data$temperature - 32) * (5 / 9)
pt_data
## subject_name temperature flu_stats gender blood symptoms temp_c
## 1 John Doe 98.1 FALSE MALE O SEVERE 36.72222
## 2 Jane Doe 98.6 FALSE FEMALE AB MILD 37.00000
## 3 Steve Graves 101.4 TRUE MALE A MODERATE 38.55556
# Show the original and derived temperature columns side by side
pt_data[c("temperature", "temp_c")]
## temperature temp_c
## 1 98.1 36.72222
## 2 98.6 37.00000
## 3 101.4 38.55556
# Open the help page for the mpg data set
?mpg
# Print the data set; as a tibble (234 rows x 11 columns) only the first ten
# rows are shown and wide columns are truncated
mpg
## # A tibble: 234 × 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 audi a4 1.8 1999 4 auto… f 18 29 p comp…
## 2 audi a4 1.8 1999 4 manu… f 21 29 p comp…
## 3 audi a4 2 2008 4 manu… f 20 31 p comp…
## 4 audi a4 2 2008 4 auto… f 21 30 p comp…
## 5 audi a4 2.8 1999 6 auto… f 16 26 p comp…
## 6 audi a4 2.8 1999 6 manu… f 18 26 p comp…
## 7 audi a4 3.1 2008 6 auto… f 18 27 p comp…
## 8 audi a4 quattro 1.8 1999 4 manu… 4 18 26 p comp…
## 9 audi a4 quattro 1.8 1999 4 auto… 4 16 25 p comp…
## 10 audi a4 quattro 2 2008 4 manu… 4 20 28 p comp…
## # ℹ 224 more rows
# Preview only the first six rows
head(mpg)
## # A tibble: 6 × 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 audi a4 1.8 1999 4 auto(l5) f 18 29 p compa…
## 2 audi a4 1.8 1999 4 manual(m5) f 21 29 p compa…
## 3 audi a4 2 2008 4 manual(m6) f 20 31 p compa…
## 4 audi a4 2 2008 4 auto(av) f 21 30 p compa…
## 5 audi a4 2.8 1999 6 auto(l5) f 16 26 p compa…
## 6 audi a4 2.8 1999 6 manual(m5) f 18 26 p compa…
# Displacement versus highway miles per gallon
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy))
# Displacement versus city miles per gallon
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = cty))
# Displacement versus number of cylinders
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = cyl))
# Map vehicle class to point colour
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, color = class))
# Map vehicle class to point transparency (alpha); ggplot2 warns that alpha is
# not advised for a discrete variable
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, alpha = class))
## Warning: Using alpha for a discrete variable is not advised.
# Colour every point blue: `color` is given outside aes(), so it is a fixed
# value rather than a mapping to a variable
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy), color = "blue")
# Split the plot into facets by class, arranged in two rows
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy)) +
  facet_wrap(~class, nrow = 2)