Data Frame and Plotting

Including Plots

You can also embed plots, for example:

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.

library(ggplot2)
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(dplyr)
# Open the help page for the datasets package, then list what it contains
?datasets
library(help = "datasets")
# Load the iris data set into the workspace and preview its first rows
data("iris")
head(iris)

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

# Build a small data frame of scores paired with the year they belong to
years <- c(1980, 1980, 1985, 1990)
scores <- c(34, 44, 56, 83)
df <- data.frame(years, scores)
# display the first column (years); leaving the row index empty before the
# comma keeps every row, and a single column comes back as a plain vector
df[, 1]

## [1] 1980 1980 1985 1990

# display the rows whose score is less than 50, keeping the year column
df[df$scores < 50, ]

##   years scores
## 1  1980     34
## 2  1980     44

# display only the scores less than 50, without the year; selecting one
# column by name returns a vector rather than a data frame
df[df$scores < 50, "scores"]

## [1] 34 44

# Toy patient data: three parallel vectors, one element per patient
subject_name <- c(
  "John Doe",
  "Jane Doe",
  "Steve Graves"
)
temperature <- c(98.1, 98.6, 101.4)
flu_stats <- c(FALSE, FALSE, TRUE)
# temperature of the second patient
temperature[2]

## [1] 98.6

# temperatures of the second and third patients
temperature[c(2, 3)]

## [1]  98.6 101.4

# a negative index drops that element: every patient except the second
temperature[-2]

## [1]  98.1 101.4

# name of the third patient
subject_name[3]

## [1] "Steve Graves"

# Printing a factor lists its levels, which indicates it is a factor and
# not a character vector
gender <- factor(c("MALE", "FEMALE", "MALE"))
gender

## [1] MALE   FEMALE MALE  
## Levels: FEMALE MALE

# Explicit levels may include values that do not occur in the data ("B")
blood <- factor(
  c("O", "AB", "A"),
  levels = c("A", "B", "AB", "O")
)
blood

## [1] O  AB A 
## Levels: A B AB O

# An ordered factor ranks its levels in the order given to `levels`
symptoms <- factor(
  c("SEVERE", "MILD", "MODERATE"),
  levels = c("MILD", "MODERATE", "SEVERE"),
  ordered = TRUE
)
symptoms

## [1] SEVERE   MILD     MODERATE
## Levels: MILD < MODERATE < SEVERE

# Ordered factors support comparisons: which symptoms rank above MODERATE?
symptoms > "MODERATE"

## [1]  TRUE FALSE FALSE

# Combine the per-patient vectors into a single data frame, one row per patient
pt_data <- data.frame(
  subject_name, temperature, flu_stats, gender, blood, symptoms,
  stringsAsFactors = FALSE
)
pt_data

##   subject_name temperature flu_stats gender blood symptoms
## 1     John Doe        98.1     FALSE   MALE     O   SEVERE
## 2     Jane Doe        98.6     FALSE FEMALE    AB     MILD
## 3 Steve Graves       101.4      TRUE   MALE     A MODERATE

# rows 1 and 3, columns 2 and 4, selected by position
pt_data[c(1, 3), c(2, 4)]

##   temperature gender
## 1        98.1   MALE
## 3       101.4   MALE

# every column for the second patient
pt_data[2, ]

##   subject_name temperature flu_stats gender blood symptoms
## 2     Jane Doe        98.6     FALSE FEMALE    AB     MILD

# the same selection as above, but naming the columns instead of relying on
# their position
pt_data[c(1, 3), c("temperature", "gender")]

##   temperature gender
## 1        98.1   MALE
## 3       101.4   MALE

# derive a new column from an existing one using the Fahrenheit-to-Celsius
# formula
pt_data$temp_c <- (pt_data$temperature - 32) * (5 / 9)
pt_data

##   subject_name temperature flu_stats gender blood symptoms   temp_c
## 1     John Doe        98.1     FALSE   MALE     O   SEVERE 36.72222
## 2     Jane Doe        98.6     FALSE FEMALE    AB     MILD 37.00000
## 3 Steve Graves       101.4      TRUE   MALE     A MODERATE 38.55556

# a single character vector inside [ ] selects columns and keeps a data frame
pt_data[c("temperature", "temp_c")]

##   temperature   temp_c
## 1        98.1 36.72222
## 2        98.6 37.00000
## 3       101.4 38.55556

# Open the help page for the mpg data set, then print the tibble itself
# (only the first rows are shown when a tibble is printed)
?mpg
mpg

## # A tibble: 234 × 11
##    manufacturer model      displ  year   cyl trans drv     cty   hwy fl    class
##    <chr>        <chr>      <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
##  1 audi         a4           1.8  1999     4 auto… f        18    29 p     comp…
##  2 audi         a4           1.8  1999     4 manu… f        21    29 p     comp…
##  3 audi         a4           2    2008     4 manu… f        20    31 p     comp…
##  4 audi         a4           2    2008     4 auto… f        21    30 p     comp…
##  5 audi         a4           2.8  1999     6 auto… f        16    26 p     comp…
##  6 audi         a4           2.8  1999     6 manu… f        18    26 p     comp…
##  7 audi         a4           3.1  2008     6 auto… f        18    27 p     comp…
##  8 audi         a4 quattro   1.8  1999     4 manu… 4        18    26 p     comp…
##  9 audi         a4 quattro   1.8  1999     4 auto… 4        16    25 p     comp…
## 10 audi         a4 quattro   2    2008     4 manu… 4        20    28 p     comp…
## # ℹ 224 more rows

# Preview just the first six rows of mpg
head(mpg)

## # A tibble: 6 × 11
##   manufacturer model displ  year   cyl trans      drv     cty   hwy fl    class 
##   <chr>        <chr> <dbl> <int> <int> <chr>      <chr> <int> <int> <chr> <chr> 
## 1 audi         a4      1.8  1999     4 auto(l5)   f        18    29 p     compa…
## 2 audi         a4      1.8  1999     4 manual(m5) f        21    29 p     compa…
## 3 audi         a4      2    2008     4 manual(m6) f        20    31 p     compa…
## 4 audi         a4      2    2008     4 auto(av)   f        21    30 p     compa…
## 5 audi         a4      2.8  1999     6 auto(l5)   f        16    26 p     compa…
## 6 audi         a4      2.8  1999     6 manual(m5) f        18    26 p     compa…

# Scatter plots of the mpg data; each call maps columns to aesthetics in aes()

#displacement versus highway miles per gallon
ggplot(data = mpg) + geom_point(mapping = aes(x=displ, y = hwy))

#displacement versus city miles per gallon
ggplot(data = mpg) + geom_point(mapping = aes(x=displ, y = cty))

#displacement versus number of cylinders
ggplot(data = mpg) + geom_point(mapping = aes(x=displ, y = cyl))

#adding color to the graph: class is mapped to the point color inside aes()
ggplot(data = mpg) + geom_point(mapping = aes(x=displ, y = hwy, color = class))

#mapping class to transparency (alpha), not shape; class is discrete, which
#is why the warning below is raised
ggplot(data = mpg) + geom_point(mapping = aes(x=displ, y = hwy, alpha = class))

## Warning: Using alpha for a discrete variable is not advised.

#color is given outside aes(), so it is a fixed setting for every point
#rather than a mapping from a column
ggplot(data = mpg) + geom_point(mapping = aes(x=displ, y = hwy), color = "blue")

#splitting plots in to facets: one panel per class, laid out in 2 rows
ggplot(data = mpg) + geom_point(mapping = aes(x=displ, y = hwy)) + facet_wrap(~class, nrow = 2)

Data Frame and Plotting

Bezawit Tilahun

2024-02-06

R Markdown

Including Plots