PART 1: How to Open & Setup R Markdown File

Loading Packages

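Installing (if needed) and loading the packages whose functions are used later in the code. The install line is commented out because a package only has to be installed once.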

#install.packages("tidyverse")

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.1     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

PART 2: Practice on a Small Scale

Practicing with Operators and Assigning Values

Practicing basic concepts for data management to learn commands.

#creating variable a, assigning value of 7
a <- 7

#creating variable b, assigning value of 3
b <- 3

#creating variable c, assigning value of 2a+b (2*7 + 3 = 17)
c <- (2*a)+b

#view the values for variables a, b, and c
print(a)
## [1] 7
print(b)
## [1] 3
print(c)
## [1] 17

More Practice with Operators and Assigning Values

Learning how to override data with new assigned values.

#re-assigning variable a with value of 24 (replaces the earlier value of 7)
a <- 24

#re-assigning variable b with value of 32 (replaces the earlier value of 3)
b <- 32

#re-assigning variable c with value of (48/a) - 3*b, using the new a and b
#(48/24) - (3*32) = 2 - 96 = -94
c <- (48/a) - (3*b)

#viewing the values for variables a, b, and c
print(a)
## [1] 24
print(b)
## [1] 32
print(c)
## [1] -94

Practicing with Lists

Learning how to create lists and subsets and checking data types

#creating a list of letters from a through e
#note: this name shadows R's built-in letters constant (a-z) for the session
letters <- list("a", "b", "c", "d", "e")

#checking data type of letters, confirmed to be list of characters
str(letters)
## List of 5
##  $ : chr "a"
##  $ : chr "b"
##  $ : chr "c"
##  $ : chr "d"
##  $ : chr "e"
#creating subset of letters without chr b called skip_b
#the condition keeps every element that is not "b" (so "b" is dropped);
#single brackets return a list, so skip_b stays the same type as letters
skip_b <- letters[letters != "b"]

#viewing contents of skip_b, same as letters but no b
print(skip_b)
## [[1]]
## [1] "a"
## 
## [[2]]
## [1] "c"
## 
## [[3]]
## [1] "d"
## 
## [[4]]
## [1] "e"

Practicing with Lists Pt. 2

More practice with lists and learning how to convert data types

#creating a list called numbers, contains 1-10
numbers <- list(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)

#checking data type, numbers list contains numeric data
str(numbers)
## List of 10
##  $ : num 1
##  $ : num 2
##  $ : num 3
##  $ : num 4
##  $ : num 5
##  $ : num 6
##  $ : num 7
##  $ : num 8
##  $ : num 9
##  $ : num 10
#creating numbers_2 as a character version of numbers
#as.character() returns a new object, so numbers itself stays numeric
numbers_2 <- as.character(numbers)

#checking data type, converted correctly to character
is.character(numbers_2)
## [1] TRUE
#new list five_plus keeps only the values of numbers greater than 4
#single brackets return a list, so five_plus is a list like numbers
five_plus <- numbers[numbers > 4]

#viewing contents of five_plus
print(five_plus)
## [[1]]
## [1] 5
## 
## [[2]]
## [1] 6
## 
## [[3]]
## [1] 7
## 
## [[4]]
## [1] 8
## 
## [[5]]
## [1] 9
## 
## [[6]]
## [1] 10
#used numbers (not numbers_2) to create five_plus
#easier to make numeric comparison of > 4
#more code required to make character comparison

PART 3: Working with Data!

Loading Data

Setting up file path and reading data into a new dataframe.

#storing the folder that holds the data instead of calling setwd(),
#so reading the file does not change the session's working directory
data_dir <- "C:/Users/mdrd1/Desktop/PH141/Assignment #1"

#Creating dataframe hers for HERS data
#file.path() joins the folder and the file name into one path
hers <- read.csv(file.path(data_dir, "PH 140 W1 HERS Data.csv"))

Exploring the Dataset

Exploring variables, observations, and data types within dataset.

#View the first 6 rows (head() shows 6 rows by default)
head(hers)
##   unique age             race             smoker     diabetes  bmi sbp   whr
## 1     64  56 African American Not current smoker     Diabetic 34.1 120 0.932
## 2     73  77            White Not current smoker Non-diabetic 19.6 129 0.782
## 3     90  63            White Not current smoker Non-diabetic 24.0 130 0.759
## 4    130  61            White Not current smoker Non-diabetic 24.1 118 0.845
## 5    135  63            White Not current smoker     Diabetic 30.7 171 0.926
## 6    163  68            White     Current smoker Non-diabetic 20.0 144 0.772
#View the last 6 rows (tail() shows 6 rows by default)
tail(hers)
##     unique age  race             smoker     diabetes  bmi sbp   whr
## 115   2647  74 White Not current smoker Non-diabetic 28.9 122 0.879
## 116   2679  75 White Not current smoker Non-diabetic 28.9 154 0.841
## 117   2683  53 White Not current smoker     Diabetic 35.1 132 1.026
## 118   2690  66 White Not current smoker Non-diabetic 32.5 129 0.845
## 119   2748  78 White Not current smoker Non-diabetic 28.9 119 0.791
## 120   2754  70 White Not current smoker     Diabetic 28.2 138 0.854
#View the structure of data: number of rows and columns, plus each column's type
str(hers)
## 'data.frame':    120 obs. of  8 variables:
##  $ unique  : int  64 73 90 130 135 163 204 237 238 273 ...
##  $ age     : int  56 77 63 61 63 68 78 72 74 69 ...
##  $ race    : chr  "African American" "White" "White" "White" ...
##  $ smoker  : chr  "Not current smoker" "Not current smoker" "Not current smoker" "Not current smoker" ...
##  $ diabetes: chr  "Diabetic" "Non-diabetic" "Non-diabetic" "Non-diabetic" ...
##  $ bmi     : num  34.1 19.6 24 24.1 30.7 ...
##  $ sbp     : int  120 129 130 118 171 144 135 110 164 142 ...
##  $ whr     : num  0.932 0.782 0.759 0.845 0.926 ...
#View the structure of only the age variable in hers (type, length, first values)
str(hers$age)
##  int [1:120] 56 77 63 61 63 68 78 72 74 69 ...

Create New Variables

Learning to create new variables from existing HERS dataset.

#adding a 0/1 indicator column for age: 1 when age is above 65, otherwise 0
hers <- hers %>%
  mutate(ageover65 = if_else(age > 65, 1, 0))

#counting how many participants fall in each age category
table(hers$ageover65)
## 
##  0  1 
## 47 73
#adding a Yes/No column for high BP: "Yes" when sbp is 140 or more, otherwise "No"
hers <- hers %>%
  mutate(highbp = if_else(sbp >= 140, "Yes", "No"))

#counting how many participants fall in each BP category
table(hers$highbp)
## 
##  No Yes 
##  82  38

Create Subsets of a Dataframe

Analyzing certain observations within the large HERS dataset.

#new dataframe with only the participants older than 65 (ageover65 is 1)
over65 <- hers %>%
  #the column name refers to the data coming through the pipe,
  #so the condition always matches the rows being filtered
  subset(ageover65 == 1)

#new dataframe with only the participants with high BP (highbp is "Yes")
bphigher140 <- hers %>%
  #keeps rows with sbp of 140 or more, drops the rest
  subset(highbp == "Yes")

Select Columns in a Dataframe

Specifying variables to keep in newly created dataframes.

#keeping only the id, age, diabetes, and bmi columns of the over65 subset
over65_2 <- select(over65, unique, age, diabetes, bmi)

#checking that the first 6 rows contain only the chosen columns
head(over65_2, n = 6)
##    unique age     diabetes  bmi
## 2      73  77 Non-diabetic 19.6
## 6     163  68 Non-diabetic 20.0
## 7     204  78 Non-diabetic 25.7
## 8     237  72     Diabetic 23.5
## 9     238  74     Diabetic 25.7
## 10    273  69 Non-diabetic 27.9
#checking that the last 6 rows contain only the chosen columns
tail(over65_2, n = 6)
##     unique age     diabetes  bmi
## 114   2638  76 Non-diabetic 25.0
## 115   2647  74 Non-diabetic 28.9
## 116   2679  75 Non-diabetic 28.9
## 118   2690  66 Non-diabetic 32.5
## 119   2748  78 Non-diabetic 28.9
## 120   2754  70     Diabetic 28.2
#keeping only the id, age, smoker, and sbp columns of the bphigher140 subset
bphigher140_2 <- select(bphigher140, unique, age, smoker, sbp)

#checking that the first 6 rows contain only the chosen columns
head(bphigher140_2, n = 6)
##    unique age             smoker sbp
## 5     135  63 Not current smoker 171
## 6     163  68     Current smoker 144
## 9     238  74 Not current smoker 164
## 10    273  69 Not current smoker 142
## 15    409  70 Not current smoker 159
## 16    414  65 Not current smoker 172
#checking that the last 6 rows contain only the chosen columns
tail(bphigher140_2, n = 6)
##     unique age             smoker sbp
## 104   2290  75 Not current smoker 160
## 106   2419  73 Not current smoker 144
## 107   2429  75 Not current smoker 185
## 108   2444  75 Not current smoker 151
## 112   2564  69 Not current smoker 153
## 116   2679  75 Not current smoker 154