#Framingham Markdown

#This sets the working directory
#The relative file name passed to write.csv() further down resolves against
#this directory.
#NOTE(review): the path ends with a space inside the quotes - confirm the
#folder name really contains it, otherwise setwd() stops with an error.
setwd("/Users/sbhaumik/Documents/MPHCourses/RforHealthScience ")

#Import Framingham dataset
library(readxl)
#Read the Framingham workbook into `dat` (read_excel() comes from readxl)
dat <- read_excel("/Users/sbhaumik/Downloads/frmgham2-3.xls")
#Open the spreadsheet-style viewer, but only in an interactive session.
#View() (capital V) is always available from utils; lowercase view() only
#exists when the tibble package is attached, which the lines shown here do
#not do, and a viewer has nothing to display in a non-interactive run.
if (interactive()) {
  View(dat)
}

#Preview the first six rows of the dataset
head(dat, n = 6L)

## # A tibble: 6 × 39
##   RANDID   SEX TOTCHOL   AGE SYSBP DIABP CURSMOKE CIGPDAY   BMI DIABETES BPMEDS
##    <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl>    <dbl>   <dbl> <dbl>    <dbl>  <dbl>
## 1   2448     1     195    39  106   70          0       0  27.0        0      0
## 2   2448     1     209    52  121   66          0       0  NA          0      0
## 3   6238     2     250    46  121   81          0       0  28.7        0      0
## 4   6238     2     260    52  105   69.5        0       0  29.4        0      0
## 5   6238     2     237    58  108   66          0       0  28.5        0      0
## 6   9428     1     245    48  128.  80          1      20  25.3        0      0
## # ℹ 28 more variables: HEARTRTE <dbl>, GLUCOSE <dbl>, educ <dbl>,
## #   PREVCHD <dbl>, PREVAP <dbl>, PREVMI <dbl>, PREVSTRK <dbl>, PREVHYP <dbl>,
## #   TIME <dbl>, PERIOD <dbl>, HDLC <dbl>, LDLC <dbl>, DEATH <dbl>,
## #   ANGINA <dbl>, HOSPMI <dbl>, MI_FCHD <dbl>, ANYCHD <dbl>, STROKE <dbl>,
## #   CVD <dbl>, HYPERTEN <dbl>, TIMEAP <dbl>, TIMEMI <dbl>, TIMEMIFC <dbl>,
## #   TIMECHD <dbl>, TIMESTRK <dbl>, TIMECVD <dbl>, TIMEDTH <dbl>, TIMEHYP <dbl>

dim(dat)[2L] #The dataset has 39 columns (variables)

## [1] 39

dim(dat)[1L] #The dataset has 11627 rows (observations)

## [1] 11627

#Inspect how three of the columns are stored
class(dat[["CURSMOKE"]])

## [1] "numeric"

class(dat[["PERIOD"]])

## [1] "numeric"

class(dat[["HEARTRTE"]])

## [1] "numeric"

#CURSMOKE, PERIOD and HEARTRTE are each stored as numeric columns

#How many unique participants?
#Count the rows recorded at PERIOD 1. Base subsetting is used instead of an
#unqualified filter() call, which resolves to stats::filter() (a time-series
#filter) unless dplyr has been attached. which() drops rows where PERIOD is
#NA, so the result matches what dplyr::filter() returns.
#NOTE(review): this equals the number of participants only if every
#participant has exactly one PERIOD 1 row - confirm against RANDID.
a <- dat[which(dat$PERIOD == 1), ]
nrow(a)

## [1] 4434

#Add lngluc: the natural logarithm of GLUCOSE (log() defaults to base e)
dat[["lngluc"]] <- log(dat[["GLUCOSE"]])

#New variable for BMI category
#Each BMI falls into one left-closed band: below 18.5, [18.5, 25), [25, 30),
#[30, 35), [35, 40), or 40 and above. findInterval() returns how many
#cut-offs are <= the value (0 to 5), so adding 1 picks the matching label.
#This replaces six nested ifelse() calls whose final `0` fallback could never
#be reached: a non-missing BMI that fails every `<` test is >= 40, and a
#missing BMI yields NA. The resulting factor is the same as before.
bmi_cutoffs <- c(18.5, 25, 30, 35, 40)
bmi_labels <- c("Underweight", "Normal", "Overweight",
                "Obesity Class I", "Obesity Class II", "Obesity Class III")
dat$bmicat <- as.factor(bmi_labels[findInterval(dat$BMI, bmi_cutoffs) + 1L])
#Participants with missing data for BMI are marked as NA in this classification

#Restricting dataset to period 3
#Base subsetting is used instead of an unqualified filter() call, which
#resolves to stats::filter() (a time-series filter) unless dplyr has been
#attached. which() drops rows where PERIOD is NA, so the result matches
#what dplyr::filter() returns.
p3 <- dat[which(dat$PERIOD == 3), ]
nrow(p3)

## [1] 3263

ncol(p3)

## [1] 41

#There are 3263 cases and 41 variables

#Save the period 3 subset as period3.csv in the current working directory
#(write.csv() also writes the row names as a leading column by default)
write.csv(x = p3, file = "period3.csv")

#Framingham Markdown
#
#Smitha Bhaumik
#
#2024-09-05