R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

Importing library

install.packages(‘dplyr’)

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.1.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Import and Merge

# import data
celebrity_patients <- read.csv("celebrity_patientsNA.csv", header=TRUE)
medications <- read.csv("medications.csv", header=TRUE)
procedures <- read.csv("procedures.csv", header=TRUE)

summary(procedures)
##      DATE                ID             ENCOUNTER              CODE          
##  Length:1198        Length:1198        Length:1198        Min.   :1.225e+06  
##  Class :character   Class :character   Class :character   1st Qu.:1.204e+08  
##  Mode  :character   Mode  :character   Mode  :character   Median :4.302e+08  
##                                                           Mean   :1.262e+12  
##                                                           3rd Qu.:4.302e+08  
##                                                           Max.   :7.560e+14  
##                                                                              
##   Procedure           BASE_COST         REASONCODE        REASONDESCRIPTION 
##  Length:1198        Min.   :  261.8   Min.   : 10509002   Length:1198       
##  Class :character   1st Qu.:  516.6   1st Qu.: 10509002   Class :character  
##  Mode  :character   Median :  683.0   Median : 72892002   Mode  :character  
##                     Mean   : 3391.2   Mean   :117775301                     
##                     3rd Qu.: 3431.1   3rd Qu.:195662009                     
##                     Max.   :48509.5   Max.   :410429000                     
##                                       NA's   :942
# joins the matching values
celebrity_patients$Patient_ID <- as.character(celebrity_patients$Patient_ID)
#first <- left_join(celebrity_patients, medications, by=c("Patient_ID" = "ID"))
#sec <- left_join(celebrity_patients, procedures, by=c("Patient_ID" = "ID"))
join <- inner_join(medications, procedures, by="ID")
merged_patients_df <- left_join(celebrity_patients, join, by=c("Patient_ID" = "ID"))

count(merged_patients_df)
##    n
## 1 26

Create Filtered Datasets

table(merged_patients_df$MARTIAL)
## < table of extent 0 >
table(merged_patients_df$SEX)
## 
##   9999 FEMALE   MALE 
##      1     12     13
# filters martial and sex
males_only <- filter(merged_patients_df, SEX == 'MALE', MARITAL == 'MARRIED' | MARITAL == 'SEPERATED')

table(males_only$SEX, males_only$MARITAL)
##       
##        MARRIED
##   MALE       6
females_only <- filter(merged_patients_df, SEX == 'FEMALE', MARITAL == 'MARRIED' | MARITAL == 'SEPERATED')
arrange(females_only, HLTHPLN1, desc(AGE))
##   Patient_Name Patient_ID AGE    SEX MARITAL   GENHLTH HLTHPLN1
## 1      Beyonce 2016001351  38 FEMALE MARRIED      GOOD      YES
## 2 Minaj, Nicki 2016001780  37 FEMALE MARRIED      GOOD      YES
## 3        Ciara 2016004483  34 FEMALE MARRIED      FAIR      YES
## 4      Cardi B 2016002149  27 FEMALE MARRIED VERY-GOOD      YES
##              CHECKUP1 EXERANY2 MAXDRNKS MARIJUANA   START    STOP
## 1    WITHIN PAST YEAR      YES        0       YES 2/12/10 2/19/10
## 2    WITHIN PAST YEAR      YES        2       YES 3/26/11  4/9/11
## 3    WITHIN PAST YEAR      YES        4       YES 12/6/10 12/1/11
## 4 WITHIN PAST 2 YEARS      YES        3       YES 3/26/11  4/9/11
##                                  PAYER                          ENCOUNTER.x
## 1 d47b3510-2895-3b70-9897-342d681c769d ae97dfe0-c619-44c1-a687-f215d7fac984
## 2 b1c428d6-4f07-31e0-90f0-68ffa6ff8c76 86d22444-38b4-4481-bc31-8a509a56118b
## 3 b1c428d6-4f07-31e0-90f0-68ffa6ff8c76 093e8801-fc20-46d5-9520-2775ddeb8eb5
## 4 b1c428d6-4f07-31e0-90f0-68ffa6ff8c76 86d22444-38b4-4481-bc31-8a509a56118b
##   CODE.x                       Medication BASE_COST.x PAYER_COVERAGE DISPENSES
## 1 313782 Acetaminophen 325 MG Oral Tablet        8.31              0         1
## 2 309097    Cefuroxime 250 MG Oral Tablet      163.15              0         1
## 3 831533                Errin 28 Day Pack       44.65              0        12
## 4 198405     Ibuprofen 100 MG Oral Tablet        6.13              0         1
##   TOTALCOST REASONCODE.x         REASONDESCRIPTION.x     DATE
## 1      8.31     10509002 Acute bronchitis (disorder)  2/12/10
## 2    163.15           NA                             12/14/09
## 3    535.80           NA                              9/12/14
## 4      6.13           NA                             12/14/09
##                            ENCOUNTER.y    CODE.y
## 1 ae97dfe0-c619-44c1-a687-f215d7fac984  23426006
## 2 6a6caa2e-961b-432b-8b6d-6e408ef5157f 415300000
## 3 dccbc613-e1e7-4929-9855-4e19585ca350 430193006
## 4 6a6caa2e-961b-432b-8b6d-6e408ef5157f 430193006
##                                         Procedure BASE_COST.y REASONCODE.y
## 1 Measurement of respiratory function (procedure)      516.65     10509002
## 2                   Review of systems (procedure)      516.65           NA
## 3           Medication Reconciliation (procedure)      591.07           NA
## 4           Medication Reconciliation (procedure)      565.52           NA
##           REASONDESCRIPTION.y
## 1 Acute bronchitis (disorder)
## 2                            
## 3                            
## 4

New Variables

# adds generation
males_only <- mutate(males_only, Generation = ifelse(AGE %in% 0:24, "GEN Z",
                                               ifelse(AGE %in% 25:39, "Millennial", 
                                               ifelse(AGE %in% 40:54,"Gen X",
                                               ifelse(AGE >= 55, "Baby Boomer",
                                               NA)))))
table(males_only$Generation)
## 
## Baby Boomer       Gen X  Millennial 
##           2           1           3

Export Data

write.csv(females_only, "female_patients.csv")
write.csv(males_only, "male_patients.csv")