R Markdown File

02) DATA OVERVIEW

I. Environment Setup

#Installing Libraries for the Project #install.packages(“tidyverse”) #install.packages(“readxl”)

Loading the packages for the project

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(dplyr)

02) DATA PREPARATION

A. Data Acquisition (Collection/Loading/Copying)

Define the data file path. Adjust the file path and name as needed

Loading the dataset into dataframe df and creating copy of the raw dataset

B. Data Comprehension

(Checking Data Attributes-Inspect Head/Tail, Data Shape, Data Info, Data Columns,

Categorical/Numerical Data column names with counts, dtypes, target variables, find null value counts,

find duplicate value counts, create duplicates variables to extract duplicate entries,

find unique columns, tally total/duplicate columns)

# Checking Data Attributes

head(df)    # Head of the Data
## # A tibble: 6 × 6
##     AGE FEMALE   LOS  RACE TOTCHG APRDRG
##   <dbl>  <dbl> <dbl> <dbl>  <dbl>  <dbl>
## 1    17      1     2     1   2660    560
## 2    17      0     2     1   1689    753
## 3    17      1     7     1  20060    930
## 4    17      1     1     1    736    758
## 5    17      1     1     1   1194    754
## 6    17      0     0     1   3305    347
tail(df)    # Tail of the Data
## # A tibble: 6 × 6
##     AGE FEMALE   LOS  RACE TOTCHG APRDRG
##   <dbl>  <dbl> <dbl> <dbl>  <dbl>  <dbl>
## 1     0      1     3     1   1886    640
## 2     0      1     6     1   5881    636
## 3     0      1     2     1   1171    640
## 4     0      1     2     1   1171    640
## 5     0      1     2     1   1086    640
## 6     0      0     4     1   4931    640
dim(df)     # Shape of the Data - dimensions nrow(), ncol()
## [1] 500   6
length(df)  # Length of the Data - no. of columns
## [1] 6
nrow(df)    # No. of entries in the dataset
## [1] 500
str(df)     # Data Information
## tibble [500 × 6] (S3: tbl_df/tbl/data.frame)
##  $ AGE   : num [1:500] 17 17 17 17 17 17 17 16 16 17 ...
##  $ FEMALE: num [1:500] 1 0 1 1 1 0 1 1 1 1 ...
##  $ LOS   : num [1:500] 2 2 7 1 1 0 4 2 1 2 ...
##  $ RACE  : num [1:500] 1 1 1 1 1 1 1 1 1 1 ...
##  $ TOTCHG: num [1:500] 2660 1689 20060 736 1194 ...
##  $ APRDRG: num [1:500] 560 753 930 758 754 347 754 754 753 758 ...
glimpse(df) # From the Dplyr package, (part of tidyverse)
## Rows: 500
## Columns: 6
## $ AGE    <dbl> 17, 17, 17, 17, 17, 17, 17, 16, 16, 17, 17, 15, 15, 15, 15, 15,…
## $ FEMALE <dbl> 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ LOS    <dbl> 2, 2, 7, 1, 1, 0, 4, 2, 1, 2, 2, 2, 2, 4, 7, 4, 1, 4, 3, 3, 1, …
## $ RACE   <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ TOTCHG <dbl> 2660, 1689, 20060, 736, 1194, 3305, 2205, 1167, 532, 1363, 1245…
## $ APRDRG <dbl> 560, 753, 930, 758, 754, 347, 754, 754, 753, 758, 758, 753, 751…
col_names_vec <- colnames(df) # Finding the names of the columns
col_names_vec <- names(df)    # Finding the names of the columns
col_names_list <- as.list(col_names_vec) #Enlisting the name of the columns
col_names_vec
## [1] "AGE"    "FEMALE" "LOS"    "RACE"   "TOTCHG" "APRDRG"
col_names_list
## [[1]]
## [1] "AGE"
## 
## [[2]]
## [1] "FEMALE"
## 
## [[3]]
## [1] "LOS"
## 
## [[4]]
## [1] "RACE"
## 
## [[5]]
## [1] "TOTCHG"
## 
## [[6]]
## [1] "APRDRG"
sapply(df, class) # Finding the class or Data types of each and all column names
##       AGE    FEMALE       LOS      RACE    TOTCHG    APRDRG 
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
sum(is.na(df)) # Count Total Null Values in the Entire Data Frame
## [1] 1
# Summarising using Tidyverse dplyr
df %>%
  summarise(across(everything(), ~ sum(is.na(.))))
## # A tibble: 1 × 6
##     AGE FEMALE   LOS  RACE TOTCHG APRDRG
##   <int>  <int> <int> <int>  <int>  <int>
## 1     0      0     0     1      0      0
# Count the number of rows that are exact duplicates
sum(duplicated(df))
## [1] 26

C. Data Wrangling/Munging (Data Quality Assessment - Preprocess & Cleaning - Date Parsing,

Missing Values or Null Values Treatment, Duplicate Entries Removal, Renaming Columns,

Sorting Columns, Merging Datasets, Distribution Check, Anomalies/Outliers Detection and Treatment,

Data Type Corrections, Memory & Performance Optimization, Handling Imbalanced Data, Feature Scaling)

#### Removing Duplicate Enties


# View all rows that are exact duplicates (all columns match a prior entry)
df[duplicated(df), ]
## # A tibble: 26 × 6
##      AGE FEMALE   LOS  RACE TOTCHG APRDRG
##    <dbl>  <dbl> <dbl> <dbl>  <dbl>  <dbl>
##  1     0      1     2     1   1191    640
##  2     0      0     2     1   1280    640
##  3     0      1     2     1   1096    640
##  4     0      0     3     1   1669    640
##  5     0      1     2     1   1096    640
##  6     0      1     2     1   1096    640
##  7     0      0     2     1   1395    640
##  8     0      0     2     1   1286    640
##  9     0      1     2     1   1096    640
## 10     0      0     2     1   1280    640
## # ℹ 16 more rows
# Using dplyr::distinct() to remove duplicates (recommended step)
#library(dplyr)
df <- df %>%
  distinct()

# Using base R unique()
df <- unique(df)


#### Renaming Column Names

names(df) <- c("AGE","GENDER","LOS","RACE","TOTCHG","APRDRG")
names(df)
## [1] "AGE"    "GENDER" "LOS"    "RACE"   "TOTCHG" "APRDRG"
str(df) # Confirming whether the name has changed in the dataframe
## tibble [474 × 6] (S3: tbl_df/tbl/data.frame)
##  $ AGE   : num [1:474] 17 17 17 17 17 17 17 16 16 17 ...
##  $ GENDER: num [1:474] 1 0 1 1 1 0 1 1 1 1 ...
##  $ LOS   : num [1:474] 2 2 7 1 1 0 4 2 1 2 ...
##  $ RACE  : num [1:474] 1 1 1 1 1 1 1 1 1 1 ...
##  $ TOTCHG: num [1:474] 2660 1689 20060 736 1194 ...
##  $ APRDRG: num [1:474] 560 753 930 758 754 347 754 754 753 758 ...
#### Null Values Treatment

colSums(is.na(df)) # Count Null Values per Column
##    AGE GENDER    LOS   RACE TOTCHG APRDRG 
##      0      0      0      1      0      0
# Find all rows that contain at least one NA value
df[rowSums(is.na(df)) > 0, ]
## # A tibble: 1 × 6
##     AGE GENDER   LOS  RACE TOTCHG APRDRG
##   <dbl>  <dbl> <dbl> <dbl>  <dbl>  <dbl>
## 1     0      1     2    NA   1156    640
# Find rows where the RACE column has an NA value
df[is.na(df$RACE), ]
## # A tibble: 1 × 6
##     AGE GENDER   LOS  RACE TOTCHG APRDRG
##   <dbl>  <dbl> <dbl> <dbl>  <dbl>  <dbl>
## 1     0      1     2    NA   1156    640
# You can also use tidyr::drop_na() if you prefer the tidyverse
# library(tidyr)
df <- df %>% drop_na()



#### Distribution Check - All features Summary

summary(df) # Statistical Summary of the Data
##       AGE             GENDER            LOS              RACE      
##  Min.   : 0.000   Min.   :0.0000   Min.   : 0.000   Min.   :1.000  
##  1st Qu.: 0.000   1st Qu.:0.0000   1st Qu.: 2.000   1st Qu.:1.000  
##  Median : 0.000   Median :1.0000   Median : 2.000   Median :1.000  
##  Mean   : 5.376   Mean   :0.5053   Mean   : 2.871   Mean   :1.082  
##  3rd Qu.:14.000   3rd Qu.:1.0000   3rd Qu.: 3.000   3rd Qu.:1.000  
##  Max.   :17.000   Max.   :1.0000   Max.   :41.000   Max.   :6.000  
##      TOTCHG          APRDRG   
##  Min.   :  532   Min.   : 21  
##  1st Qu.: 1245   1st Qu.:640  
##  Median : 1620   Median :640  
##  Mean   : 2863   Mean   :615  
##  3rd Qu.: 2582   3rd Qu.:753  
##  Max.   :48388   Max.   :952
# Get a vector of all column names
col_names_vec <- colnames(df)

# Loop through each column to create distribution plots
library(ggplot2)
#library(dplyr)

# Define the output directory where plots will be saved
output_path <- "C:/Users/pprad/PradM_WD/RStudioWD/My_Projects/P7_USHealthcare-Agency/outputs/figures"

# Check if the directory exists, if not, create it
if (!dir.exists(output_path)) {
  dir.create(output_path, recursive = TRUE)
}

# Get a vector of all column names
col_names_vec <- names(df)

# Loop through each column to create distribution plots
for (col in col_names_vec) {
  # Check if the column is numeric
  if (is.numeric(df[[col]])) {
    
    # Create the histogram plot object
    hist_plot <- ggplot(df, aes(x = .data[[col]])) +
      geom_histogram(fill = 'skyblue', color = 'black') +
      labs(title = paste("Histogram of", col), x = col, y = "Count")
    
    # Print the plot to the RStudio Plots panel
    print(hist_plot)
    
    # Save the plot to the specified directory
    ggsave(filename = file.path(output_path, paste0(col, '_histogram.png')), plot = hist_plot)
    
    # Create the density plot object
    density_plot <- ggplot(df, aes(x = .data[[col]])) +
      geom_density(fill = 'lightgreen', alpha = 0.5) +
      labs(title = paste("Density Plot of", col), x = col)
    
    # Print the plot to the RStudio Plots panel
    print(density_plot)
    
    # Save the plot to the specified directory
    ggsave(filename = file.path(output_path, paste0(col, '_density.png')), plot = density_plot)
    
    cat(paste("Saved histogram and density plot for", col, "\n"))
  } else {
    cat(paste(col, "is not a numeric variable. Skipping plots.\n"))
  }
}
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Saving 7 x 5 in image
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Saving 7 x 5 in image
## Saved histogram and density plot for AGE
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Saving 7 x 5 in image
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Saving 7 x 5 in image
## Saved histogram and density plot for GENDER
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Saving 7 x 5 in image
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Saving 7 x 5 in image
## Saved histogram and density plot for LOS
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Saving 7 x 5 in image
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Saving 7 x 5 in image
## Saved histogram and density plot for RACE
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Saving 7 x 5 in image
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Saving 7 x 5 in image
## Saved histogram and density plot for TOTCHG
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Saving 7 x 5 in image
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Saving 7 x 5 in image
## Saved histogram and density plot for APRDRG

03) EXPLORATORY DATA ANALYSIS

01) 1. Age and Expenditure Analysis

Objective: Find the age category with the most hospital visits and maximum expenditure.

# Find the age with the highest number of hospital visits
age_visits <- df %>%
  count(AGE) %>%
  arrange(desc(n))

print("Top 5 ages by hospital visits:")
## [1] "Top 5 ages by hospital visits:"
print(head(age_visits, 5))
## # A tibble: 5 × 2
##     AGE     n
##   <dbl> <int>
## 1     0   280
## 2    17    38
## 3    15    29
## 4    16    29
## 5    14    25
# Find the age with the highest total expenditure
age_expenditure <- df %>%
  group_by(AGE) %>%
  summarize(Total_Expenditure = sum(TOTCHG, na.rm = TRUE)) %>%
  arrange(desc(Total_Expenditure))

print("Top 5 ages by total expenditure:")
## [1] "Top 5 ages by total expenditure:"
print(head(age_expenditure, 5))
## # A tibble: 5 × 2
##     AGE Total_Expenditure
##   <dbl>             <dbl>
## 1     0            644905
## 2    17            174777
## 3    15            111747
## 4    16             69149
## 5    14             64643
print(df[df$AGE == 0, ], n=300)
## # A tibble: 280 × 6
##       AGE GENDER   LOS  RACE TOTCHG APRDRG
##     <dbl>  <dbl> <dbl> <dbl>  <dbl>  <dbl>
##   1     0      1    41     1  29188    602
##   2     0      0     2     1   4717    138
##   3     0      0    12     1  15129    137
##   4     0      1     2     1   1085    640
##   5     0      0     3     1   1607    640
##   6     0      1     3     1   1499    640
##   7     0      1     3     1   7648     53
##   8     0      1     2     1   1527    640
##   9     0      0     2     1   1483    640
##  10     0      1     4     1   2844    640
##  11     0      1     3     1   3124    640
##  12     0      0     3     1   1760    640
##  13     0      1     2     1   1278    640
##  14     0      1     2     1   1620    640
##  15     0      1     2     1   1220    640
##  16     0      1     2     1   1134    640
##  17     0      0     3     1   1656    640
##  18     0      0     4     5   4072    639
##  19     0      0     2     5   1393    143
##  20     0      0     0     5    615    254
##  21     0      0     2     1   1385    640
##  22     0      0     2     1   1224    640
##  23     0      1     3     1   1779    640
##  24     0      0     2     1   1526    640
##  25     0      0     1     1   2075    581
##  26     0      0    17     1  12042    633
##  27     0      0     2     1   1309    640
##  28     0      0     2     1   1290    640
##  29     0      0     2     1   1280    640
##  30     0      0     3     1   1719    640
##  31     0      1     2     1   1102    640
##  32     0      1     3     1   1543    640
##  33     0      1     2     1   1174    640
##  34     0      1     2     1   1105    640
##  35     0      0     2     1   1335    640
##  36     0      0     2     1   1550    640
##  37     0      0     4     1   2473    640
##  38     0      0     2     1   1322    640
##  39     0      0     4     1   2553    640
##  40     0      1     2     1   1191    640
##  41     0      0     2     1   1439    640
##  42     0      1     2     1   1237    640
##  43     0      0     2     1   1265    640
##  44     0      1     4     1   2280    640
##  45     0      0     2     1   1096    640
##  46     0      1     2     1   1156    640
##  47     0      0     2     1   1199    640
##  48     0      1     4     1   2518    640
##  49     0      1     2     1   1246    640
##  50     0      1     3     1   1821    640
##  51     0      0     5     1   3101    626
##  52     0      1     2     1   1176    640
##  53     0      0     3     1   1891    640
##  54     0      0     1     1    935    640
##  55     0      0     2     1   1395    640
##  56     0      0     2     1   1561    640
##  57     0      1     7     1   6912    636
##  58     0      0     3     1   2197    640
##  59     0      0     4     1   2288    640
##  60     0      0     2     1   1320    640
##  61     0      1     2     1   1139    640
##  62     0      1     4     1   2134    639
##  63     0      0     2     1   1407    640
##  64     0      0     2     1   1982    640
##  65     0      0     4     1   2539    640
##  66     0      0     2     1   1528    640
##  67     0      1     2     1   1513    640
##  68     0      0     2     1   3977    139
##  69     0      1     2     1   1269    640
##  70     0      0     2     1   1501    640
##  71     0      1     2     1   1396    640
##  72     0      0     3     1   1777    640
##  73     0      1     1     1    833    640
##  74     0      1     1     1    715    640
##  75     0      0     2     1   1375    640
##  76     0      0     2     1   1330    640
##  77     0      0     2     1   1628    640
##  78     0      0     2     1   1368    640
##  79     0      0    39     1  26356    421
##  80     0      0     2     1   1305    640
##  81     0      0     2     1   1416    640
##  82     0      0     2     1    877    640
##  83     0      1     4     1   2082    640
##  84     0      1     2     1   1096    640
##  85     0      0     3     1   2028    640
##  86     0      0     2     1   1104    640
##  87     0      1     2     1   1093    640
##  88     0      1     4     1   2592    640
##  89     0      0     4     1   3609    640
##  90     0      0     3     1   2118    639
##  91     0      0     2     1   1805    640
##  92     0      0     1     1   2825    580
##  93     0      1     2     1   1299    640
##  94     0      0     2     1   1413    640
##  95     0      1     3     1   2354    640
##  96     0      1     2     1   1214    640
##  97     0      1     3     1   1843    640
##  98     0      1     4     1   2090    640
##  99     0      1     2     1   1111    640
## 100     0      1     3     1   1505    640
## 101     0      0     2     1   1277    640
## 102     0      0     3     1   1828    640
## 103     0      0     2     1   1581    640
## 104     0      1     2     1   1128    640
## 105     0      0     2     1   1976    640
## 106     0      0     3     1   2144    640
## 107     0      0     2     1   1283    640
## 108     0      1     3     1   1725    640
## 109     0      1     0     1   2847    581
## 110     0      1     3     1   1947    640
## 111     0      0     3     1   1669    640
## 112     0      1     2     1   1404    640
## 113     0      0     2     1   1272    640
## 114     0      1     2     1   1161    640
## 115     0      0     2     1   1286    640
## 116     0      0     2     1   1454    640
## 117     0      0     1     1    874    640
## 118     0      1     3     1   2129    626
## 119     0      1     3     1   2149    626
## 120     0      1     2     1   1317    640
## 121     0      1     2     1   1281    640
## 122     0      0     3     1   2330    640
## 123     0      0     8     1   5014    640
## 124     0      1     3     1   2029    640
## 125     0      1     3     1   1530    640
## 126     0      0     3     1   2220    626
## 127     0      0     2     1   1267    640
## 128     0      0     3     1   2057    640
## 129     0      0     2     1   1351    640
## 130     0      1    18     1  10649    626
## 131     0      1     3     1   2777    640
## 132     0      1     3     1   1915    640
## 133     0      0     2     1   1420    640
## 134     0      0     2     1   1337    640
## 135     0      0     3     1   2187    640
## 136     0      1     3     1   1748    640
## 137     0      1     2     1   1141    640
## 138     0      0     3     1   1795    640
## 139     0      1     2     1   1436    640
## 140     0      0     2     1   1156    640
## 141     0      0     2     1   1263    640
## 142     0      1     2     1   1324    640
## 143     0      0     3     1   1874    640
## 144     0      0     2     1   1538    640
## 145     0      0     4     1   3257    640
## 146     0      1     2     1   1576    633
## 147     0      0    15     1   8631    614
## 148     0      1     2     1   1173    640
## 149     0      0     3     1   2516    640
## 150     0      0     2     1   1446    640
## 151     0      0     3     1   1807    640
## 152     0      0     2     1   1307    640
## 153     0      0     3     1   1739    640
## 154     0      1     2     1   1298    640
## 155     0      0     1     1    921    640
## 156     0      0     7     1   8184    634
## 157     0      1     3     1   2051    640
## 158     0      1     3     1   1743    640
## 159     0      0     2     1   1491    640
## 160     0      1     3     1   1772    640
## 161     0      0     2     1   1437    640
## 162     0      1     4     1   2724    640
## 163     0      0     2     2   1291    640
## 164     0      1     2     1   1211    640
## 165     0      0     2     1   1383    640
## 166     0      1     3     1   1660    640
## 167     0      0     2     1   1458    640
## 168     0      0     2     1   1427    640
## 169     0      1     2     1   1217    640
## 170     0      1     2     1   1099    640
## 171     0      0     1     1    871    640
## 172     0      1     1     1   1688    138
## 173     0      1     3     1   2877    422
## 174     0      0     1     1   2805    138
## 175     0      0     2     1   4412    138
## 176     0      1     2     1   1818    249
## 177     0      0     5     1   4296    640
## 178     0      0     3     1   1887    640
## 179     0      1     2     1   1418    640
## 180     0      1     2     1   1136    640
## 181     0      1     2     1   1475    640
## 182     0      1     3     1   1538    640
## 183     0      0     1     1   1097    640
## 184     0      1     2     1   1362    640
## 185     0      0     3     1   3342    640
## 186     0      0     6     1   5568    640
## 187     0      1     7     1   4288    639
## 188     0      1     7     1   5788    614
## 189     0      1     0     1   2531    581
## 190     0      1    23     1  13112    614
## 191     0      0     3     1   3497    640
## 192     0      0     3     1   1832    640
## 193     0      1     3     1   1685    640
## 194     0      0     2     1   1135    640
## 195     0      1     2     1   1303    640
## 196     0      1     3     1   1890    640
## 197     0      0     3     1   1674    640
## 198     0      0     4     1   2595    640
## 199     0      0     3     1   1782    640
## 200     0      1     2     1   1251    640
## 201     0      0     3     1   1997    640
## 202     0      0     2     1   1758    640
## 203     0      0     2     1   1369    640
## 204     0      0     2     1   1477    640
## 205     0      0     2     1   1340    640
## 206     0      0     2     1   1236    640
## 207     0      0     5     1   4677    640
## 208     0      0     1     1   1051    640
## 209     0      1     4     3   3041    626
## 210     0      0     1     1    550    640
## 211     0      0     2     1   1039    640
## 212     0      1     3     1   1769    633
## 213     0      0     2     1   1794    640
## 214     0      0     2     1   1578    640
## 215     0      0     3     1   1883    640
## 216     0      0     2     6   1535    640
## 217     0      1     2     1   1200    640
## 218     0      1     3     1   1946    640
## 219     0      1     2     1   1875    640
## 220     0      1     3     1   1908    640
## 221     0      0     3     1   2080    640
## 222     0      0     2     1   1408    640
## 223     0      0     3     1   2008    640
## 224     0      0     2     1   1278    640
## 225     0      1     2     1   1083    640
## 226     0      0     3     1   1964    640
## 227     0      0     2     1   1231    640
## 228     0      0     3     1   1845    640
## 229     0      0     2     1   1428    640
## 230     0      1     7     1   5934    640
## 231     0      1     3     1   1888    640
## 232     0      1     3     1   1431    640
## 233     0      0     1     1    651    640
## 234     0      1     4     1   3969    640
## 235     0      0     2     1   1363    640
## 236     0      1     2     1   1118    640
## 237     0      1     2     1   1543    640
## 238     0      1     5     1   4304    640
## 239     0      0     3     1   1933    640
## 240     0      0     2     1   1120    640
## 241     0      0     2     1   1465    640
## 242     0      0     2     1   1768    634
## 243     0      0     2     1   1284    640
## 244     0      0     3     1   1952    640
## 245     0      0     4     1   2100    640
## 246     0      0     2     1   1281    640
## 247     0      1     2     1   1250    640
## 248     0      1     2     1   1178    640
## 249     0      1    24     1  13040    863
## 250     0      1     3     1   1517    640
## 251     0      0     3     4   3126    640
## 252     0      1     4     1   2049    640
## 253     0      0     2     1   1245    640
## 254     0      1     3     1   1603    640
## 255     0      1     3     1   2840    640
## 256     0      1     2     1   1411    640
## 257     0      1     2     1   1175    640
## 258     0      0     2     1   1689    640
## 259     0      1     3     1   2023    640
## 260     0      0     2     1   2540    640
## 261     0      0     7     1  10431    636
## 262     0      0     3     1   2204    633
## 263     0      0     3     1   2218    640
## 264     0      0     2     1   1382    640
## 265     0      1     2     1   1115    640
## 266     0      0     1     1    825    640
## 267     0      0     3     1   1701    640
## 268     0      0     2     1   1273    640
## 269     0      0     2     1   1270    640
## 270     0      1     2     1   1106    640
## 271     0      1     2     1   1065    640
## 272     0      0     2     1   1264    640
## 273     0      0     2     1   1282    640
## 274     0      0     2     1   1393    640
## 275     0      0     2     1   1266    640
## 276     0      1     3     1   1886    640
## 277     0      1     6     1   5881    636
## 278     0      1     2     1   1171    640
## 279     0      1     2     1   1086    640
## 280     0      0     4     1   4931    640
sum(df$TOTCHG[df$AGE == 0], na.rm = TRUE)
## [1] 644905