# Load the dataset.
# NOTE: this is an absolute, machine-specific path; the script only runs on a
# machine where the file exists at exactly this location.
data <- read.csv("C:/Users/colek/OneDrive/Desktop/DSC406_001/data/diabetic_data.csv")

# Check the number of rows and columns.
dim(data)
[1] 101766 50
# Preview the first six records of the data frame.
head(data, n = 6L)
encounter_id patient_nbr race gender age weight
1 2278392 8222157 Caucasian Female [0-10) ?
2 149190 55629189 Caucasian Female [10-20) ?
3 64410 86047875 AfricanAmerican Female [20-30) ?
4 500364 82442376 Caucasian Male [30-40) ?
5 16680 42519267 Caucasian Male [40-50) ?
6 35754 82637451 Caucasian Male [50-60) ?
admission_type_id discharge_disposition_id admission_source_id
1 6 25 1
2 1 1 7
3 1 1 7
4 1 1 7
5 1 1 7
6 2 1 2
time_in_hospital payer_code medical_specialty num_lab_procedures
1 1 ? Pediatrics-Endocrinology 41
2 3 ? ? 59
3 2 ? ? 11
4 2 ? ? 44
5 1 ? ? 51
6 3 ? ? 31
num_procedures num_medications number_outpatient number_emergency
1 0 1 0 0
2 0 18 0 0
3 5 13 2 0
4 1 16 0 0
5 0 8 0 0
6 6 16 0 0
number_inpatient diag_1 diag_2 diag_3 number_diagnoses max_glu_serum
1 0 250.83 ? ? 1 None
2 0 276 250.01 255 9 None
3 1 648 250 V27 6 None
4 0 8 250.43 403 7 None
5 0 197 157 250 5 None
6 0 414 411 250 9 None
A1Cresult metformin repaglinide nateglinide chlorpropamide glimepiride
1 None No No No No No
2 None No No No No No
3 None No No No No No
4 None No No No No No
5 None No No No No No
6 None No No No No No
acetohexamide glipizide glyburide tolbutamide pioglitazone rosiglitazone
1 No No No No No No
2 No No No No No No
3 No Steady No No No No
4 No No No No No No
5 No Steady No No No No
6 No No No No No No
acarbose miglitol troglitazone tolazamide examide citoglipton insulin
1 No No No No No No No
2 No No No No No No Up
3 No No No No No No No
4 No No No No No No Up
5 No No No No No No Steady
6 No No No No No No Steady
glyburide.metformin glipizide.metformin glimepiride.pioglitazone
1 No No No
2 No No No
3 No No No
4 No No No
5 No No No
6 No No No
metformin.rosiglitazone metformin.pioglitazone change diabetesMed readmitted
1 No No No No NO
2 No No Ch Yes >30
3 No No No Yes NO
4 No No Ch Yes NO
5 No No Ch Yes NO
6 No No No Yes >30
This data set represents 10 years of clinical care at 130 U.S. hospitals and integrated delivery networks. Each row is the hospital record of a patient diagnosed with diabetes who underwent laboratory tests, received medications, and stayed in hospital for up to 14 days. The goal is to determine whether a patient will be readmitted early, i.e. within 30 days of discharge. (Description adapted from data set source).
Why This Dataset?
This data set was chosen because of its large volume of data and the variety of features that can be explored. Key areas of interest include the factors influencing hospital readmission and the effectiveness of medications and procedures.
Research Questions
What are the main factors that contribute to hospital readmission?
Which medications or procedures are most effective in reducing readmission rates?
Hypothesis
Patients who have been prescribed insulin have a lower likelihood of readmission compared to those not prescribed insulin.
Ethical Considerations
Patient privacy & data sensitivity (ensuring no personally identifiable information is misused)
Potential bias in medical treatment (differences in treatment effectiveness based on race, gender, or socioeconomic factors)
Data interpretation caution (avoiding false causation from correlations)
Bias Consideration
Since I do not have personal experience with diabetes or anyone close to me affected by it, I do not believe I am entering this analysis with any strong biases.
# Load necessary libraries.
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library(tibble)

# Read the dataset.
# NOTE: absolute, machine-specific path; adjust when running elsewhere.
data <- read.csv("C:/Users/colek/OneDrive/Desktop/DSC406_001/data/diabetic_data.csv")

# One human-readable description per column, in the same order as
# colnames(data). The pairing is purely positional.
variable_descriptions <- c(
  "Unique identifier for a hospital encounter",
  "Unique identifier for a patient",
  "Patient's race (e.g., Caucasian, African American, etc.)",
  "Patient's gender (Male/Female)",
  "Age group of the patient",
  "Weight of the patient (often missing)",
  "Type of hospital admission (e.g., Emergency, Urgent, Elective)",
  "Indicates discharge status (e.g., Discharged, Expired, Transferred)",
  "Source of admission (e.g., Physician referral, Emergency room)",
  "Length of stay (days)",
  "Insurance type (e.g., Medicare, Medicaid, Private)",
  "Specialty of the admitting physician (e.g., Cardiology, Endocrinology)",
  "Number of lab procedures performed during the encounter",
  "Number of procedures (other than lab tests) performed",
  "Number of distinct medications administered",
  "Number of outpatient visits prior to admission",
  "Number of emergency visits prior to admission",
  "Number of inpatient visits prior to admission",
  "Primary diagnosis (ICD-9 code)",
  "Secondary diagnosis (ICD-9 code)",
  "Tertiary diagnosis (ICD-9 code)",
  "Total number of diagnoses for the patient",
  "Max glucose serum test result (e.g., >200, >300)",
  "HbA1c test result (e.g., >7, >8)",
  "Whether Metformin was prescribed (No, Up, Down, Steady)",
  "Whether Repaglinide was prescribed",
  "Whether Nateglinide was prescribed",
  "Whether Chlorpropamide was prescribed",
  "Whether Glimepiride was prescribed",
  "Whether Acetohexamide was prescribed",
  "Whether Glipizide was prescribed",
  "Whether Glyburide was prescribed",
  "Whether Tolbutamide was prescribed",
  "Whether Pioglitazone was prescribed",
  "Whether Rosiglitazone was prescribed",
  "Whether Acarbose was prescribed",
  "Whether Miglitol was prescribed",
  "Whether Troglitazone was prescribed",
  "Whether Tolazamide was prescribed",
  "Whether Examide was prescribed",
  "Whether Citoglipton was prescribed",
  "Whether insulin was prescribed",
  "Whether Glyburide-Metformin was prescribed",
  "Whether Glipizide-Metformin was prescribed",
  "Whether Glimepiride-Pioglitazone was prescribed",
  "Whether Metformin-Rosiglitazone was prescribed",
  "Whether Metformin-Pioglitazone was prescribed",
  "Indicates whether medications were changed (Yes/No)",
  "Indicates whether the patient was on diabetes medication (Yes/No)",
  "Indicates whether the patient was readmitted within 30 days, after 30 days, or not readmitted"
)

# Fail early with a clear message if the file's columns and the descriptions
# above ever drift out of sync.
stopifnot(length(variable_descriptions) == ncol(data))

# Create a Data Dictionary Table: one row per column of `data`.
data_dictionary <- tibble(
  Variable_Name = colnames(data),
  # First class of each column; vapply() guarantees a character vector.
  Class = vapply(data, function(x) class(x)[1], character(1)),
  # Heuristic: a numeric column with more than 20 distinct values is labelled
  # "Continuous", everything else "Discrete". Identifier and code columns with
  # many distinct values (e.g. encounter_id) are therefore also labelled
  # "Continuous" even though they are not measurements.
  Continuity = vapply(
    data,
    function(x) {
      if (is.numeric(x) && length(unique(x)) > 20) {
        "Continuous"
      } else {
        "Discrete"
      }
    },
    character(1)
  ),
  Description = variable_descriptions
)

# Print the Data Dictionary Table.
print(data_dictionary)
# A tibble: 50 × 4
Variable_Name Class Continuity Description
<chr> <chr> <chr> <chr>
1 encounter_id integer Continuous Unique identifier for a hospit…
2 patient_nbr integer Continuous Unique identifier for a patient
3 race character Discrete Patient's race (e.g., Caucasia…
4 gender character Discrete Patient's gender (Male/Female)
5 age character Discrete Age group of the patient
6 weight character Discrete Weight of the patient (often m…
7 admission_type_id integer Discrete Type of hospital admission (e.…
8 discharge_disposition_id integer Continuous Indicates discharge status (e.…
9 admission_source_id integer Discrete Source of admission (e.g., Phy…
10 time_in_hospital integer Discrete Length of stay (days)
# ℹ 40 more rows