# Importing Data from 3 Different Sources
## First we install the packages (run once in the console):
# install.packages("DBI")
# install.packages("RSQLite")
# install.packages("haven")
# install.packages("dplyr")
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(haven) # Needed for SPSS files
library(DBI) # Needed for Database connections
library(RSQLite) # Needed to make a local SQL database
# Source 1: read the medical-bills CSV with readr. show_col_types = FALSE
# suppresses the column-specification message that read_csv() prints.
# NOTE: the absolute path below is specific to the author's machine.
medical_data <- read_csv(
  "C:/Users/DELL/Downloads/medical_bills_ground_truth.csv",
  show_col_types = FALSE
)

# Open the dataset in the data viewer, but only in an interactive session:
# View() relies on a GUI and may fail when the script is run with Rscript or
# knitted, so it is skipped there.
if (interactive()) {
  View(medical_data)
}
## read_sav() from the haven package imports SPSS (.sav) datasets. I do not
## have the SPSS file on this machine, so the call below is left commented out
## because running it would raise an error.
# spss_data <- read_sav("C:/Users/DELL/Documents/patient_study.sav")
# Source 3: SQL database. Open a temporary in-memory SQLite database.
con <- dbConnect(RSQLite::SQLite(), ":memory:")

# Seed the database with a small example table so there is something to read.
dbWriteTable(
  con,
  "hospital_records",
  data.frame(
    PatientID = 1:3,
    Status = c("Discharged", "Admitted", "Discharged")
  )
)

# IMPORT the table from the database into an R data frame.
database_data <- dbReadTable(con, "hospital_records")

# Close the connection now that the data has been copied into R; leaving it
# open leaks the connection until the session ends.
dbDisconnect(con)
# Using group_by() and the pipe operator (%>%):
# group the records by document type, then count the rows in each group.
bills_summary <- medical_data %>%
  group_by(document_type) %>%
  summarise(Total_Count = n())

# Show the per-type counts.
print(bills_summary)
## # A tibble: 1 × 2
## document_type Total_Count
## <chr> <int>
## 1 bill 500
# Using trace() allows us to "spy" on a function and run a message when it triggers,
# without altering the actual function code.
# A basic function that announces which document is being checked.
#
# doc_name: the file name to report.
# Prints "Checking file: <doc_name>" and returns that string invisibly
# (the value of print()).
check_document <- function(doc_name) {
  status_line <- paste("Checking file:", doc_name)
  print(status_line)
}
# Attach a tracer to check_document: the quoted expression is evaluated every
# time the function is entered, while the function's own code stays untouched.
trace(
  check_document,
  tracer = quote(print("--- TRACE ALERT: Function started ---"))
)
## [1] "check_document"
# Call the function to see the trace message appear ahead of its own output.
check_document("med_doc_bill_100001_noisy.jpg")
## Tracing check_document("med_doc_bill_100001_noisy.jpg") on entry
## [1] "--- TRACE ALERT: Function started ---"
## [1] "Checking file: med_doc_bill_100001_noisy.jpg"
# Remove the tracer so check_document goes back to its normal behaviour.
untrace(check_document)
# 2. Using recover(): with options(error = recover), R stops at the point of an
# uncaught error and opens an interactive menu of the active function calls,
# allowing us to inspect the variables in each of them.

# Turn on recover mode for errors. options() returns the previous setting, which
# is kept so it can be restored afterwards instead of being overwritten.
previous_error_option <- options(error = recover)

# A deliberately broken function used to trigger the error handler.
#
# bill_number: the amount the text is (incorrectly) added to.
# For numeric input this always fails with a "non-numeric argument to binary
# operator" error, because a character string cannot be added to a number.
calculate_error_demo <- function(bill_number) {
  bill_number + "dollars"
}

# Uncomment the call below to trigger the error and open the recover menu in
# the console. Enter a frame number (e.g. 1) to browse that call's variables,
# type 'c' inside the browser to return to the menu, and enter 0 to exit.
# calculate_error_demo(500)

# Restore whichever error handler was active before this demo, rather than
# forcing it to NULL.
options(previous_error_option)
# Keep only the rows whose document_type is "bill", then preview the first rows.
filtered_bills <- filter(medical_data, document_type == "bill")
head(filtered_bills)
## # A tibble: 6 × 3
## filename document_type json_data
## <chr> <chr> <chr>
## 1 med_doc_bill_100001_noisy.jpg bill "{\n \"hospital\": {\n \"name…
## 2 med_doc_bill_100002_noisy.jpg bill "{\n \"hospital\": {\n \"name…
## 3 med_doc_bill_100003_noisy.jpg bill "{\n \"hospital\": {\n \"name…
## 4 med_doc_bill_100004_noisy.jpg bill "{\n \"hospital\": {\n \"name…
## 5 med_doc_bill_100005_noisy.jpg bill "{\n \"hospital\": {\n \"name…
## 6 med_doc_bill_100006_noisy.jpg bill "{\n \"hospital\": {\n \"name…
# 1. Build a numeric vector holding four patient medical-bill balances.
bill_balances <- c(100, 250, 400, 50)

# 2. Add a 10 dollar service fee to every bill in a single step.
# Arithmetic in R is vectorised, so '+ 10' is applied to each element of the
# vector without writing a loop.
new_balances <- bill_balances + 10

# Print the updated balances to show the lecturer.
print(new_balances)
## [1] 110 260 410 60