# Importing Data from 3 Different Sources
## First, we install the packages (this only needs to be run once):
## install.packages("readr"); install.packages("DBI"); install.packages("RSQLite"); install.packages("haven"); install.packages("dplyr")

Loading all the libraries we need for different files

library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(haven)   # Needed for SPSS files
library(DBI)     # Needed for Database connections
library(RSQLite) # Needed to make a local SQL database

Source 1: Importing from my Downloads folder

# Read the medical bills CSV into a tibble. show_col_types = FALSE silences the
# column-specification message that read_csv() would otherwise print.
medical_data <- read_csv(
  "C:/Users/DELL/Downloads/medical_bills_ground_truth.csv",
  show_col_types = FALSE
)

# Open the dataset in the spreadsheet-style viewer. View() needs an interactive
# session, so it is skipped when the script is run or knitted non-interactively.
if (interactive()) {
  View(medical_data)
}

Source 2: Importing an SPSS file (.sav)

## We use read_sav() from the haven package to read SPSS datasets. I do not have an
## SPSS file on this machine, so the call below is commented out to avoid an error.
# spss_data <- read_sav("C:/Users/DELL/Documents/patient_study.sav")

Source 3: Importing data from a SQL Database

First, we connect to the database (we will make a temporary one for this example)

# Open a temporary in-memory SQLite database for this example.
con <- dbConnect(RSQLite::SQLite(), ":memory:")

# Put a small table into the database so that there is something to read back.
dbWriteTable(
  con,
  "hospital_records",
  data.frame(
    PatientID = 1:3,
    Status = c("Discharged", "Admitted", "Discharged")
  )
)

# Import the table from the database into an R data frame.
database_data <- dbReadTable(con, "hospital_records")

# Close the connection now that the data is in R, so it is not left open.
dbDisconnect(con)

#Using group_by() and the Pipe Operator (%>%)

The pipe operator %>% means “and then”. It takes the result of one line and passes it to the next line so we don’t have to keep saving temporary variables.

We use group_by() to group our data by a specific category, and then we use summarise() to calculate things like the total or the average for each group.

# Count the rows for each document type: group first, then summarise each group.
bills_summary <- medical_data %>%
  group_by(document_type) %>%
  summarise(Total_Count = n(), .groups = "drop")
print(bills_summary)
## # A tibble: 1 × 2
##   document_type Total_Count
##   <chr>               <int>
## 1 bill                  500

How to use trace() and recover()

# 1. Using trace() allows us to "spy" on a function and run a message when it is
# called, without altering the actual function code.

# A small helper that prints a one-line status message naming the document
# being checked.
check_document <- function(doc_name) {
  status_line <- paste("Checking file:", doc_name)
  print(status_line)
}

# Attach a tracer to check_document(): the quoted print() expression is run
# each time the function is entered. trace() prints the name of the traced
# function, shown in the output line below.
trace(check_document, tracer = quote(print("--- TRACE ALERT: Function started ---")))
## [1] "check_document"
# Call the function: the output below shows the "Tracing ... on entry" notice
# and the tracer's message, followed by the function's own output.
check_document("med_doc_bill_100001_noisy.jpg")
## Tracing check_document("med_doc_bill_100001_noisy.jpg") on entry 
## [1] "--- TRACE ALERT: Function started ---"
## [1] "Checking file: med_doc_bill_100001_noisy.jpg"
# Remove the tracer so that check_document() behaves normally again.
untrace(check_document)
# 2. Using recover() as the error handler tells R to pause at the moment an error
# occurs and open an interactive menu allowing us to inspect our variables.

# Turn on recover mode for errors, remembering the previous setting so that it
# can be put back afterwards instead of being overwritten.
old_options <- options(error = recover)

# A simple function that always fails, because R cannot add a number and a
# character string.
calculate_error_demo <- function(bill_number) {
  bill_number + "dollars"
}

# Uncomment the call below to trigger the error. recover() then shows a menu of
# the active function calls in the console: enter a frame number (for example
# 1) to look at the variables in that call, and enter 0 to exit.

# calculate_error_demo(500)

# Restore the error handler that was in place before this demo.
options(old_options)

USING THE FILTER() FUNCTION

filter() keeps only the rows where our condition is TRUE.

Here, we filter our dataset to only show rows where document_type is exactly “bill”.

# Keep only the rows whose document_type is exactly "bill".
filtered_bills <- filter(medical_data, document_type == "bill")

Looking at the first few rows of our filtered data

# Show the first six rows (the default for head()) of the filtered data.
head(filtered_bills, n = 6L)
## # A tibble: 6 × 3
##   filename                      document_type json_data                         
##   <chr>                         <chr>         <chr>                             
## 1 med_doc_bill_100001_noisy.jpg bill          "{\n  \"hospital\": {\n    \"name…
## 2 med_doc_bill_100002_noisy.jpg bill          "{\n  \"hospital\": {\n    \"name…
## 3 med_doc_bill_100003_noisy.jpg bill          "{\n  \"hospital\": {\n    \"name…
## 4 med_doc_bill_100004_noisy.jpg bill          "{\n  \"hospital\": {\n    \"name…
## 5 med_doc_bill_100005_noisy.jpg bill          "{\n  \"hospital\": {\n    \"name…
## 6 med_doc_bill_100006_noisy.jpg bill          "{\n  \"hospital\": {\n    \"name…

VECTORIZED OPERATIONS EXAMPLE

# Step 1: build a numeric vector holding four patient medical bill balances.
bill_balances <- c(100, 250, 400, 50)

# Step 2: add a 10 dollar service fee to every bill in one operation.
# Arithmetic in R is vectorized, so the addition is applied to each element
# of the vector without writing a loop.
new_balances <- 10 + bill_balances

# Display the updated balances.
print(new_balances)
## [1] 110 260 410  60