# Global knitr chunk defaults: echo the code, hide warnings, show messages,
# and center figures. The logicals are spelled out as TRUE/FALSE because the
# shorthands T and F are ordinary variables that can be reassigned.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = TRUE,
  fig.align = "center"
)
# NOTE(review): hard-coded, machine-specific working directory. The relative
# file name passed to read_csv() below is resolved against it, so the script
# only runs where this path exists.
setwd("C:/Users/Tania/Downloads")

library(readr)
# Read the CSV into `data1`. The `##` lines that follow are the column
# specification message captured from a previous run (2000 rows, 28 columns).
data1 <- read_csv("HealthExpend.csv")
## Rows: 2000 Columns: 28
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (7): RACE, REGION, EDUC, MARISTAT, INCOME, PHSTAT, INDUSCLASS
## dbl (21): AGE, ANYLIMIT, COLLEGE, HIGHSCH, GENDER, MNHPOOR, insure, USC, UNE...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Put the columns of `data1` on the search path so they can be referenced by
# bare name.
# NOTE(review): attach() makes name resolution depend on search-path order and
# on whatever else is attached or defined globally; explicit `data1$col` or
# with(data1, ...) references are less fragile.
attach(data1)

# Restrict the data to the rows with a strictly positive EXPENDIP.
data.1 <- subset(x = data1, subset = EXPENDIP > 0)

# Attach the filtered frame. Its columns now take precedence over the
# same-named columns of `data1` (see the masking message captured below).
attach(data.1)
## The following objects are masked from data1:
## 
##     AGE, ANYLIMIT, COLLEGE, COUNTIP, COUNTOP, EDUC, EDUC1, EXPENDIP,
##     EXPENDOP, famsize, GENDER, HIGHSCH, INCOME, INCOME1, INDUSCLASS,
##     insure, MANAGEDCARE, MARISTAT, MARISTAT1, MNHPOOR, PHSTAT, PHSTAT1,
##     RACE, RACE1, REGION, REGION1, UNEMPLOY, USC
# Column names and dimensions of the filtered data.
colnames(data.1)
##  [1] "AGE"         "ANYLIMIT"    "COLLEGE"     "HIGHSCH"     "GENDER"     
##  [6] "MNHPOOR"     "insure"      "USC"         "UNEMPLOY"    "MANAGEDCARE"
## [11] "famsize"     "COUNTIP"     "EXPENDIP"    "COUNTOP"     "EXPENDOP"   
## [16] "RACE"        "RACE1"       "REGION"      "REGION1"     "EDUC"       
## [21] "EDUC1"       "MARISTAT"    "MARISTAT1"   "INCOME"      "INCOME1"    
## [26] "PHSTAT"      "PHSTAT1"     "INDUSCLASS"
dim(data.1)
## [1] 157  28
# Exploratory look at the distribution of EXPENDIP in the filtered data
# (`data.1`). The column is referenced explicitly instead of by bare name, so
# the results do not depend on which attached data frame (or global variable)
# happens to come first on the search path.
summary(data.1$EXPENDIP)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##     26.3   2894.0   5694.7  12844.4  11062.3 607800.6
sd(data.1$EXPENDIP)
## [1] 48836.77
boxplot(data.1$EXPENDIP)

# with() lets hist() see the bare column name, so the default title and
# x-axis label still read "EXPENDIP".
with(data.1, hist(EXPENDIP, nclass = 100))

# Normal Q-Q plot of the raw values, with reference line.
qqnorm(data.1$EXPENDIP)
qqline(data.1$EXPENDIP)

# Square-root transform: histogram and normal Q-Q plot.
sqrtEXPENDIP <- sqrt(data.1$EXPENDIP)
hist(sqrtEXPENDIP, nclass = 100)

qqnorm(sqrtEXPENDIP)
qqline(sqrtEXPENDIP)

# Natural-log transform; safe here because `data.1` only keeps EXPENDIP > 0.
lnEXPENDIP <- log(data.1$EXPENDIP)