# Global knitr chunk defaults for this report: echo the code, hide warnings,
# keep messages, and centre the figures.
# TRUE/FALSE are written out in full because T and F are ordinary variables
# that can be reassigned, whereas TRUE and FALSE are reserved words.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = TRUE,
  fig.align = "center"
)
# Work from the folder that holds the raw data file.
# NOTE(review): this is an absolute, machine-specific path, so the script only
# runs where this directory exists -- consider a project-relative path instead.
setwd(dir = "C:/Users/Tania/Downloads")
library(readr)
# Read the CSV (resolved against the working directory set above) into data1.
data1 <- read_csv(file = "HealthExpend.csv")
## Rows: 2000 Columns: 28
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): RACE, REGION, EDUC, MARISTAT, INCOME, PHSTAT, INDUSCLASS
## dbl (21): AGE, ANYLIMIT, COLLEGE, HIGHSCH, GENDER, MNHPOOR, insure, USC, UNE...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only the rows of data1 whose EXPENDIP is strictly positive.
# subset() evaluates the condition inside data1 itself, so data1 does not need
# to be attached first. Attaching both data1 and data.1 put two copies of every
# column on the search path, with all 28 names of data1 masked by data.1.
data.1 <- subset(data1, EXPENDIP > 0)
# NOTE(review): attach() is kept only because the statements that follow refer
# to columns by bare name (e.g. EXPENDIP). Prefer data.1$EXPENDIP or
# with(data.1, ...) and remove this call once those references are qualified.
attach(data.1)
# Structure of the filtered data: column names, then rows x columns.
names(data.1)
## [1] "AGE" "ANYLIMIT" "COLLEGE" "HIGHSCH" "GENDER"
## [6] "MNHPOOR" "insure" "USC" "UNEMPLOY" "MANAGEDCARE"
## [11] "famsize" "COUNTIP" "EXPENDIP" "COUNTOP" "EXPENDOP"
## [16] "RACE" "RACE1" "REGION" "REGION1" "EDUC"
## [21] "EDUC1" "MARISTAT" "MARISTAT1" "INCOME" "INCOME1"
## [26] "PHSTAT" "PHSTAT1" "INDUSCLASS"
dim(data.1)
## [1] 157 28
# Location and spread of EXPENDIP on its original scale: quartiles, mean,
# extremes, and then the standard deviation.
summary(EXPENDIP)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 26.3 2894.0 5694.7 12844.4 11062.3 607800.6
sd(EXPENDIP)
## [1] 48836.77
# Boxplot of the raw values.
boxplot(EXPENDIP)

# Finely binned histogram; a single number passed as `breaks` is a suggested
# cell count, which is what the S-compatibility argument `nclass` maps to.
hist(EXPENDIP, breaks = 100)

# Normal Q-Q plot with its reference line, to compare the raw values against
# a normal distribution.
qqnorm(EXPENDIP)
qqline(EXPENDIP)

# Square-root transform of EXPENDIP, followed by the same histogram and normal
# Q-Q diagnostics that were applied on the original scale.
sqrtEXPENDIP <- sqrt(EXPENDIP)
hist(sqrtEXPENDIP, breaks = 100)

qqnorm(sqrtEXPENDIP)
qqline(sqrtEXPENDIP)

# Natural-log transform; finite for every row because data.1 was filtered to
# EXPENDIP > 0.
lnEXPENDIP <- log(EXPENDIP)