One of the challenges in working with data is wrangling. In this
assignment we will use R to perform this task.
Here is a list of data sets: http://vincentarelbundock.github.io/Rdatasets/ (click on
the csv index for a list)
Please select one, download it and perform the following tasks:
I chose the DoctorVisits data set (https://vincentarelbundock.github.io/Rdatasets/doc/AER/DoctorVisits.html) for this week's assignment.
The Q7 bonus asks for the data set to be read directly from a link, so I have included that step with Q1.
# Read the DoctorVisits CSV directly from the Rdatasets site (first row holds
# the column names) and show the first rows as a quick sanity check.
data_in_doctorvisits <- read.csv(
  file = "https://vincentarelbundock.github.io/Rdatasets/csv/AER/DoctorVisits.csv",
  header = TRUE
)
head(x = data_in_doctorvisits)
## X visits gender age income illness reduced health private freepoor freerepat
## 1 1 1 female 0.19 0.55 1 4 1 yes no no
## 2 2 1 female 0.19 0.45 1 2 1 yes no no
## 3 3 1 male 0.19 0.90 3 0 0 no no no
## 4 4 1 male 0.19 0.15 1 0 0 no no no
## 5 5 1 male 0.19 0.45 2 5 1 no no no
## 6 6 1 female 0.19 0.35 5 1 9 no no no
## nchronic lchronic
## 1 no no
## 2 no no
## 3 no no
## 4 no no
## 5 yes no
## 6 yes no
# Per-column summary of the full data set as it was read in.
summary(object = data_in_doctorvisits)
## X visits gender age
## Min. : 1 Min. :0.0000 Length:5190 Min. :0.1900
## 1st Qu.:1298 1st Qu.:0.0000 Class :character 1st Qu.:0.2200
## Median :2596 Median :0.0000 Mode :character Median :0.3200
## Mean :2596 Mean :0.3017 Mean :0.4064
## 3rd Qu.:3893 3rd Qu.:0.0000 3rd Qu.:0.6200
## Max. :5190 Max. :9.0000 Max. :0.7200
## income illness reduced health
## Min. :0.0000 Min. :0.000 Min. : 0.0000 Min. : 0.000
## 1st Qu.:0.2500 1st Qu.:0.000 1st Qu.: 0.0000 1st Qu.: 0.000
## Median :0.5500 Median :1.000 Median : 0.0000 Median : 0.000
## Mean :0.5832 Mean :1.432 Mean : 0.8619 Mean : 1.218
## 3rd Qu.:0.9000 3rd Qu.:2.000 3rd Qu.: 0.0000 3rd Qu.: 2.000
## Max. :1.5000 Max. :5.000 Max. :14.0000 Max. :12.000
## private freepoor freerepat nchronic
## Length:5190 Length:5190 Length:5190 Length:5190
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## lchronic
## Length:5190
## Class :character
## Mode :character
##
##
##
# Report the mean and median of income and age for the full data set.
# cat() does not append a newline on its own, so each message ends with an
# explicit "\n"; otherwise the four messages run together on a single line
# when the script is executed outside of knitr (Rscript, source()).
# sep = "" keeps the printed text identical and avoids a trailing space.
cat("the mean of income is : ", mean(data_in_doctorvisits$income), "\n",
    sep = "")
## the mean of income is : 0.5831599
cat("the mean of age is : ", mean(data_in_doctorvisits$age), "\n",
    sep = "")
## the mean of age is : 0.4063854
cat("the median of income is : ", median(data_in_doctorvisits$income), "\n",
    sep = "")
## the median of income is : 0.55
cat("the median of age is : ", median(data_in_doctorvisits$age), "\n",
    sep = "")
## the median of age is : 0.32
# Keep only the rows with age above 0.50 and income below 0.55, restricted to
# seven columns of interest, then preview the first ten rows.
# which() drops any NA condition results, so rows are kept only where the
# condition is TRUE.
sub_data_in_dv <- data_in_doctorvisits[
  with(data_in_doctorvisits, which(age > 0.50 & income < 0.55)),
  c("X", "visits", "gender", "age", "income", "illness", "private"),
  drop = FALSE
]
head(sub_data_in_dv, n = 10)
## X visits gender age income illness private
## 504 504 1 male 0.52 0.35 1 no
## 505 505 1 female 0.52 0.25 1 no
## 506 506 1 male 0.52 0.25 4 no
## 509 509 6 female 0.52 0.25 4 no
## 510 510 3 female 0.52 0.25 3 no
## 512 512 1 female 0.52 0.35 3 no
## 514 514 1 female 0.52 0.00 2 yes
## 515 515 1 female 0.52 0.25 2 no
## 518 518 2 female 0.52 0.25 1 no
## 519 519 1 female 0.52 0.25 3 no
# Work on a copy of the subset and give two columns new names:
# "illness" becomes "sick_frq" and "private" becomes "private_ins".
d <- sub_data_in_dv
colnames(d)[colnames(d) == "illness"] <- "sick_frq"
colnames(d)[colnames(d) == "private"] <- "private_ins"
head(d, n = 10)
## X visits gender age income sick_frq private_ins
## 504 504 1 male 0.52 0.35 1 no
## 505 505 1 female 0.52 0.25 1 no
## 506 506 1 male 0.52 0.25 4 no
## 509 509 6 female 0.52 0.25 4 no
## 510 510 3 female 0.52 0.25 3 no
## 512 512 1 female 0.52 0.35 3 no
## 514 514 1 female 0.52 0.00 2 yes
## 515 515 1 female 0.52 0.25 2 no
## 518 518 2 female 0.52 0.25 1 no
## 519 519 1 female 0.52 0.25 3 no
# Per-column summary of the renamed subset.
summary(object = d)
## X visits gender age
## Min. : 504.0 Min. :0.0000 Length:1456 Min. :0.5200
## 1st Qu.: 965.8 1st Qu.:0.0000 Class :character 1st Qu.:0.6200
## Median :2399.5 Median :0.0000 Mode :character Median :0.6700
## Mean :2448.2 Mean :0.4663 Mean :0.6664
## 3rd Qu.:3442.2 3rd Qu.:1.0000 3rd Qu.:0.7200
## Max. :5190.0 Max. :9.0000 Max. :0.7200
## income sick_frq private_ins
## Min. :0.0000 Min. :0.000 Length:1456
## 1st Qu.:0.2500 1st Qu.:1.000 Class :character
## Median :0.2500 Median :2.000 Mode :character
## Mean :0.2704 Mean :1.903
## 3rd Qu.:0.3500 3rd Qu.:3.000
## Max. :0.4500 Max. :5.000
# Compare the mean and median of income and age between the full data set and
# the subset `d`.
# cat() does not append a newline on its own, so each message ends with an
# explicit "\n"; otherwise the four messages run together on a single line
# when the script is executed outside of knitr (Rscript, source()).
# sep = "" keeps the printed text identical and avoids a trailing space.
cat("the change of the mean of income from : ",
    mean(data_in_doctorvisits$income), " to ", mean(d$income), "\n",
    sep = "")
## the change of the mean of income from : 0.5831599 to 0.2704327
cat("the change of the mean of age from : ",
    mean(data_in_doctorvisits$age), " to ", mean(d$age), "\n",
    sep = "")
## the change of the mean of age from : 0.4063854 to 0.6663942
cat("the change of the median of income from : ",
    median(data_in_doctorvisits$income), " to ", median(d$income), "\n",
    sep = "")
## the change of the median of income from : 0.55 to 0.25
cat("the change of the median of age from : ",
    median(data_in_doctorvisits$age), " to ", median(d$age), "\n",
    sep = "")
## the change of the median of age from : 0.32 to 0.67
# Shorten the gender labels in place: "male" becomes "M", "female" becomes "F".
d$gender[d$gender == "male"] <- "M"
d$gender[d$gender == "female"] <- "F"
head(d, n = 10)
## X visits gender age income sick_frq private_ins
## 504 504 1 M 0.52 0.35 1 no
## 505 505 1 F 0.52 0.25 1 no
## 506 506 1 M 0.52 0.25 4 no
## 509 509 6 F 0.52 0.25 4 no
## 510 510 3 F 0.52 0.25 3 no
## 512 512 1 F 0.52 0.35 3 no
## 514 514 1 F 0.52 0.00 2 yes
## 515 515 1 F 0.52 0.25 2 no
## 518 518 2 F 0.52 0.25 1 no
## 519 519 1 F 0.52 0.25 3 no
# Show the first 15 rows of the recoded subset.
head(d, n = 15)
## X visits gender age income sick_frq private_ins
## 504 504 1 M 0.52 0.35 1 no
## 505 505 1 F 0.52 0.25 1 no
## 506 506 1 M 0.52 0.25 4 no
## 509 509 6 F 0.52 0.25 4 no
## 510 510 3 F 0.52 0.25 3 no
## 512 512 1 F 0.52 0.35 3 no
## 514 514 1 F 0.52 0.00 2 yes
## 515 515 1 F 0.52 0.25 2 no
## 518 518 2 F 0.52 0.25 1 no
## 519 519 1 F 0.52 0.25 3 no
## 520 520 1 F 0.52 0.25 2 no
## 521 521 1 F 0.52 0.25 5 no
## 523 523 1 M 0.52 0.25 3 no
## 524 524 2 F 0.52 0.35 1 no
## 525 525 8 M 0.52 0.25 5 no
# Show the last 15 rows of the recoded subset.
tail(d, n = 15)
## X visits gender age income sick_frq private_ins
## 5162 5162 0 F 0.72 0.25 0 yes
## 5165 5165 0 F 0.72 0.25 0 no
## 5166 5166 0 M 0.72 0.25 0 no
## 5167 5167 0 F 0.72 0.25 0 yes
## 5169 5169 0 F 0.72 0.25 0 yes
## 5170 5170 0 F 0.72 0.25 0 yes
## 5171 5171 0 F 0.72 0.25 0 yes
## 5172 5172 0 F 0.72 0.25 0 yes
## 5174 5174 0 F 0.72 0.35 0 yes
## 5175 5175 0 F 0.72 0.25 0 no
## 5176 5176 0 F 0.72 0.25 0 no
## 5177 5177 0 F 0.72 0.25 0 no
## 5179 5179 0 M 0.72 0.25 0 no
## 5180 5180 0 F 0.72 0.35 0 no
## 5190 5190 0 M 0.72 0.25 0 no