# Load the nsw_mixtape dataset (causaldata) from the Rdatasets CSV mirror.
# read.csv() is the CSV-specific reader: unlike read.table()'s defaults it
# does not treat apostrophes as quote characters or `#` as a comment marker,
# so a text field cannot be split or truncated by accident.
data_frame <- read.csv(
  file = "https://vincentarelbundock.github.io/Rdatasets/csv/causaldata/nsw_mixtape.csv",
  header = TRUE
)
# Per-column overview of everything that was read.
summary(data_frame)
## X data_id treat age
## Min. : 1 Length:445 Min. :0.0000 Min. :17.00
## 1st Qu.:112 Class :character 1st Qu.:0.0000 1st Qu.:20.00
## Median :223 Mode :character Median :0.0000 Median :24.00
## Mean :223 Mean :0.4157 Mean :25.37
## 3rd Qu.:334 3rd Qu.:1.0000 3rd Qu.:28.00
## Max. :445 Max. :1.0000 Max. :55.00
## educ black hisp marr
## Min. : 3.0 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.: 9.0 1st Qu.:1.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :10.0 Median :1.0000 Median :0.00000 Median :0.0000
## Mean :10.2 Mean :0.8337 Mean :0.08764 Mean :0.1685
## 3rd Qu.:11.0 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :16.0 Max. :1.0000 Max. :1.00000 Max. :1.0000
## nodegree re74 re75 re78
## Min. :0.000 Min. : 0.0 Min. : 0 Min. : 0
## 1st Qu.:1.000 1st Qu.: 0.0 1st Qu.: 0 1st Qu.: 0
## Median :1.000 Median : 0.0 Median : 0 Median : 3702
## Mean :0.782 Mean : 2102.3 Mean : 1377 Mean : 5301
## 3rd Qu.:1.000 3rd Qu.: 824.4 3rd Qu.: 1221 3rd Qu.: 8125
## Max. :1.000 Max. :39570.7 Max. :25142 Max. :60308
# Mean of the `age` column, rendered by sprintf() as a whole number padded to
# width 3. Note: the stored value is a character string, not a number.
mean_data_frame <- sprintf("%3.0f", mean(data_frame[["age"]]))
cat("mean(data_frame$age) = ", mean_data_frame, "\n", sep = " ")
## mean(data_frame$age) = 25
# Median of the `treat` column, kept as a plain numeric value.
median_data_frame <- median(data_frame[["treat"]])
cat("median(data_frame$treat) = ", median_data_frame, "\n", sep = " ")
## median(data_frame$treat) = 0
# plyr supplies the rename() used below to relabel columns.
library(plyr)
# Keep the rows whose data_id is "AA" and whose treat value is at least 1.
# NOTE(review): the summary recorded below reports Length:0 for this subset,
# i.e. no row matched the filter -- confirm "AA" is a valid data_id here.
subset_data_frame <- data_frame[
  which(data_frame$data_id == "AA" & data_frame$treat >= 1),
]
# Relabel the first four columns (old name = new name).
subset_data_frame <- plyr::rename(
  subset_data_frame,
  replace = c(
    X = "Participant",
    data_id = "kind_Of_Sport",
    treat = "MinResult",
    age = "MaxResult"
  )
)
summary(subset_data_frame)
## Participant kind_Of_Sport MinResult MaxResult educ
## Min. : NA Length:0 Min. : NA Min. : NA Min. : NA
## 1st Qu.: NA Class :character 1st Qu.: NA 1st Qu.: NA 1st Qu.: NA
## Median : NA Mode :character Median : NA Median : NA Median : NA
## Mean :NaN Mean :NaN Mean :NaN Mean :NaN
## 3rd Qu.: NA 3rd Qu.: NA 3rd Qu.: NA 3rd Qu.: NA
## Max. : NA Max. : NA Max. : NA Max. : NA
## black hisp marr nodegree re74
## Min. : NA Min. : NA Min. : NA Min. : NA Min. : NA
## 1st Qu.: NA 1st Qu.: NA 1st Qu.: NA 1st Qu.: NA 1st Qu.: NA
## Median : NA Median : NA Median : NA Median : NA Median : NA
## Mean :NaN Mean :NaN Mean :NaN Mean :NaN Mean :NaN
## 3rd Qu.: NA 3rd Qu.: NA 3rd Qu.: NA 3rd Qu.: NA 3rd Qu.: NA
## Max. : NA Max. : NA Max. : NA Max. : NA Max. : NA
## re75 re78
## Min. : NA Min. : NA
## 1st Qu.: NA 1st Qu.: NA
## Median : NA Median : NA
## Mean :NaN Mean :NaN
## 3rd Qu.: NA 3rd Qu.: NA
## Max. : NA Max. : NA
# Mean of MaxResult (the renamed `age` column) within the subset. The raw
# number drives the comparison; the sprintf() string is for display only.
subset_mean_value <- mean(subset_data_frame$MaxResult)
mean_subset_data_frame <- sprintf("%3.0f", subset_mean_value)
cat("mean(subset_data_frame$MaxResult) = ", mean_subset_data_frame, "\n")
## mean(subset_data_frame$MaxResult) = NaN
# Compare the numeric means. Comparing the formatted strings would order them
# as text ("NaN" vs " 25"), not by magnitude, and would silently report an
# undefined mean as "high than" the full-data mean.
full_mean_value <- mean(data_frame$age)
if (is.na(subset_mean_value)) {
  # mean() of an empty column is NaN; there is nothing meaningful to compare.
  print("The subset's mean is not available, so it cannot be compared")
} else if (subset_mean_value <= full_mean_value) {
  print("The subset's mean is less than, equal to the original data_frame")
} else {
  print("The subset's mean is high than the original data_frame")
}
## [1] "The subset's mean is not available, so it cannot be compared"
# Median of MinResult (the renamed `treat` column) within the subset. This
# must call median(), not mean(); the raw number drives the comparison and
# the sprintf() string is for display only.
subset_median_value <- median(subset_data_frame$MinResult)
median_subset_data_frame <- sprintf("%3.0f", subset_median_value)
cat("median(subset_data_frame$MinResult) = ", median_subset_data_frame, "\n")
## median(subset_data_frame$MinResult) = NA
# Compare numerically against the full-data median. Comparing the formatted
# string with a number would coerce both sides to text and order them
# lexically, reporting an undefined median as "high than" the original.
if (is.na(subset_median_value)) {
  # median() of an empty column is NA; there is nothing meaningful to compare.
  print("The subset's median is not available, so it cannot be compared")
} else if (subset_median_value <= median_data_frame) {
  print("The subset's median is less than, equal to the original data_frame")
} else {
  print("The subset's median is high than the original data_frame")
}
## [1] "The subset's median is not available, so it cannot be compared"
require(stringr)
## Loading required package: stringr
# Abbreviate the four long column labels. The replacement has to target
# names(subset_data_frame): comparing the data frame itself with a label
# tests the cell values, so that form never renames a column.
names(subset_data_frame)[names(subset_data_frame) == "Participant"] <- "Par"
names(subset_data_frame)[names(subset_data_frame) == "kind_Of_Sport"] <- "KofS"
names(subset_data_frame)[names(subset_data_frame) == "MinResult"] <- "MinR"
names(subset_data_frame)[names(subset_data_frame) == "MaxResult"] <- "MaxR"
# Show the relabelled (still zero-row) subset, explicitly and via auto-print.
print(subset_data_frame)
## [1] Par KofS MinR MaxR educ
## [6] black hisp marr nodegree re74
## [11] re75 re78
## <0 rows> (or 0-length row.names)
subset_data_frame
## [1] Par KofS MinR MaxR educ
## [6] black hisp marr nodegree re74
## [11] re75 re78
## <0 rows> (or 0-length row.names)