# Load the HELPrct CSV straight from GitHub. Text columns are kept as
# character vectors rather than being converted to factors.
HELPrct <- read.csv(
  file = "https://raw.githubusercontent.com/murphystout/HELPrct/master/HELPrct.csv",
  stringsAsFactors = FALSE
)
# Per-column overview: quantiles and NA counts for numeric columns,
# length/class/mode for character columns.
summary(object = HELPrct)
## X age anysubstatus anysub
## Min. : 1 Min. :19.00 Min. :0.0000 Length:453
## 1st Qu.:114 1st Qu.:30.00 1st Qu.:1.0000 Class :character
## Median :227 Median :35.00 Median :1.0000 Mode :character
## Mean :227 Mean :35.65 Mean :0.7724
## 3rd Qu.:340 3rd Qu.:40.00 3rd Qu.:1.0000
## Max. :453 Max. :60.00 Max. :1.0000
## NA's :207
## cesd d1 daysanysub dayslink
## Min. : 1.00 Min. : 0.00 Min. : 0.00 Min. : 2.0
## 1st Qu.:25.00 1st Qu.: 1.00 1st Qu.: 5.00 1st Qu.: 74.0
## Median :34.00 Median : 2.00 Median : 33.00 Median :361.0
## Mean :32.85 Mean : 3.06 Mean : 75.31 Mean :255.6
## 3rd Qu.:41.00 3rd Qu.: 3.00 3rd Qu.:164.25 3rd Qu.:365.0
## Max. :60.00 Max. :100.00 Max. :268.00 Max. :456.0
## NA's :209 NA's :22
## drugrisk e2b female sex
## Min. : 0.000 Min. : 1.000 Min. :0.0000 Length:453
## 1st Qu.: 0.000 1st Qu.: 1.000 1st Qu.:0.0000 Class :character
## Median : 0.000 Median : 2.000 Median :0.0000 Mode :character
## Mean : 1.887 Mean : 2.505 Mean :0.2362
## 3rd Qu.: 1.000 3rd Qu.: 3.000 3rd Qu.:0.0000
## Max. :21.000 Max. :21.000 Max. :1.0000
## NA's :1 NA's :239
## g1b homeless i1 i2
## Length:453 Length:453 Min. : 0.00 Min. : 0.00
## Class :character Class :character 1st Qu.: 3.00 1st Qu.: 4.00
## Mode :character Mode :character Median : 13.00 Median : 18.00
## Mean : 17.91 Mean : 24.55
## 3rd Qu.: 26.00 3rd Qu.: 33.00
## Max. :142.00 Max. :184.00
##
## id indtot linkstatus link
## Min. : 1.0 Min. : 4.00 Min. :0.0000 Length:453
## 1st Qu.:119.0 1st Qu.:32.00 1st Qu.:0.0000 Class :character
## Median :233.0 Median :38.00 Median :0.0000 Mode :character
## Mean :233.4 Mean :35.73 Mean :0.3782
## 3rd Qu.:348.0 3rd Qu.:41.00 3rd Qu.:1.0000
## Max. :470.0 Max. :45.00 Max. :1.0000
## NA's :22
## mcs pcs pss_fr racegrp
## Min. : 6.763 Min. :14.07 Min. : 0.000 Length:453
## 1st Qu.:21.676 1st Qu.:40.38 1st Qu.: 3.000 Class :character
## Median :28.602 Median :48.88 Median : 7.000 Mode :character
## Mean :31.677 Mean :48.05 Mean : 6.706
## 3rd Qu.:40.941 3rd Qu.:56.95 3rd Qu.:10.000
## Max. :62.175 Max. :74.81 Max. :14.000
##
## satreat sexrisk substance treat
## Length:453 Min. : 0.000 Length:453 Length:453
## Class :character 1st Qu.: 3.000 Class :character Class :character
## Mode :character Median : 4.000 Mode :character Mode :character
## Mean : 4.642
## 3rd Qu.: 6.000
## Max. :14.000
##
## avg_drinks max_drinks
## Min. : 0.00 Min. : 0.00
## 1st Qu.: 3.00 1st Qu.: 4.00
## Median : 13.00 Median : 18.00
## Mean : 17.91 Mean : 24.55
## 3rd Qu.: 26.00 3rd Qu.: 33.00
## Max. :142.00 Max. :184.00
##
# Centre of the age distribution over every row of HELPrct.
with(HELPrct, mean(age))
## [1] 35.65342
with(HELPrct, median(age))
## [1] 35
# na.rm = TRUE drops missing daysanysub values before averaging.
with(HELPrct, mean(daysanysub, na.rm = TRUE))
## [1] 75.30738
with(HELPrct, median(daysanysub, na.rm = TRUE))
## [1] 33
# Keep only the rows whose sex is "male". subset() also drops rows where the
# condition evaluates to NA.
males <- subset(HELPrct, sex == "male")

# Rename columns by *name* (old = "new") rather than by position. A positional
# vector of all 30 names would silently mislabel columns if the CSV's column
# order or count ever changed; this mapping only touches the nine columns that
# actually get a new name and leaves the rest as they are.
male_column_renames <- c(
  X            = "id_1",
  anysubstatus = "any_substance_status",
  anysub       = "any_substance",
  daysanysub   = "days_using_any_substance",
  id           = "id_2",
  racegrp      = "race_group",
  satreat      = "substance_abuse_treatment",
  treat        = "treatment",
  avg_drinks   = "average_drinks"
)
# Fail loudly if the input no longer has the columns we expect to rename.
stopifnot(all(names(male_column_renames) %in% names(males)))
names(males)[match(names(male_column_renames), names(males))] <-
  unname(male_column_renames)

summary(males)
## id_1 age any_substance_status any_substance
## Min. : 1.0 Min. :19.00 Min. :0.0000 Length:346
## 1st Qu.:108.8 1st Qu.:30.00 1st Qu.:1.0000 Class :character
## Median :236.5 Median :35.00 Median :1.0000 Mode :character
## Mean :230.1 Mean :35.47 Mean :0.7936
## 3rd Qu.:346.8 3rd Qu.:40.00 3rd Qu.:1.0000
## Max. :453.0 Max. :60.00 Max. :1.0000
## NA's :157
## cesd d1 days_using_any_substance dayslink
## Min. : 1.0 Min. : 0.000 Min. : 0.00 Min. : 2.0
## 1st Qu.:24.0 1st Qu.: 1.000 1st Qu.: 4.50 1st Qu.: 65.0
## Median :32.5 Median : 2.000 Median : 34.00 Median :358.0
## Mean :31.6 Mean : 2.928 Mean : 72.73 Mean :251.2
## 3rd Qu.:40.0 3rd Qu.: 3.000 3rd Qu.:154.00 3rd Qu.:365.0
## Max. :58.0 Max. :100.000 Max. :260.00 Max. :456.0
## NA's :159 NA's :16
## drugrisk e2b female sex
## Min. : 0.00 Min. : 1.000 Min. :0 Length:346
## 1st Qu.: 0.00 1st Qu.: 1.000 1st Qu.:0 Class :character
## Median : 0.00 Median : 2.000 Median :0 Mode :character
## Mean : 1.93 Mean : 2.614 Mean :0
## 3rd Qu.: 1.00 3rd Qu.: 3.000 3rd Qu.:0
## Max. :21.00 Max. :21.000 Max. :0
## NA's :1 NA's :175
## g1b homeless i1 i2
## Length:346 Length:346 Min. : 0.00 Min. : 0.00
## Class :character Class :character 1st Qu.: 4.00 1st Qu.: 6.00
## Mode :character Mode :character Median : 13.00 Median : 19.00
## Mean : 18.83 Mean : 25.95
## 3rd Qu.: 26.00 3rd Qu.: 36.00
## Max. :142.00 Max. :184.00
##
## id_2 indtot linkstatus link
## Min. : 1.0 Min. : 4.00 Min. :0.000 Length:346
## 1st Qu.:113.2 1st Qu.:34.00 1st Qu.:0.000 Class :character
## Median :238.5 Median :38.00 Median :0.000 Mode :character
## Mean :234.7 Mean :36.77 Mean :0.397
## 3rd Qu.:354.5 3rd Qu.:41.00 3rd Qu.:1.000
## Max. :469.0 Max. :45.00 Max. :1.000
## NA's :16
## mcs pcs pss_fr race_group
## Min. : 6.763 Min. :14.07 Min. : 0.000 Length:346
## 1st Qu.:22.687 1st Qu.:41.91 1st Qu.: 3.000 Class :character
## Median :29.683 Median :50.23 Median : 6.000 Mode :character
## Mean :32.523 Mean :48.99 Mean : 6.558
## 3rd Qu.:41.352 3rd Qu.:57.64 3rd Qu.:10.000
## Max. :62.175 Max. :74.81 Max. :14.000
##
## substance_abuse_treatment sexrisk substance
## Length:346 Min. : 0.0 Length:346
## Class :character 1st Qu.: 3.0 Class :character
## Mode :character Median : 4.0 Mode :character
## Mean : 4.5
## 3rd Qu.: 6.0
## Max. :13.0
##
## treatment average_drinks max_drinks
## Length:346 Min. : 0.00 Min. : 0.00
## Class :character 1st Qu.: 4.00 1st Qu.: 6.00
## Mode :character Median : 13.00 Median : 19.00
## Mean : 18.83 Mean : 25.95
## 3rd Qu.: 26.00 3rd Qu.: 36.00
## Max. :142.00 Max. :184.00
##
# Same summaries as for the full data, restricted to the male subset.
with(males, mean(age))
## [1] 35.46821
with(males, median(age))
## [1] 35
# na.rm = TRUE drops missing days_using_any_substance values first.
with(males, mean(days_using_any_substance, na.rm = TRUE))
## [1] 72.72727
with(males, median(days_using_any_substance, na.rm = TRUE))
## [1] 34
Note that the median age is the same (35), but the mean age of the males is slightly lower than that of the overall population (35.47 vs. 35.65).
Males also have a slightly lower mean for “days_using_any_substance” than the general population (72.73 vs. 75.31).
# Recode the homeless column to Yes/No labels: "housed" -> "No",
# "homeless" -> "Yes". Any other value is left untouched.
# Uses `<-` for assignment (R convention) and %in%, which never yields NA
# in the logical mask.
males$homeless[males$homeless %in% "housed"] <- "No"
males$homeless[males$homeless %in% "homeless"] <- "Yes"
# Show the first rows to confirm the recode.
head(males)
## id_1 age any_substance_status any_substance cesd d1
## 1 1 37 1 yes 49 3
## 2 2 37 1 yes 30 22
## 3 3 26 1 yes 39 0
## 5 5 32 1 yes 39 12
## 8 8 28 1 yes 32 1
## 10 10 39 1 yes 46 4
## days_using_any_substance dayslink drugrisk e2b female sex g1b homeless
## 1 177 225 0 NA 0 male yes No
## 2 2 NA 0 NA 0 male yes Yes
## 3 3 365 20 NA 0 male no No
## 5 2 57 0 1 0 male no Yes
## 8 47 365 7 8 0 male yes Yes
## 10 115 382 20 3 0 male no Yes
## i1 i2 id_2 indtot linkstatus link mcs pcs pss_fr race_group
## 1 13 26 1 39 1 yes 25.111990 58.41369 0 black
## 2 56 62 2 43 NA <NA> 26.670307 36.03694 1 white
## 3 0 0 3 41 0 no 6.762923 74.80633 13 black
## 5 10 13 5 38 1 yes 21.675755 37.34558 10 black
## 8 12 24 8 44 0 no 9.160530 65.13801 4 white
## 10 20 27 10 44 0 no 36.143761 22.61060 0 white
## substance_abuse_treatment sexrisk substance treatment average_drinks
## 1 no 4 cocaine yes 13
## 2 no 7 alcohol yes 56
## 3 no 2 heroin no 0
## 5 no 6 cocaine no 10
## 8 yes 6 alcohol yes 12
## 10 yes 0 heroin yes 20
## max_drinks
## 1 26
## 2 62
## 3 0
## 5 13
## 8 24
## 10 27
The data set is the same one loaded in Question 1, i.e.: https://raw.githubusercontent.com/murphystout/HELPrct/master/HELPrct.csv