require(mosaicCore)
require(mosaicData)
require(ggplot2)

Importing one of the many datasets in the mosaicData package (see https://cran.r-project.org/web/packages/mosaicData/mosaicData.pdf)

# Preview the first rows of HELPrct; the echoed output below wraps the
# columns across three banks because the data frame is wide.
head(HELPrct)
##   age anysubstatus anysub cesd d1 daysanysub dayslink drugrisk e2b female
## 1  37            1    yes   49  3        177      225        0  NA      0
## 2  37            1    yes   30 22          2       NA        0  NA      0
## 3  26            1    yes   39  0          3      365       20  NA      0
## 4  39            1    yes   15  2        189      343        0   1      1
## 5  32            1    yes   39 12          2       57        0   1      0
## 6  47            1    yes    6  1         31      365        0  NA      1
##      sex g1b homeless i1 i2 id indtot linkstatus link       mcs      pcs
## 1   male yes   housed 13 26  1     39          1  yes 25.111990 58.41369
## 2   male yes homeless 56 62  2     43         NA <NA> 26.670307 36.03694
## 3   male  no   housed  0  0  3     41          0   no  6.762923 74.80633
## 4 female  no   housed  5  5  4     28          0   no 43.967880 61.93168
## 5   male  no homeless 10 13  5     38          1  yes 21.675755 37.34558
## 6 female  no   housed  4  4  6     29          0   no 55.508991 46.47521
##   pss_fr racegrp satreat sexrisk substance treat avg_drinks max_drinks
## 1      0   black      no       4   cocaine   yes         13         26
## 2      1   white      no       7   alcohol   yes         56         62
## 3     13   black      no       2    heroin    no          0          0
## 4     11   white     yes       4    heroin    no          5          5
## 5     10   black      no       6   cocaine    no         10         13
## 6      5   black      no       5   cocaine   yes          4          4
# List every column name in HELPrct (29 in the output below); `racegrp`,
# `substance` and `sex` are the ones used by the plots later in this file.
variable.names(HELPrct)
##  [1] "age"          "anysubstatus" "anysub"       "cesd"        
##  [5] "d1"           "daysanysub"   "dayslink"     "drugrisk"    
##  [9] "e2b"          "female"       "sex"          "g1b"         
## [13] "homeless"     "i1"           "i2"           "id"          
## [17] "indtot"       "linkstatus"   "link"         "mcs"         
## [21] "pcs"          "pss_fr"       "racegrp"      "satreat"     
## [25] "sexrisk"      "substance"    "treat"        "avg_drinks"  
## [29] "max_drinks"

Categorical plots in ggplot

Let's say you want to graphically explore whether there is a relationship between the type of substance abuse and race.

Interpretation: Are people more or less likely to be addicted to a certain type of substance depending on their race?

library(ggplot2)

# Proportional stacked bar chart: one bar per race group (`racegrp`), with the
# bar split by `substance`. position = "fill" rescales every bar to height 1,
# so segment heights are within-group proportions rather than raw counts.
p <- ggplot(data = HELPrct, aes(x = racegrp, fill = substance)) +
  geom_bar(position = "fill") +
  theme_bw() +
  labs(
    x = "Race",
    y = "Proportion",
    title = "Fraction of subjects in the HELP RCT  with different race group by substance abuse status"
  ) +
  # Optional: replace the default legend title (the name of the fill variable).
  scale_fill_discrete(name = "Substance abuse type")

# Explicit print so the plot is rendered even when the script is source()d.
print(p)

### Exploring differences by gender

library(ggplot2)

# Same proportional stacked bar chart as before (race group on the x-axis,
# bars split by substance and rescaled to height 1 via position = "fill"),
# now drawn separately for each level of `sex`.
p <- ggplot(data = HELPrct, aes(x = racegrp, fill = substance)) +
  geom_bar(position = "fill") +
  theme_bw() +
  labs(
    x = "Race",
    y = "Proportion",
    title = "Fraction of subjects in the HELP RCT  with different race group by substance abuse status"
  ) +
  # Optional: replace the default legend title (the name of the fill variable).
  scale_fill_discrete(name = "Substance Abuse Type") +
  # One facet row per level of `sex`; the columns are left unfaceted.
  facet_grid(sex ~ .) +
  # Rotate the x-axis labels to vertical. This must come after theme_bw(),
  # because a complete theme added later would reset the tweak.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5))

# Explicit print so the plot is rendered even when the script is source()d.
print(p)