Exploratory Data Analysis in R. Choose an interesting dataset and use R graphics to describe the data. You may use base R graphics or a graphics package of your choice. You should include at least one example of each of the following: a histogram, a boxplot, and a scatterplot.

library(RCurl)
## Loading required package: bitops
library(ggplot2)
library(plyr)
library(reshape2)
# install.packages("extrafont")
library(extrafont)
## Registering fonts with R

Read in the .csv file and rename the data columns

# Download the raw CSV over HTTPS and parse it from the in-memory text.
# stringsAsFactors = TRUE is spelled out because the read.csv default changed
# to FALSE in R 4.0; with it, `group` is read as a factor on every R version.
weightLoss.data <- read.csv(
  text = getURL("https://raw.githubusercontent.com/ann2014/CUNY/master/WeightLoss.csv"),
  stringsAsFactors = TRUE
)
# Preview the first rows to confirm the file was parsed as expected.
head(weightLoss.data)
##   X   group wl1 wl2 wl3 se1 se2 se3
## 1 1 Control   4   3   3  14  13  15
## 2 2 Control   4   4   3  13  14  17
## 3 3 Control   4   3   1  17  12  16
## 4 4 Control   3   2   1  11  11  12
## 5 5 Control   5   3   2  16  15  14
## 6 6 Control   6   5   4  17  18  18
# Replace the terse source headers with descriptive names in one assignment:
# column 1 becomes the id, columns 3-5 the monthly weight-loss measurements,
# and columns 6-8 the monthly self-esteem measurements. Column 2 (group) is
# left untouched.
names(weightLoss.data)[c(1, 3:8)] <- c(
  "id",
  paste0("WeightLoss_month", 1:3),
  paste0("SelfEsteem_month", 1:3)
)

# Reshape the weight-loss columns to long format: one row per id/group/month.
wl.data <- melt(
  weightLoss.data[, 1:5],
  id.vars = c("id", "group"),
  variable.name = "WeightLoss_Month",
  value.name = "WeightLoss"
)
# Same reshape for the self-esteem columns.
we.data <- melt(
  weightLoss.data[, c(1, 2, 6, 7, 8)],
  id.vars = c("id", "group"),
  variable.name = "SelfEsteem_Month",
  value.name = "SelfEsteem_Score"
)
# Both long tables come from the same rows in the same order, so they can be
# bound side by side; only the self-esteem measure columns are taken from the
# second table so that id and group are not duplicated.
data.long <- cbind(
  wl.data,
  we.data[, c("SelfEsteem_Month", "SelfEsteem_Score")]
)

# Show the structure of the wide-format data: column types and leading values.
str(weightLoss.data)
## 'data.frame':    34 obs. of  8 variables:
##  $ id               : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ group            : Factor w/ 3 levels "Control","Diet",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ WeightLoss_month1: int  4 4 4 3 5 6 6 5 5 3 ...
##  $ WeightLoss_month2: int  3 4 3 2 3 5 5 4 4 3 ...
##  $ WeightLoss_month3: int  3 3 1 1 2 4 4 1 1 2 ...
##  $ SelfEsteem_month1: int  14 13 17 11 16 17 17 13 14 14 ...
##  $ SelfEsteem_month2: int  13 14 12 11 15 18 16 15 14 15 ...
##  $ SelfEsteem_month3: int  15 17 16 12 14 18 19 15 15 13 ...
# Print per-column summary statistics for the wide-format data.
summary(weightLoss.data)
##        id            group    WeightLoss_month1 WeightLoss_month2
##  Min.   : 1.00   Control:12   Min.   :3.000     Min.   :2.000    
##  1st Qu.: 9.25   Diet   :12   1st Qu.:4.000     1st Qu.:3.000    
##  Median :17.50   DietEx :10   Median :5.000     Median :4.000    
##  Mean   :17.50                Mean   :5.294     Mean   :4.353    
##  3rd Qu.:25.75                3rd Qu.:6.000     3rd Qu.:5.000    
##  Max.   :34.00                Max.   :9.000     Max.   :9.000    
##  WeightLoss_month3 SelfEsteem_month1 SelfEsteem_month2 SelfEsteem_month3
##  Min.   :1.000     Min.   :11.00     Min.   :11.00     Min.   :11.00    
##  1st Qu.:1.000     1st Qu.:13.00     1st Qu.:12.00     1st Qu.:15.00    
##  Median :2.000     Median :15.00     Median :14.00     Median :17.00    
##  Mean   :2.176     Mean   :14.91     Mean   :13.82     Mean   :16.21    
##  3rd Qu.:3.000     3rd Qu.:16.00     3rd Qu.:15.00     3rd Qu.:18.00    
##  Max.   :4.000     Max.   :19.00     Max.   :19.00     Max.   :19.00
# Treat month-1 weight loss (pounds) as categorical data and cross-tabulate
# its frequency by group.
table(
  weightLoss.data[["group"]],
  weightLoss.data[["WeightLoss_month1"]]
)
##          
##           3 4 5 6 7 8 9
##   Control 2 4 4 2 0 0 0
##   Diet    1 3 2 3 3 0 0
##   DietEx  2 1 0 2 2 1 2

Creating Histograms: use histograms to view the distribution of one-dimensional data

# Bar chart of weight-loss counts (weight loss treated as a factor), faceted
# by month (rows) and group (columns), with self-esteem scores overlaid as
# lines and points.
a <- ggplot(data.long, aes(x = as.factor(WeightLoss), fill = group)) +
  labs(x = "Weight in pounds", y = "Count",
       title = "Weight Loss by Group within 3 months") +
  geom_bar() +
  facet_grid(WeightLoss_Month ~ group) +
  # geom_line has no fill aesthetic, so only y is mapped in this layer.
  geom_line(aes(y = SelfEsteem_Score)) +
  # A constant colour must be set outside aes(); inside aes() the string
  # "blue" is treated as data and mapped through the colour scale, which
  # yields a default palette colour and a spurious legend entry.
  geom_point(aes(y = SelfEsteem_Score), colour = "blue") +
  theme(legend.position = "bottom",
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.grid.minor.y = element_blank(),
        legend.key.size = unit(.5, "cm"),
        axis.ticks.y = element_blank(),
        plot.margin = unit(c(1, 0, 0, 0), units = "lines"),
        plot.title = element_text(size = 30, lineheight = .8,
                                  vjust = 1, family = "Bauhaus 93")) +
  # Title the fill legend through the scale's `name` argument rather than
  # passing a guide object positionally.
  scale_fill_discrete(name = "Group")
a

Create Scatter Plots

# Month 1: self-esteem score against weight loss, one panel per group.
ggplot(
  weightLoss.data,
  aes(x = WeightLoss_month1, y = SelfEsteem_month1, colour = group)
) +
  geom_point() +
  facet_wrap(~ group) +
  labs(
    title = "Weight Loss vs. Self-Esteem - Month 1",
    x = "Weight Loss",
    y = "Self-Esteem Score"
  )

# Month 2: self-esteem score against weight loss, one panel per group.
ggplot(
  weightLoss.data,
  aes(x = WeightLoss_month2, y = SelfEsteem_month2, colour = group)
) +
  geom_point() +
  facet_wrap(~ group) +
  labs(
    title = "Weight Loss vs. Self-Esteem - Month 2",
    x = "Weight Loss",
    y = "Self-Esteem Score"
  )

# Month 3: self-esteem score against weight loss, one panel per group.
ggplot(
  weightLoss.data,
  aes(x = WeightLoss_month3, y = SelfEsteem_month3, colour = group)
) +
  geom_point() +
  facet_wrap(~ group) +
  labs(
    title = "Weight Loss vs. Self-Esteem - Month 3",
    x = "Weight Loss",
    y = "Self-Esteem Score"
  )

Create Boxplots

# Month 1: horizontal boxplots per group. Weight loss (default fill) and
# self-esteem (green fill) are drawn on the same value axis.
ggplot(weightLoss.data, aes(x = group)) +
  geom_boxplot(aes(y = WeightLoss_month1)) +
  geom_boxplot(aes(y = SelfEsteem_month1), fill = "chartreuse4") +
  labs(
    title = "Weight Loss vs. Self-Esteem - Month 1",
    x = "Group",
    y = "Weight Loss      Self-Esteem Score"
  ) +
  coord_flip()

# Month 2: horizontal boxplots per group. Weight loss (default fill) and
# self-esteem (green fill) are drawn on the same value axis.
ggplot(weightLoss.data, aes(x = group)) +
  geom_boxplot(aes(y = WeightLoss_month2)) +
  geom_boxplot(aes(y = SelfEsteem_month2), fill = "chartreuse4") +
  labs(
    title = "Weight Loss vs. Self-Esteem - Month 2",
    x = "Group",
    y = "Weight Loss      Self-Esteem Score"
  ) +
  coord_flip()

# Month 3: horizontal boxplots per group. Weight loss (default fill) and
# self-esteem (green fill) are drawn on the same value axis.
ggplot(weightLoss.data, aes(x = group)) +
  geom_boxplot(aes(y = WeightLoss_month3)) +
  geom_boxplot(aes(y = SelfEsteem_month3), fill = "chartreuse4") +
  labs(
    title = "Weight Loss vs. Self-Esteem - Month 3",
    x = "Group",
    y = "Weight Loss      Self-Esteem Score"
  ) +
  coord_flip()

The visualizations give a clear picture of the relationship between weight loss and self-esteem during the 3-month weight-loss program. The three groups in the study also showed different patterns in the self-esteem measurements: - Greater weight loss was associated with higher self-esteem. - The DietEx group lost the most weight and the Control group lost the least in the first 2 months; the data show the same pattern at month 3. - The longer into the program, the less weight was lost.