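Libraries used (inferred from the functions called below): ggplot2 for the plots, kableExtra for the summary table (it also provides the `%>%` pipe), and cowplot for the combined plot grid.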
# Load the homework data set. The path is relative to the current working
# directory. Later code reads the columns random_A through random_D.
df <- read.csv(file = "hw2_DataSet.csv")
For each data set, calculate the mean, median, and 1st and 3rd quartile values. I first printed a summary of the data so that I would have known-correct results to check my own calculations against.
# Print R's built-in per-column summary as a reference for the values
# computed by hand below.
summary(object = df)
## sample random_A random_B random_C
## Min. : 1.00 Min. :0.4419 Min. : 0.04704 Min. :1.197
## 1st Qu.: 25.75 1st Qu.:4.1571 1st Qu.: 0.36628 1st Qu.:2.545
## Median : 50.50 Median :5.1800 Median : 0.94448 Median :4.856
## Mean : 50.50 Mean :5.1633 Mean : 1.66238 Mean :4.930
## 3rd Qu.: 75.25 3rd Qu.:6.1091 3rd Qu.: 1.98084 3rd Qu.:7.315
## Max. :100.00 Max. :9.8024 Max. :14.32779 Max. :8.617
## random_D
## Min. :0.1738
## 1st Qu.:3.0699
## Median :5.0803
## Mean :5.0208
## 3rd Qu.:7.0839
## Max. :9.9003
# Summary statistics for the four random_* columns, grouped by statistic.

# Means
mean_a <- mean(df[["random_A"]])
mean_b <- mean(df[["random_B"]])
mean_c <- mean(df[["random_C"]])
mean_d <- mean(df[["random_D"]])

# Medians
med_a <- median(df[["random_A"]])
med_b <- median(df[["random_B"]])
med_c <- median(df[["random_C"]])
med_d <- median(df[["random_D"]])

# First quartiles (25th percentile)
q1_a <- quantile(df[["random_A"]], probs = 0.25)
q1_b <- quantile(df[["random_B"]], probs = 0.25)
q1_c <- quantile(df[["random_C"]], probs = 0.25)
q1_d <- quantile(df[["random_D"]], probs = 0.25)

# Third quartiles (75th percentile)
q3_a <- quantile(df[["random_A"]], probs = 0.75)
q3_b <- quantile(df[["random_B"]], probs = 0.75)
q3_c <- quantile(df[["random_C"]], probs = 0.75)
q3_d <- quantile(df[["random_D"]], probs = 0.75)

# One vector per data set, ordered: mean, median, 1st quartile, 3rd quartile.
dfA <- c(mean_a, med_a, q1_a, q3_a)
dfB <- c(mean_b, med_b, q1_b, q3_b)
dfC <- c(mean_c, med_c, q1_c, q3_c)
dfD <- c(mean_d, med_d, q1_d, q3_d)

# Assemble the table: one column per data set, one row per statistic.
summary_all <- data.frame(dfA, dfB, dfC, dfD)
dimnames(summary_all) <- list(
  c("Mean", "Median", "1st Quartile", "3rd Quartile"),
  c("Data Set A", "Data Set B", "Data Set C", "Data Set D")
)
Table using kableExtra
# Render the summary table with kableExtra: kbl() builds the table and
# kable_styling() is applied to it with its default arguments.
kable_styling(kbl(summary_all))
| | Data Set A | Data Set B | Data Set C | Data Set D |
|---|---|---|---|---|
| Mean | 5.163338 | 1.6623782 | 4.930255 | 5.020810 |
| Median | 5.179985 | 0.9444801 | 4.856182 | 5.080256 |
| 1st Quartile | 4.157068 | 0.3662825 | 2.545191 | 3.069908 |
| 3rd Quartile | 6.109085 | 1.9808429 | 7.315131 | 7.083904 |
# ---- Data set A: strip plot, histogram, and boxplot of random_A ----

# Reference lines marking the quartiles (blue, dashed), the mean (default
# solid line) and the median (green, dashed) of random_A.
# Each statistic is computed once and passed as a constant `xintercept`.
# Mapping a scalar summary through aes() recycles it to every row of `df`,
# which draws one overlapping line per observation.
Q1P_a <- geom_vline(
  xintercept = quantile(df$random_A, 0.25, names = FALSE),
  color = "blue",
  linetype = "dashed"
)
Q3P_a <- geom_vline(
  xintercept = quantile(df$random_A, 0.75, names = FALSE),
  color = "blue",
  linetype = "dashed"
)
meanP_a <- geom_vline(xintercept = mean(df$random_A))
medP_a <- geom_vline(
  xintercept = median(df$random_A),
  color = "green",
  linetype = "dashed"
)

# Components shared by all three 'A' plots. Adding a list to a ggplot adds
# each element in order.
stat_lines_a <- list(Q1P_a, Q3P_a, meanP_a, medP_a)
blank_y_axis_a <- theme(
  axis.text.y = element_blank(),
  axis.ticks.y = element_blank(),
  axis.title.y = element_blank()
)

# Base plot: random_A on the x axis.
dfA_plot <- ggplot(df, aes(x = random_A))

# One-dimensional scatter: every observation is placed at y = 0.
dfA_point <- dfA_plot +
  geom_point(aes(y = 0), alpha = 0.5, size = 0.5) +
  labs(title = "'A' Scatterplot", x = NULL, y = NULL) +
  blank_y_axis_a +
  stat_lines_a

# Histogram with 30 bins.
dfA_hist <- dfA_plot +
  geom_histogram(bins = 30, color = "black", fill = "white") +
  labs(title = "'A' Histogram", x = NULL, y = NULL) +
  blank_y_axis_a +
  stat_lines_a

# Boxplot drawn along the x axis (only x is mapped).
dfA_box <- dfA_plot +
  geom_boxplot() +
  labs(title = "'A' Boxplot", x = NULL, y = NULL) +
  blank_y_axis_a +
  stat_lines_a
# ---- Data set B: strip plot, histogram, and boxplot of random_B ----

# Reference lines marking the quartiles (blue, dashed), the mean (default
# solid line) and the median (green, dashed) of random_B.
# Each statistic is computed once and passed as a constant `xintercept`.
# Mapping a scalar summary through aes() recycles it to every row of `df`,
# which draws one overlapping line per observation.
Q1P_b <- geom_vline(
  xintercept = quantile(df$random_B, 0.25, names = FALSE),
  color = "blue",
  linetype = "dashed"
)
Q3P_b <- geom_vline(
  xintercept = quantile(df$random_B, 0.75, names = FALSE),
  color = "blue",
  linetype = "dashed"
)
meanP_b <- geom_vline(xintercept = mean(df$random_B))
medP_b <- geom_vline(
  xintercept = median(df$random_B),
  color = "green",
  linetype = "dashed"
)

# Components shared by all three 'B' plots. Adding a list to a ggplot adds
# each element in order.
stat_lines_b <- list(Q1P_b, Q3P_b, meanP_b, medP_b)
blank_y_axis_b <- theme(
  axis.text.y = element_blank(),
  axis.ticks.y = element_blank(),
  axis.title.y = element_blank()
)

# Base plot: random_B on the x axis.
dfB_plot <- ggplot(df, aes(x = random_B))

# One-dimensional scatter: every observation is placed at y = 0.
dfB_point <- dfB_plot +
  geom_point(aes(y = 0), alpha = 0.5, size = 0.5) +
  labs(title = "'B' Scatterplot", x = NULL, y = NULL) +
  blank_y_axis_b +
  stat_lines_b

# Histogram with 30 bins.
dfB_hist <- dfB_plot +
  geom_histogram(bins = 30, color = "black", fill = "white") +
  labs(title = "'B' Histogram", x = NULL, y = NULL) +
  blank_y_axis_b +
  stat_lines_b

# Boxplot drawn along the x axis (only x is mapped).
dfB_box <- dfB_plot +
  geom_boxplot() +
  labs(title = "'B' Boxplot", x = NULL, y = NULL) +
  blank_y_axis_b +
  stat_lines_b
# ---- Data set C: strip plot, histogram, and boxplot of random_C ----

# Reference lines marking the quartiles (blue, dashed), the mean (default
# solid line) and the median (green, dashed) of random_C.
# Each statistic is computed once and passed as a constant `xintercept`.
# Mapping a scalar summary through aes() recycles it to every row of `df`,
# which draws one overlapping line per observation.
Q1P_c <- geom_vline(
  xintercept = quantile(df$random_C, 0.25, names = FALSE),
  color = "blue",
  linetype = "dashed"
)
Q3P_c <- geom_vline(
  xintercept = quantile(df$random_C, 0.75, names = FALSE),
  color = "blue",
  linetype = "dashed"
)
meanP_c <- geom_vline(xintercept = mean(df$random_C))
medP_c <- geom_vline(
  xintercept = median(df$random_C),
  color = "green",
  linetype = "dashed"
)

# Components shared by all three 'C' plots. Adding a list to a ggplot adds
# each element in order.
stat_lines_c <- list(Q1P_c, Q3P_c, meanP_c, medP_c)
blank_y_axis_c <- theme(
  axis.text.y = element_blank(),
  axis.ticks.y = element_blank(),
  axis.title.y = element_blank()
)

# Base plot: random_C on the x axis.
dfC_plot <- ggplot(df, aes(x = random_C))

# One-dimensional scatter: every observation is placed at y = 0.
dfC_point <- dfC_plot +
  geom_point(aes(y = 0), alpha = 0.5, size = 0.5) +
  labs(title = "'C' Scatterplot", x = NULL, y = NULL) +
  blank_y_axis_c +
  stat_lines_c

# Histogram with 30 bins.
dfC_hist <- dfC_plot +
  geom_histogram(bins = 30, color = "black", fill = "white") +
  labs(title = "'C' Histogram", x = NULL, y = NULL) +
  blank_y_axis_c +
  stat_lines_c

# Boxplot drawn along the x axis (only x is mapped).
dfC_box <- dfC_plot +
  geom_boxplot() +
  labs(title = "'C' Boxplot", x = NULL, y = NULL) +
  blank_y_axis_c +
  stat_lines_c
# ---- Data set D: strip plot, histogram, and boxplot of random_D ----

# Reference lines marking the quartiles (blue, dashed), the mean (default
# solid line) and the median (green, dashed) of random_D.
# Each statistic is computed once and passed as a constant `xintercept`.
# Mapping a scalar summary through aes() recycles it to every row of `df`,
# which draws one overlapping line per observation.
Q1P_d <- geom_vline(
  xintercept = quantile(df$random_D, 0.25, names = FALSE),
  color = "blue",
  linetype = "dashed"
)
Q3P_d <- geom_vline(
  xintercept = quantile(df$random_D, 0.75, names = FALSE),
  color = "blue",
  linetype = "dashed"
)
meanP_d <- geom_vline(xintercept = mean(df$random_D))
medP_d <- geom_vline(
  xintercept = median(df$random_D),
  color = "green",
  linetype = "dashed"
)

# Components shared by all three 'D' plots. Adding a list to a ggplot adds
# each element in order.
stat_lines_d <- list(Q1P_d, Q3P_d, meanP_d, medP_d)
blank_y_axis_d <- theme(
  axis.text.y = element_blank(),
  axis.ticks.y = element_blank(),
  axis.title.y = element_blank()
)

# Base plot: random_D on the x axis.
dfD_plot <- ggplot(df, aes(x = random_D))

# One-dimensional scatter: every observation is placed at y = 0.
dfD_point <- dfD_plot +
  geom_point(aes(y = 0), alpha = 0.5, size = 0.5) +
  labs(title = "'D' Scatterplot", x = NULL, y = NULL) +
  blank_y_axis_d +
  stat_lines_d

# Histogram with 30 bins.
dfD_hist <- dfD_plot +
  geom_histogram(bins = 30, color = "black", fill = "white") +
  labs(title = "'D' Histogram", x = NULL, y = NULL) +
  blank_y_axis_d +
  stat_lines_d

# Boxplot drawn along the x axis (only x is mapped).
dfD_box <- dfD_plot +
  geom_boxplot() +
  labs(title = "'D' Boxplot", x = NULL, y = NULL) +
  blank_y_axis_d +
  stat_lines_d
# ---- Combine all twelve plots into one titled figure ----

# 3 x 4 grid filled row by row: scatterplots on the first row, boxplots on
# the second, histograms on the third; one column per data set (A to D).
combined_plots <- plot_grid(
  plotlist = list(
    dfA_point, dfB_point, dfC_point, dfD_point,
    dfA_box, dfB_box, dfC_box, dfD_box,
    dfA_hist, dfB_hist, dfC_hist, dfD_hist
  ),
  align = "hv",
  nrow = 3,
  ncol = 4
)

# Bold, left-aligned title drawn on an empty canvas; the left margin of 7
# nudges the label in from the edge.
title <- ggdraw() +
  draw_label("Combined Grids", fontface = "bold", x = 0, hjust = 0) +
  theme(plot.margin = margin(t = 0, r = 0, b = 0, l = 7))

# Stack the title above the grid; the title strip gets a relative height of
# 0.1 against 1 for the plots.
final_grid <- plot_grid(
  title,
  combined_plots,
  ncol = 1,
  rel_heights = c(0.1, 1)
)

# Display the finished figure.
final_grid