Here is the data to be analyzed. It is actually four short data sets. Use ggplot2 with R. Draw conclusions from your exploratory data analysis. Deliver your code and conclusions in a single R Markdown file on rpubs.com or GitHub.
require(ggplot2)
## Loading required package: ggplot2
require(reshape)
## Loading required package: reshape
require(plyr)
## Loading required package: plyr
##
## Attaching package: 'plyr'
##
## The following objects are masked from 'package:reshape':
##
## rename, round_any
# Build the wide table holding the four x/y data sets (11 rows each).
# Data sets 1-3 use the same x values, so that vector is written once;
# local() keeps the helper out of the global environment.
df <- local({
  x_common <- c(10, 8, 13, 9, 11, 14, 6, 4, 12, 7, 5)
  data.frame(
    x1 = x_common,
    y1 = c(8.04, 6.95, 7.58, 8.81, 8.33, 9.96, 7.24, 4.26, 10.84, 4.82, 5.68),
    x2 = x_common,
    y2 = c(9.14, 8.14, 8.74, 8.77, 9.26, 8.1, 6.13, 3.1, 9.13, 7.26, 4.74),
    x3 = x_common,
    y3 = c(7.46, 6.77, 12.74, 7.11, 7.81, 8.84, 6.08, 5.39, 8.15, 6.42, 5.73),
    x4 = c(8, 8, 8, 8, 8, 8, 8, 19, 8, 8, 8),
    y4 = c(6.58, 5.76, 7.71, 8.84, 8.47, 7.04, 5.25, 12.5, 5.56, 7.91, 6.89)
  )
})
# Print the full table.
df
## x1 y1 x2 y2 x3 y3 x4 y4
## 1 10 8.04 10 9.14 10 7.46 8 6.58
## 2 8 6.95 8 8.14 8 6.77 8 5.76
## 3 13 7.58 13 8.74 13 12.74 8 7.71
## 4 9 8.81 9 8.77 9 7.11 8 8.84
## 5 11 8.33 11 9.26 11 7.81 8 8.47
## 6 14 9.96 14 8.10 14 8.84 8 7.04
## 7 6 7.24 6 6.13 6 6.08 8 5.25
## 8 4 4.26 4 3.10 4 5.39 19 12.50
## 9 12 10.84 12 9.13 12 8.15 8 5.56
## 10 7 4.82 7 7.26 7 6.42 8 7.91
## 11 5 5.68 5 4.74 5 5.73 8 6.89
# Per-column summary of df: min, quartiles, median, mean and max.
summary(df)
## x1 y1 x2 y2
## Min. : 4.0 Min. : 4.260 Min. : 4.0 Min. :3.100
## 1st Qu.: 6.5 1st Qu.: 6.315 1st Qu.: 6.5 1st Qu.:6.695
## Median : 9.0 Median : 7.580 Median : 9.0 Median :8.140
## Mean : 9.0 Mean : 7.501 Mean : 9.0 Mean :7.501
## 3rd Qu.:11.5 3rd Qu.: 8.570 3rd Qu.:11.5 3rd Qu.:8.950
## Max. :14.0 Max. :10.840 Max. :14.0 Max. :9.260
## x3 y3 x4 y4
## Min. : 4.0 Min. : 5.39 Min. : 8 Min. : 5.250
## 1st Qu.: 6.5 1st Qu.: 6.25 1st Qu.: 8 1st Qu.: 6.170
## Median : 9.0 Median : 7.11 Median : 8 Median : 7.040
## Mean : 9.0 Mean : 7.50 Mean : 9 Mean : 7.501
## 3rd Qu.:11.5 3rd Qu.: 7.98 3rd Qu.: 8 3rd Qu.: 8.190
## Max. :14.0 Max. :12.74 Max. :19 Max. :12.500
# Show the attributes of df: column names, row names and class.
attributes(df)
## $names
## [1] "x1" "y1" "x2" "y2" "x3" "y3" "x4" "y4"
##
## $row.names
## [1] 1 2 3 4 5 6 7 8 9 10 11
##
## $class
## [1] "data.frame"
# Draw one base-graphics scatter plot per data set (I-IV), pairing column
# x<k> with y<k>. invisible() suppresses the list of NULLs lapply() returns.
invisible(lapply(1:4, function(k) {
  plot(
    df[[paste0("x", k)]],
    df[[paste0("y", k)]],
    main = paste("Data Set", c("I", "II", "III", "IV")[k]),
    xlab = "x",
    ylab = "y"
  )
}))
# 1. Split df into one two-column (x, y) data frame per data set.
# p1 holds data set 1.
p1 <- with(df, data.frame(x = x1, y = y1))
p1
## x y
## 1 10 8.04
## 2 8 6.95
## 3 13 7.58
## 4 9 8.81
## 5 11 8.33
## 6 14 9.96
## 7 6 7.24
## 8 4 4.26
## 9 12 10.84
## 10 7 4.82
## 11 5 5.68
# p2 holds data set 2 as an (x, y) data frame.
p2 <- with(df, data.frame(x = x2, y = y2))
p2
## x y
## 1 10 9.14
## 2 8 8.14
## 3 13 8.74
## 4 9 8.77
## 5 11 9.26
## 6 14 8.10
## 7 6 6.13
## 8 4 3.10
## 9 12 9.13
## 10 7 7.26
## 11 5 4.74
# p3 holds data set 3 as an (x, y) data frame.
p3 <- with(df, data.frame(x = x3, y = y3))
p3
## x y
## 1 10 7.46
## 2 8 6.77
## 3 13 12.74
## 4 9 7.11
## 5 11 7.81
## 6 14 8.84
## 7 6 6.08
## 8 4 5.39
## 9 12 8.15
## 10 7 6.42
## 11 5 5.73
# p4 holds data set 4 as an (x, y) data frame.
p4 <- with(df, data.frame(x = x4, y = y4))
p4
## x y
## 1 8 6.58
## 2 8 5.76
## 3 8 7.71
## 4 8 8.84
## 5 8 8.47
## 6 8 7.04
## 7 8 5.25
## 8 19 12.50
## 9 8 5.56
## 10 8 7.91
## 11 8 6.89
# Stack the four per-data-set frames into one long frame, keeping x as the id
# column. melt() on a named list records each element's name in column L1.
zz <- melt(
  setNames(list(p1, p2, p3, p4), paste0("dataset", 1:4)),
  id.vars = "x"
)
# Print the melted data points.
zz
## x variable value L1
## 1 10 y 8.04 dataset1
## 2 8 y 6.95 dataset1
## 3 13 y 7.58 dataset1
## 4 9 y 8.81 dataset1
## 5 11 y 8.33 dataset1
## 6 14 y 9.96 dataset1
## 7 6 y 7.24 dataset1
## 8 4 y 4.26 dataset1
## 9 12 y 10.84 dataset1
## 10 7 y 4.82 dataset1
## 11 5 y 5.68 dataset1
## 12 10 y 9.14 dataset2
## 13 8 y 8.14 dataset2
## 14 13 y 8.74 dataset2
## 15 9 y 8.77 dataset2
## 16 11 y 9.26 dataset2
## 17 14 y 8.10 dataset2
## 18 6 y 6.13 dataset2
## 19 4 y 3.10 dataset2
## 20 12 y 9.13 dataset2
## 21 7 y 7.26 dataset2
## 22 5 y 4.74 dataset2
## 23 10 y 7.46 dataset3
## 24 8 y 6.77 dataset3
## 25 13 y 12.74 dataset3
## 26 9 y 7.11 dataset3
## 27 11 y 7.81 dataset3
## 28 14 y 8.84 dataset3
## 29 6 y 6.08 dataset3
## 30 4 y 5.39 dataset3
## 31 12 y 8.15 dataset3
## 32 7 y 6.42 dataset3
## 33 5 y 5.73 dataset3
## 34 8 y 6.58 dataset4
## 35 8 y 5.76 dataset4
## 36 8 y 7.71 dataset4
## 37 8 y 8.84 dataset4
## 38 8 y 8.47 dataset4
## 39 8 y 7.04 dataset4
## 40 8 y 5.25 dataset4
## 41 19 y 12.50 dataset4
## 42 8 y 5.56 dataset4
## 43 8 y 7.91 dataset4
## 44 8 y 6.89 dataset4
# Clean up zz: drop the `variable` column (it is "y" on every row) and give
# the remaining columns descriptive names. The result of rename() has to be
# assigned back to zz; without the assignment the renamed frame is only
# printed and zz keeps its old `value` / `L1` column names.
zz$variable <- NULL
zz <- rename(zz, c("value" = "y", "L1" = "DataSet"))
zz
## x y DataSet
## 1 10 8.04 dataset1
## 2 8 6.95 dataset1
## 3 13 7.58 dataset1
## 4 9 8.81 dataset1
## 5 11 8.33 dataset1
## 6 14 9.96 dataset1
## 7 6 7.24 dataset1
## 8 4 4.26 dataset1
## 9 12 10.84 dataset1
## 10 7 4.82 dataset1
## 11 5 5.68 dataset1
## 12 10 9.14 dataset2
## 13 8 8.14 dataset2
## 14 13 8.74 dataset2
## 15 9 8.77 dataset2
## 16 11 9.26 dataset2
## 17 14 8.10 dataset2
## 18 6 6.13 dataset2
## 19 4 3.10 dataset2
## 20 12 9.13 dataset2
## 21 7 7.26 dataset2
## 22 5 4.74 dataset2
## 23 10 7.46 dataset3
## 24 8 6.77 dataset3
## 25 13 12.74 dataset3
## 26 9 7.11 dataset3
## 27 11 7.81 dataset3
## 28 14 8.84 dataset3
## 29 6 6.08 dataset3
## 30 4 5.39 dataset3
## 31 12 8.15 dataset3
## 32 7 6.42 dataset3
## 33 5 5.73 dataset3
## 34 8 6.58 dataset4
## 35 8 5.76 dataset4
## 36 8 7.71 dataset4
## 37 8 8.84 dataset4
## 38 8 8.47 dataset4
## 39 8 7.04 dataset4
## 40 8 5.25 dataset4
## 41 19 12.50 dataset4
## 42 8 5.56 dataset4
## 43 8 7.91 dataset4
## 44 8 6.89 dataset4
# Sum of the x values within each data set.
xsum <- aggregate(zz$x, by = list(Category = zz$DataSet), FUN = sum)
xsum
## Category x
## 1 dataset1 99
## 2 dataset2 99
## 3 dataset3 99
## 4 dataset4 99
# Sum of the y values within each data set. aggregate() on a bare vector
# labels the result column `x`, so the header below reads `x` even though
# the figures are sums of y.
ysum <- aggregate(zz$y, by = list(Category = zz$DataSet), FUN = sum)
ysum
## Category x
## 1 dataset1 82.51
## 2 dataset2 82.51
## 3 dataset3 82.50
## 4 dataset4 82.51
# Overlay all four data sets in one ggplot2 chart: points plus a connecting
# line per data set, with a fixed colour assigned to each.
ggplot(zz, aes(x, y = y, color = DataSet)) +
  geom_point() +
  geom_line() +
  scale_color_manual(
    "Dataset",
    values = c(
      "dataset1" = "darkgreen",
      "dataset2" = "blue",
      "dataset3" = "red",
      "dataset4" = "orange"
    )
  )