Problem Set 3
Load Diamonds data set Determine the characteristics of the data set and types of variables in it ??veiw #no help view(diamonds) #can't get view() to work and no documentation. levels(diamonds$color)
Descriptive statistics on price Create a histogram for the price of diamonds
hist(diamonds$price)
mean and median
summary(diamonds$price)
How many diamonds cost less than $500? 250? More than 15000?
nrow(subset(diamonds, price < 500))
nrow(subset(diamonds, price <= 250))
nrow(subset(diamonds, price >= 15000))
Now explore the histogram and attempt some transformations of the axis, bin widths and breaks
library(ggplot2)
qplot(price, data = diamonds, color = “red”, xlim == 15000) gets error: Error in xlim == 15000 : comparison (1) is possible only for atomic and list types Try different numbers:
qplot(price, data = diamonds, color = "red", xlim == 15001)
## Error: could not find function "qplot"
qplot(price, data = diamonds, color = "red", xlim == 16000)
## Error: could not find function "qplot"
qplot(price, data = diamonds, color = "red", xlim == 10000)
## Error: could not find function "qplot"
# Well, that was futile.
qplot(price, data = diamonds, color = "red", xlim = 15000)
## Error: could not find function "qplot"
# Single equal sign no help. Ahh, it needs two numbers!
qplot(price, data = diamonds, color = "green", xlim = c(0, 15000))
## Error: could not find function "qplot"
# weird, color doesn't change.
qplot(price, data = diamonds, facets = . ~ price, xlim = c(0, 15000))
## Error: could not find function "qplot"
Trying to get facets, seems like it is taking forever. Stopped after a few minutes of getting binwidth defaulted messages.
qplot(price, data = diamonds, binwidth = 10, facets = binwidth ~ ., xlim = c(0,
15000))
## Error: could not find function "qplot"
# 4 figures arranged in 2 rows and 2 columns
attach(diamonds)
## Error: object 'diamonds' not found
par(mfcol = c(1, 2))
hist(price, binwidth = 10, main = "price in 20 categories")
## Error: object 'price' not found
hist(price, binwidth = 100, main = "price in 50 categories")
## Error: object 'price' not found
Gave me the two graphs but didn't vary the binwidth as specified turns out that 20 is just the default in this case. It appears that binwidth is not a graphical parameter.
help(layout)
par() #get graphical parameters of the current device. Utterly unintelligible.
## $xlog
## [1] FALSE
##
## $ylog
## [1] FALSE
##
## $adj
## [1] 0.5
##
## $ann
## [1] TRUE
##
## $ask
## [1] FALSE
##
## $bg
## [1] "transparent"
##
## $bty
## [1] "o"
##
## $cex
## [1] 1
##
## $cex.axis
## [1] 1
##
## $cex.lab
## [1] 1
##
## $cex.main
## [1] 1.2
##
## $cex.sub
## [1] 1
##
## $cin
## [1] 0.15 0.20
##
## $col
## [1] "black"
##
## $col.axis
## [1] "black"
##
## $col.lab
## [1] "black"
##
## $col.main
## [1] "black"
##
## $col.sub
## [1] "black"
##
## $cra
## [1] 10.8 14.4
##
## $crt
## [1] 0
##
## $csi
## [1] 0.2
##
## $cxy
## [1] 0.02604 0.03876
##
## $din
## [1] 7 7
##
## $err
## [1] 0
##
## $family
## [1] ""
##
## $fg
## [1] "black"
##
## $fig
## [1] 0 1 0 1
##
## $fin
## [1] 7 7
##
## $font
## [1] 1
##
## $font.axis
## [1] 1
##
## $font.lab
## [1] 1
##
## $font.main
## [1] 2
##
## $font.sub
## [1] 1
##
## $lab
## [1] 5 5 7
##
## $las
## [1] 0
##
## $lend
## [1] "round"
##
## $lheight
## [1] 1
##
## $ljoin
## [1] "round"
##
## $lmitre
## [1] 10
##
## $lty
## [1] "solid"
##
## $lwd
## [1] 1
##
## $mai
## [1] 1.02 0.82 0.82 0.42
##
## $mar
## [1] 5.1 4.1 4.1 2.1
##
## $mex
## [1] 1
##
## $mfcol
## [1] 1 1
##
## $mfg
## [1] 1 1 1 1
##
## $mfrow
## [1] 1 1
##
## $mgp
## [1] 3 1 0
##
## $mkh
## [1] 0.001
##
## $new
## [1] FALSE
##
## $oma
## [1] 0 0 0 0
##
## $omd
## [1] 0 1 0 1
##
## $omi
## [1] 0 0 0 0
##
## $page
## [1] TRUE
##
## $pch
## [1] 1
##
## $pin
## [1] 5.76 5.16
##
## $plt
## [1] 0.1171 0.9400 0.1457 0.8829
##
## $ps
## [1] 12
##
## $pty
## [1] "m"
##
## $smo
## [1] 1
##
## $srt
## [1] 0
##
## $tck
## [1] NA
##
## $tcl
## [1] -0.5
##
## $usr
## [1] 0 1 0 1
##
## $xaxp
## [1] 0 1 5
##
## $xaxs
## [1] "r"
##
## $xaxt
## [1] "s"
##
## $xpd
## [1] FALSE
##
## $yaxp
## [1] 0 1 5
##
## $yaxs
## [1] "r"
##
## $yaxt
## [1] "s"
##
## $ylbias
## [1] 0.2
From cookbook for r: create the plots and store them.
library(ggplot2)
p100 <- ggplot(data = diamonds, aes(x = price)) + geom_histogram(colour = "black",
binwidth = 100) + # facet_grid(Diet ~ .) +
ggtitle("histogram with binwidth 100")
# theme(legend.position='none') # No legend (redundant in this graph)
p1 #print the graph
## Error: object 'p1' not found
p50 <- ggplot(data = diamonds, aes(x = price)) + geom_histogram(colour = "black",
binwidth = 50) + # facet_grid(Diet ~ .) +
ggtitle("histogram with binwidth 50")
# theme(legend.position='none') # No legend (redundant in this graph)
p20 <- ggplot(data = diamonds, aes(x = price)) + geom_histogram(colour = "black",
binwidth = 20) + # facet_grid(Diet ~ .) +
ggtitle("histogram with binwidth 20")
# theme(legend.position='none') # No legend (redundant in this graph)
p20 #not what I expected. I was thinking that binwidth was the number of bins
p1000 <- ggplot(data = diamonds, aes(x = price)) + geom_histogram(colour = "black",
binwidth = 1000) + # facet_grid(Diet ~ .) +
ggtitle("histogram with binwidth 1000") + theme(legend.position = "none") # No legend (redundant in this graph)
p1000
p1 <- ggplot(data = diamonds, aes(x = price)) + geom_histogram(colour = "black",
binwidth = 1) + # facet_grid(Diet ~ .) +
ggtitle("histogram with binwidth 1")
# theme(legend.position='none') # No legend (redundant in this graph)
p1
multiplot(p1, p100, p1000, cols = 3)
## Error: could not find function "multiplot"
# couldn't find 'multiplot'
update.packages() #that was pointless
## Error: trying to use CRAN without setting a mirror
multiplot(p1, p100, cols = 2)
## Error: could not find function "multiplot"
Oh, turns out that multiplot is just created by source code. Have saved it as multiplot.R in the EDA folder
library(grid)
multiplot(p1, p100, p1000, cols = 3)
## Error: could not find function "multiplot"
multiplot(p1, p100, p1000, rows = 3)
## Error: could not find function "multiplot"
Now reproduce ramesh-37's plot
(pRamesh_1 <- ggplot(data = diamonds, aes(x = price)) + geom_histogram(colour = "black",
binwidth = 1) + xlim) = c(325, 1450) + # facet_grid(Diet ~ .) +
ggtitle("histogram with binwidth 1")
## Error: non-numeric argument to binary operator
# theme(legend.position='none') didn't work.
Now I use qplot
pRamesh_1 <-
qplot(price, data = diamonds, binwidth = 5, xlim = c(326,1450))
pRamesh_1
qplot(price, data = diamonds)
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
p1
pRamesh_5 <-
ggplot(data = diamonds, aes(x=price)) +
geom_histogram(colour="black", binwidth = 5) +
scale_x_continuous(limits = c(326,1450)) + #corrected syntax
ggtitle("histogram with binwidth = 5")
pRamesh_20 <-
ggplot(data = diamonds, aes(x=price)) +
geom_histogram(colour="black", binwidth = 20) +
scale_x_continuous(limits = c(326,1450)) + #corrected syntax
ggtitle("histogram with binwidth = 20")
pRamesh_50 <-
ggplot(data = diamonds, aes(x=price)) +
geom_histogram(colour="black", binwidth = 50) +
scale_x_continuous(limits = c(326,1450)) + #corrected syntax
ggtitle("histogram with binwidth = 50")
multiplot(pRamesh_5, pRamesh_20, pRamesh_50, cols = 1)
## Error: could not find function "multiplot"
Now we break out diamonds by cut
levels(cut) #thinks I am asking about the grass cutting data set
## NULL
names(diamonds)
## [1] "carat" "cut" "color" "clarity" "depth" "table" "price"
## [8] "x" "y" "z"
levels(diamonds$cut)
## [1] "Fair" "Good" "Very Good" "Premium" "Ideal"
diamondsPriceCutFair <-
ggplot(data = subset(diamonds, cut == "Fair"), aes(x=price)) +
geom_histogram(colour="black", binwidth = 20) +
scale_x_continuous(limits = c(326,1450)) + #corrected syntax
ggtitle("histogram cut Fair")
diamondsPriceCutGood <-
ggplot(data = subset(diamonds, cut == "Good"), aes(x=price)) +
geom_histogram(colour="black", binwidth = 20) +
scale_x_continuous(limits = c(326,1450)) + #corrected syntax
ggtitle("histogram cut Good")
diamondsPriceCutVery <-
ggplot(data = subset(diamonds, cut == "Very Good"), aes(x=price)) +
geom_histogram(colour="black", binwidth = 20) +
scale_x_continuous(limits = c(326,1450)) + #corrected syntax
ggtitle("histogram cut Very Good")
diamondsPriceCutPremium <-
ggplot(data = subset(diamonds, cut == "Premium"), aes(x=price)) +
geom_histogram(colour="black", binwidth = 20) +
scale_x_continuous(limits = c(326,1450)) + #corrected syntax
ggtitle("histogram cut Premium")
diamondsPriceCutIdeal <-
ggplot(data = subset(diamonds, cut == "Ideal"), aes(x=price)) +
geom_histogram(colour="black", binwidth = 20) +
scale_x_continuous(limits = c(326,1450)) + #corrected syntax
ggtitle("histogram cut Ideal")
multiplot(diamondsPriceCutFair,diamondsPriceCutGood,diamondsPriceCutVery,diamondsPriceCutPremium, diamondsPriceCutIdeal, cols = 1)
## Error: could not find function "multiplot"
summary(subset(diamonds$price, cut == "Fair")) #does not work
## Error: comparison (1) is possible only for atomic and list types
summary(price, data = subset(diamonds, cut == "Fair"))
## Error: object 'price' not found
summary(price, data = subset(diamonds, cut == "Good"))
## Error: object 'price' not found
summary(price, data = subset(diamonds, cut == "Very Good"))
## Error: object 'price' not found
summary(price, data = subset(diamonds, cut == "Premium"))
## Error: object 'price' not found
summary(price, data = subset(diamonds, cut == "Ideal"))
## Error: object 'price' not found
# gives same answer everytime.
max(price, data = subset(diamonds, cut == "Fair"))
## Error: object 'price' not found
diamondsFair <- subset(diamonds, cut == "Fair")
summary(diamondsFair$price)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 337 2050 3280 4360 5210 18600
diamondsFair <- subset(diamonds, cut == "Fair")
summary(diamondsFair$price)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 337 2050 3280 4360 5210 18600
max(diamondsFair$price)
## [1] 18574
min(diamondsFair$price)
## [1] 337
diamondsGood <- subset(diamonds, cut == "Good")
summary(diamondsGood$price)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 327 1140 3050 3930 5030 18800
max(diamondsGood$price)
## [1] 18788
min(diamondsGood$price)
## [1] 327
diamondsVGood <- subset(diamonds, cut == "Very Good")
summary(diamondsVGood$price)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 336 912 2650 3980 5370 18800
max(diamondsVGood$price)
## [1] 18818
min(diamondsVGood$price)
## [1] 336
diamondsPremium <- subset(diamonds, cut == "Premium")
summary(diamondsPremium$price)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 326 1050 3180 4580 6300 18800
max(diamondsPremium$price)
## [1] 18823
min(diamondsPremium$price)
## [1] 326
diamondsIdeal <- subset(diamonds, cut == "Ideal")
summary(diamondsIdeal$price)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 326 878 1810 3460 4680 18800
max(diamondsIdeal$price)
## [1] 18806
min(diamondsIdeal$price)
## [1] 326
qplot(x = price, data = diamonds) + facet_wrap(~cut)
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
Produce the array of graphs with the y axes adjusted to the each level of cut.
qplot(x = price, data = diamonds) + facet_wrap(~cut, scales = "free_y")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
logPlot <- ggplot(aes(scale_x_log10(x = price), diamonds)) + facet_wrap(~cut)
## Error: ggplot2 doesn't know how to deal with data of class uneval
qplot(x = log10(price), data = diamonds) + facet_wrap(~cut)
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
logPlot <- ggplot(data = diamonds, aes(x = price)) + geom_histogram(binwidth = 100) +
scale_x_log10()
logPlot
Try it without the log10 thing
logPlot <- ggplot(data = diamonds, aes(x = price)) + geom_histogram(binwidth = 1) +
scale_x_log10()
logPlot
nonLogPlot_bin1 <- ggplot(data = diamonds, aes(x = price)) + geom_histogram(binwidth = 1) +
ggtitle("non-Log Plot, bin 1")
nonLogPlot_bin1
nonLogPlot_bin10 <- ggplot(data = diamonds, aes(x = price)) + geom_histogram(binwidth = 10) +
ggtitle("nonLogPlot")
nonLogPlot_bin10
nonLogPlot_bin100 <- ggplot(data = diamonds, aes(x = price)) + geom_histogram(binwidth = 100) +
ggtitle("nonLogPlot")
nonLogPlot_bin100
nonLogPlot_bin1000 <- ggplot(data = diamonds, aes(x = price)) + geom_histogram(binwidth = 1000) +
ggtitle("nonLogPlot")
nonLogPlot_bin1000
logPlot_bin0.01 <- ggplot(data = diamonds, aes(x = price)) + geom_histogram(binwidth = 0.01) +
scale_x_log10()
# xlim(0,18000)
logPlot_bin0.01
## Warning: position_stack requires constant width: output may be incorrect
logPlot_bin0.1 <- ggplot(data = diamonds, aes(x = price)) + geom_histogram(binwidth = 0.1) +
scale_x_log10()
logPlot_bin0.1
logPlot_bin1 <- ggplot(data = diamonds, aes(x = price)) + geom_histogram(binwidth = 1) +
scale_x_log10() + ggtitle("Log Plot, bin = 1")
logPlot_bin1
logPlot_bin10 <- ggplot(data = diamonds, aes(x = price)) + geom_histogram(binwidth = 10) +
scale_x_log10() + ggtitle("Log Plot, bin 10")
logPlot_bin10
logPlot_bin100 <- ggplot(data = diamonds, aes(x = price)) + geom_histogram(binwidth = 100) +
scale_x_log10() + ggtitle("Log Plot, bin 100")
logPlot_bin100
logPlot_bin1000 <- ggplot(data = diamonds, aes(x = price)) + geom_histogram(binwidth = 1000) +
scale_x_log10() + ggtitle("Log Plot, bin 1000")
logPlot_bin1000
## Error: 'from' cannot be NA, NaN or infinite
Moritz's code:
price_per_carat = diamonds$price/diamonds$carat
qplot(x = price_per_carat, data = diamonds, color = I("black"), fill = I("blue")) +
scale_x_log10() + facet_wrap(~cut, ncol = 1, scales = "free_y")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
Now change it to keep y scale constant
qplot(x = price_per_carat, data = diamonds) + geom_bar(fill = I("blue")) + scale_x_log10() +
facet_wrap(~cut, ncol = 1)
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
Plotting two density curves over each other:
ggplot(diamonds, aes(price_per_carat, fill = cut)) + geom_bar(pos = "dodge") +
scale_x_log10()
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
ggplot(diamonds, aes(price_per_carat, fill = cut)) + geom_density(alpha = 0.2) +
scale_x_log10()
Or you can get two plots by using position = “identity”
ggplot(diamonds, aes(price, fill = cut)) + geom_histogram(alpha = 0.5, aes(y = ..density..),
position = "identity")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
Or use a density plot with alpha = 0.2
ggplot(diamonds, aes(price, fill = cut)) + geom_density(alpha = 0.2) + scale_x_log10()
ggplot(diamonds, aes(price_per_carat, fill = cut)) + geom_density(alpha = 0.2) +
scale_x_log10()
Now use boxplots
ggplot(diamonds, aes(y = price/carat, x = cut)) + geom_boxplot() + scale_y_log10()
summary(diamonds$price)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 326 950 2400 3930 5320 18800
scale_colour_brewer()
## discrete_scale(aesthetics = "colour", scale_name = "brewer",
## palette = brewer_pal(type, palette))
summary(diamonds$price, data = subset(diamonds$cut["Fair"]))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 326 950 2400 3930 5320 18800
summary(diamonds$price, data = subset(diamonds$cut == "Good"))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 326 950 2400 3930 5320 18800
summary(diamonds$price, data = subset(diamonds$cut == "Very Good"))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 326 950 2400 3930 5320 18800
summary(diamonds$price, data = subset(diamonds$cut == "Premium"))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 326 950 2400 3930 5320 18800
summary(diamonds$price, data = subset(diamonds$cut == "Ideal"))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 326 950 2400 3930 5320 18800
summary(diamonds$price, data = subset(diamonds$color == "D"))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 326 950 2400 3930 5320 18800
summary(diamonds$price, data = subset(diamonds$color == "E"))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 326 950 2400 3930 5320 18800
summary(diamonds$price, data = subset(diamonds$color == "F"))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 326 950 2400 3930 5320 18800
summary(diamonds$price, data = subset(diamonds$color == "G"))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 326 950 2400 3930 5320 18800
summary(diamonds$price, data = subset(diamonds$color == "H"))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 326 950 2400 3930 5320 18800
summary(diamonds$price, data = subset(diamonds$color == "I"))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 326 950 2400 3930 5320 18800
summary(diamonds$price, data = subset(diamonds$color == "J"))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 326 950 2400 3930 5320 18800
IQR(subset(diamonds, color == "J")$price)
## [1] 5834
by(diamonds$price, diamonds$color, summary)
## diamonds$color: D
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 357 911 1840 3170 4210 18700
## --------------------------------------------------------
## diamonds$color: E
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 326 882 1740 3080 4000 18700
## --------------------------------------------------------
## diamonds$color: F
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 342 982 2340 3720 4870 18800
## --------------------------------------------------------
## diamonds$color: G
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 354 931 2240 4000 6050 18800
## --------------------------------------------------------
## diamonds$color: H
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 337 984 3460 4490 5980 18800
## --------------------------------------------------------
## diamonds$color: I
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 334 1120 3730 5090 7200 18800
## --------------------------------------------------------
## diamonds$color: J
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 335 1860 4230 5320 7700 18700
`?`(`?`(IQR()))
ggplot(diamonds, aes(y = price/carat, x = color)) + geom_boxplot()
ggplot(diamonds, aes(y = log10(price/carat), x = color)) + geom_boxplot()
ggplot(diamonds, aes(y = sqrt(price/carat), x = color)) + geom_boxplot() + scale_y_log10()
ggplot(diamonds, aes(y = price/carat, x = color)) + geom_boxplot()
4.25^10
## [1] 1922602
summary(diamonds$price/diamonds$carat)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1050 2480 3500 4010 4950 17800
summary(log10(diamonds$price/diamonds$carat))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.02 3.39 3.54 3.55 3.69 4.25
summary((log10(diamonds$price/diamonds$carat))^10)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 63500 203000 312000 373000 474000 1930000
ggplot(data = diamonds, aes(x = color, y = price/carat)) + geom_boxplot() +
scale_y_log10()
Frequency Polygon
ggplot(diamonds, aes(x = carat)) + geom_freqpoly(binwidth = 0.1)
hist(diamonds$carat)
summary(diamonds$carat)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.200 0.400 0.700 0.798 1.040 5.010
warnings()
## NULL
qplot(data = diamonds, x = carat, binwidth = 0.1, color = I("black"), fill = I("turquoise")) +
scale_x_continuous(limits = c(0, 6), breaks = seq(0, 6, 0.1)) + geom_abline(intercept = 2000,
slope = 0, color = "red", size = 1, lty = "dashed")