library(ggplot2)
# Load the diamonds dataset (shipped with ggplot2) into the workspace
# and print a per-column summary of it.
data(diamonds)
summary(object = diamonds)
## carat cut color clarity
## Min. :0.2000 Fair : 1610 D: 6775 SI1 :13065
## 1st Qu.:0.4000 Good : 4906 E: 9797 VS2 :12258
## Median :0.7000 Very Good:12082 F: 9542 SI2 : 9194
## Mean :0.7979 Premium :13791 G:11292 VS1 : 8171
## 3rd Qu.:1.0400 Ideal :21551 H: 8304 VVS2 : 5066
## Max. :5.0100 I: 5422 VVS1 : 3655
## J: 2808 (Other): 2531
## depth table price x
## Min. :43.00 Min. :43.00 Min. : 326 Min. : 0.000
## 1st Qu.:61.00 1st Qu.:56.00 1st Qu.: 950 1st Qu.: 4.710
## Median :61.80 Median :57.00 Median : 2401 Median : 5.700
## Mean :61.75 Mean :57.46 Mean : 3933 Mean : 5.731
## 3rd Qu.:62.50 3rd Qu.:59.00 3rd Qu.: 5324 3rd Qu.: 6.540
## Max. :79.00 Max. :95.00 Max. :18823 Max. :10.740
##
## y z
## Min. : 0.000 Min. : 0.000
## 1st Qu.: 4.720 1st Qu.: 2.910
## Median : 5.710 Median : 3.530
## Mean : 5.735 Mean : 3.539
## 3rd Qu.: 6.540 3rd Qu.: 4.040
## Max. :58.900 Max. :31.800
##
# Histogram of price in bins of width 500, with x-axis ticks every 5000.
# Written with ggplot() + geom_histogram() because qplot() is deprecated
# in current ggplot2 releases.
ggplot(diamonds, aes(x = price)) +
  geom_histogram(binwidth = 500, colour = "black", fill = "#CCDDFF") +
  scale_x_continuous(breaks = seq(0, 20000, 5000))
# Descriptive statistics for price
summary(diamonds[["price"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 326 950 2401 3933 5324 18820
# Histogram of price (bin width 500), one panel per cut stacked in a
# single column. Uses ggplot() + geom_histogram() because qplot() is
# deprecated in current ggplot2 releases.
ggplot(diamonds, aes(x = price)) +
  geom_histogram(binwidth = 500, colour = "black", fill = "#CCDDFF") +
  scale_x_continuous(breaks = seq(0, 20000, 5000)) +
  facet_wrap(~cut, ncol = 1)
# Descriptive statistics of price broken down by cut.
# (When specific statistics are needed, it is better not to rely on
# summary().)
# Number of diamonds in each cut category:
table(diamonds[["cut"]])
##
## Fair Good Very Good Premium Ideal
## 1610 4906 12082 13791 21551
# Apply summary() to price separately within each cut level.
# INDICES is kept as `diamonds$cut` because by() uses that expression
# as the label in the printed group headers.
by(
  data = diamonds$price,
  INDICES = diamonds$cut,
  FUN = summary
)
## diamonds$cut: Fair
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 337 2050 3282 4359 5206 18570
## --------------------------------------------------------
## diamonds$cut: Good
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 327 1145 3050 3929 5028 18790
## --------------------------------------------------------
## diamonds$cut: Very Good
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 336 912 2648 3982 5373 18820
## --------------------------------------------------------
## diamonds$cut: Premium
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 326 1046 3185 4584 6296 18820
## --------------------------------------------------------
## diamonds$cut: Ideal
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 326 878 1810 3458 4678 18810
# Histogram of price per cut, with an independent y-axis for each panel
# so that cuts with few diamonds remain readable.
# Fixes: removed the stray empty argument in facet_wrap(~cut,,...), and
# replaced the deprecated qplot() with ggplot() + geom_histogram().
# No binwidth is given on purpose, so stat_bin() keeps its default of
# 30 bins (and emits its usual "Pick better value" message).
ggplot(diamonds, aes(x = price)) +
  geom_histogram() +
  facet_wrap(~cut, scales = "free_y")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of carat (bin width 0.1), one panel per cut, with the x-axis
# limited to 0-3; rows outside that range are dropped by the scale.
# NOTE(review): the original heading read "histogram of price per
# carat", but the plotted variable is carat itself -- confirm whether
# price / carat was intended.
# Uses ggplot() + geom_histogram() because qplot() is deprecated in
# current ggplot2 releases.
ggplot(diamonds, aes(x = carat)) +
  geom_histogram(binwidth = 0.1, colour = "black", fill = "#E38EFF") +
  scale_x_continuous(limits = c(0, 3), breaks = seq(0, 3, 0.2)) +
  facet_wrap(~cut)
## Warning: Removed 32 rows containing non-finite values (stat_bin).
# Box plot of price for each color grade.
# coord_cartesian() zooms the y-axis to 0-7700 without discarding data,
# so the box statistics are still computed from all rows.
# Uses ggplot() + geom_boxplot() because qplot() is deprecated in
# current ggplot2 releases.
ggplot(diamonds, aes(x = color, y = price)) +
  geom_boxplot() +
  coord_cartesian(ylim = c(0, 7700)) +
  scale_y_continuous(breaks = seq(0, 8000, 500))
# Descriptive statistics for a single factor level: only the diamonds
# whose color grade is J.
summary(
  subset(diamonds, color == "J")
)
## carat cut color clarity depth
## Min. :0.230 Fair :119 D: 0 SI1 :750 Min. :43.00
## 1st Qu.:0.710 Good :307 E: 0 VS2 :731 1st Qu.:61.20
## Median :1.110 Very Good:678 F: 0 VS1 :542 Median :62.00
## Mean :1.162 Premium :808 G: 0 SI2 :479 Mean :61.89
## 3rd Qu.:1.520 Ideal :896 H: 0 VVS2 :131 3rd Qu.:62.70
## Max. :5.010 I: 0 VVS1 : 74 Max. :73.60
## J:2808 (Other):101
## table price x y
## Min. :51.60 Min. : 335 Min. : 3.930 Min. : 3.900
## 1st Qu.:56.00 1st Qu.: 1860 1st Qu.: 5.700 1st Qu.: 5.718
## Median :58.00 Median : 4234 Median : 6.640 Median : 6.630
## Mean :57.81 Mean : 5324 Mean : 6.519 Mean : 6.518
## 3rd Qu.:59.00 3rd Qu.: 7695 3rd Qu.: 7.380 3rd Qu.: 7.380
## Max. :68.00 Max. :18710 Max. :10.740 Max. :10.540
##
## z
## Min. :2.460
## 1st Qu.:3.530
## Median :4.110
## Mean :4.033
## 3rd Qu.:4.580
## Max. :6.980
##
# Frequency polygon of carat (bin width 0.5), one line per color grade,
# with the x-axis limited to 0-5 and y-axis ticks every 1000.
# Uses ggplot() + geom_freqpoly() because qplot() is deprecated in
# current ggplot2 releases.
ggplot(diamonds, aes(x = carat, colour = color)) +
  geom_freqpoly(binwidth = 0.5) +
  scale_x_continuous(limits = c(0, 5), breaks = seq(0, 5, 0.5)) +
  scale_y_continuous(breaks = seq(0, 15000, 1000))
## Warning: Removed 1 rows containing non-finite values (stat_bin).
## Warning: Removed 14 rows containing missing values (geom_path).