导入钻石数据

library(ggplot2)
# Load the diamonds dataset bundled with ggplot2 into the workspace,
# then print a column-by-column summary of it.
data("diamonds", package = "ggplot2")
summary(diamonds)
##      carat               cut        color        clarity     
##  Min.   :0.2000   Fair     : 1610   D: 6775   SI1    :13065  
##  1st Qu.:0.4000   Good     : 4906   E: 9797   VS2    :12258  
##  Median :0.7000   Very Good:12082   F: 9542   SI2    : 9194  
##  Mean   :0.7979   Premium  :13791   G:11292   VS1    : 8171  
##  3rd Qu.:1.0400   Ideal    :21551   H: 8304   VVS2   : 5066  
##  Max.   :5.0100                     I: 5422   VVS1   : 3655  
##                                     J: 2808   (Other): 2531  
##      depth           table           price             x         
##  Min.   :43.00   Min.   :43.00   Min.   :  326   Min.   : 0.000  
##  1st Qu.:61.00   1st Qu.:56.00   1st Qu.:  950   1st Qu.: 4.710  
##  Median :61.80   Median :57.00   Median : 2401   Median : 5.700  
##  Mean   :61.75   Mean   :57.46   Mean   : 3933   Mean   : 5.731  
##  3rd Qu.:62.50   3rd Qu.:59.00   3rd Qu.: 5324   3rd Qu.: 6.540  
##  Max.   :79.00   Max.   :95.00   Max.   :18823   Max.   :10.740  
##                                                                  
##        y                z         
##  Min.   : 0.000   Min.   : 0.000  
##  1st Qu.: 4.720   1st Qu.: 2.910  
##  Median : 5.710   Median : 3.530  
##  Mean   : 5.735   Mean   : 3.539  
##  3rd Qu.: 6.540   3rd Qu.: 4.040  
##  Max.   :58.900   Max.   :31.800  
## 

创建价格直方图

# Histogram of diamond prices using bins of width 500, drawn with black
# outlines and a light-blue fill; x-axis ticks are placed every 5000.
# Written with ggplot() + geom_histogram() rather than qplot(), which is
# deprecated as of ggplot2 3.4.0.
ggplot(diamonds, aes(x = price)) +
  geom_histogram(binwidth = 500, colour = "black", fill = "#CCDDFF") +
  scale_x_continuous(breaks = seq(0, 20000, 5000))

#价格的描述性统计

# Summary statistics (min, quartiles, mean, max) of the price column.
summary(diamonds[["price"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     326     950    2401    3933    5324   18820

按切割类型(cut)分面展示价格分布

# Same price histogram as before (bin width 500, x-axis ticks every 5000),
# split into one panel per cut level and stacked in a single column.
# Written with ggplot() + geom_histogram() rather than qplot(), which is
# deprecated as of ggplot2 3.4.0.
ggplot(diamonds, aes(x = price)) +
  geom_histogram(binwidth = 500, colour = "black", fill = "#CCDDFF") +
  scale_x_continuous(breaks = seq(0, 20000, 5000)) +
  facet_wrap(~cut, ncol = 1)

##按切割类型(cut)分组的价格描述性统计(求具体统计量时,最好不要使用summary函数:它的输出默认只保留4位有效数字,例如最高价18823会显示为18820)

# Count how many diamonds fall into each cut level.
table(diamonds[["cut"]])
## 
##      Fair      Good Very Good   Premium     Ideal 
##      1610      4906     12082     13791     21551
# Apply summary() to the prices within each cut level. The grouping
# expression is kept as `diamonds$cut` because by() uses its text as the
# label printed above each group.
by(data = diamonds$price, INDICES = diamonds$cut, FUN = summary)
## diamonds$cut: Fair
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     337    2050    3282    4359    5206   18570 
## -------------------------------------------------------- 
## diamonds$cut: Good
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     327    1145    3050    3929    5028   18790 
## -------------------------------------------------------- 
## diamonds$cut: Very Good
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     336     912    2648    3982    5373   18820 
## -------------------------------------------------------- 
## diamonds$cut: Premium
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     326    1046    3185    4584    6296   18820 
## -------------------------------------------------------- 
## diamonds$cut: Ideal
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     326     878    1810    3458    4678   18810

自由的 y 轴刻度:让每个分面使用各自独立的 y 轴标尺

# Price histogram with one panel per cut level, where each panel gets its
# own y-axis range (scales = "free_y") so low-count cuts remain readable.
# The bin count is intentionally left at the stat_bin() default, so ggplot2
# still prints its "Pick better value with `binwidth`" message.
# Fixes: the stray empty argument in facet_wrap(~cut,,scales = ...) is
# removed, and the deprecated qplot() is replaced with ggplot().
ggplot(diamonds, aes(x = price)) +
  geom_histogram() +
  facet_wrap(~cut, scales = "free_y")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#按切割类型(cut)分面的克拉重量(carat)直方图

# Histogram of carat (bin width 0.1), one panel per cut level.
# The x scale is limited to [0, 3] with ticks every 0.2; rows whose carat
# falls outside those limits are dropped by the scale, which is what
# triggers the "Removed ... rows" warning.
# Written with ggplot() + geom_histogram() rather than qplot(), which is
# deprecated as of ggplot2 3.4.0.
ggplot(diamonds, aes(x = carat)) +
  geom_histogram(binwidth = 0.1, colour = "black", fill = "#E38EFF") +
  scale_x_continuous(limits = c(0, 3), breaks = seq(0, 3, 0.2)) +
  facet_wrap(~cut)
## Warning: Removed 32 rows containing non-finite values (stat_bin).

#箱线图

# Box plot of price for each color grade.
# coord_cartesian() zooms the y-axis to [0, 7700] without discarding data,
# so the box statistics are still computed from all rows; y-axis ticks are
# placed every 500.
# Written with ggplot() + geom_boxplot() rather than qplot(), which is
# deprecated as of ggplot2 3.4.0.
ggplot(diamonds, aes(x = color, y = price)) +
  geom_boxplot() +
  coord_cartesian(ylim = c(0, 7700)) +
  scale_y_continuous(breaks = seq(0, 8000, 500))

#单个因素的描述性统计量

# Summarise every column for the diamonds whose color grade is "J".
# subset() evaluates the condition inside the data frame, so the column
# can be referenced by its bare name.
color_j <- subset(diamonds, color == "J")
summary(color_j)
##      carat              cut      color       clarity        depth      
##  Min.   :0.230   Fair     :119   D:   0   SI1    :750   Min.   :43.00  
##  1st Qu.:0.710   Good     :307   E:   0   VS2    :731   1st Qu.:61.20  
##  Median :1.110   Very Good:678   F:   0   VS1    :542   Median :62.00  
##  Mean   :1.162   Premium  :808   G:   0   SI2    :479   Mean   :61.89  
##  3rd Qu.:1.520   Ideal    :896   H:   0   VVS2   :131   3rd Qu.:62.70  
##  Max.   :5.010                   I:   0   VVS1   : 74   Max.   :73.60  
##                                  J:2808   (Other):101                  
##      table           price             x                y         
##  Min.   :51.60   Min.   :  335   Min.   : 3.930   Min.   : 3.900  
##  1st Qu.:56.00   1st Qu.: 1860   1st Qu.: 5.700   1st Qu.: 5.718  
##  Median :58.00   Median : 4234   Median : 6.640   Median : 6.630  
##  Mean   :57.81   Mean   : 5324   Mean   : 6.519   Mean   : 6.518  
##  3rd Qu.:59.00   3rd Qu.: 7695   3rd Qu.: 7.380   3rd Qu.: 7.380  
##  Max.   :68.00   Max.   :18710   Max.   :10.740   Max.   :10.540  
##                                                                   
##        z        
##  Min.   :2.460  
##  1st Qu.:3.530  
##  Median :4.110  
##  Mean   :4.033  
##  3rd Qu.:4.580  
##  Max.   :6.980  
## 

频数多边形

# Frequency polygon of carat (bin width 0.5), one coloured line per color
# grade. The x scale is limited to [0, 5] with ticks every 0.5, so rows
# with carat outside that range are dropped with a warning; y-axis ticks
# are placed every 1000.
# Written with ggplot() + geom_freqpoly() rather than qplot(), which is
# deprecated as of ggplot2 3.4.0.
ggplot(diamonds, aes(x = carat, colour = color)) +
  geom_freqpoly(binwidth = 0.5) +
  scale_x_continuous(limits = c(0, 5), breaks = seq(0, 5, 0.5)) +
  scale_y_continuous(breaks = seq(0, 15000, 1000))
## Warning: Removed 1 rows containing non-finite values (stat_bin).
## Warning: Removed 14 rows containing missing values (geom_path).