## T1608607
## Daitu
## 2016-8-28

## 第一列数据指的是间隔时间 第二列数据是数量
## 分析要求:用适当的拟合函数将两组数据进行拟合,看大致呈现什么分布
## 最后需要源代码和函数参数


## 更改工作文件夹####----------------------------------------------
rm(list = ls());gc()
##          used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 365957 19.6     592000 31.7   460000 24.6
## Vcells 569345  4.4    1308461 10.0   786094  6.0
## 
setwd("/Users/daitu/数据分析/订单/T1608607")
getwd()
## [1] "/Users/daitu/数据分析/订单/T1608607"
## 该行代码,如果是win环境需要求小注释
## windowsFonts(STKaiti=windowsFont("KaiTi_GB2312"))

## 加载需要的包####------------------------------------------------
library(readxl)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.4
library(MASS)



## 读取数据
data <- read_excel("1980DELTA_20160626.xlsx",col_names = FALSE)
colnames(data) <- c("x","y")

## 将数据可视化查看
ggplot(data = data) +
  theme_bw(base_family = "STKaiti") +
  geom_point(aes(x,y),stat = "identity",color = "red",size = 1) +
  labs(x = "Time",y = "Y",title = "")

## 从图像上看,可以认为是数据的分布形式与指数有关所以假设使用下面的方程来拟合
## f(x) = a*exp(b*x) + c*exp(d*x)
## 进行曲线拟合
result <- nls(y ~ a*exp(b*x) + c*exp(d*x),data = data,
              start = list(a = 1700,b = -2,c = 350,d = -0.1))
result
## Nonlinear regression model
##   model: y ~ a * exp(b * x) + c * exp(d * x)
##    data: data
##          a          b          c          d 
## 1733.06714   -2.30553  355.59974   -0.05144 
##  residual sum-of-squares: 58287
## 
## Number of iterations to convergence: 5 
## Achieved convergence tolerance: 5.908e-06
summary(result)
## 
## Formula: y ~ a * exp(b * x) + c * exp(d * x)
## 
## Parameters:
##     Estimate Std. Error t value Pr(>|t|)    
## a 1733.06714   15.12098  114.61   <2e-16 ***
## b   -2.30553    0.08565  -26.92   <2e-16 ***
## c  355.59974    7.34330   48.42   <2e-16 ***
## d   -0.05144    0.00136  -37.83   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.25 on 332 degrees of freedom
## 
## Number of iterations to convergence: 5 
## Achieved convergence tolerance: 5.908e-06
## 从结果中可以看中每个参数都是显著的


##图像可视化拟合结果
data$prey <- predict(result)  ## 预测值
## 可视化查看结果
ggplot(data = data) +
  theme_bw(base_family = "STKaiti") +
  geom_point(aes(x,y),stat = "identity",color = "red",size = 1) +
  geom_line(aes(x,prey),stat = "identity",color = "blue") +
  labs(x = "Time",y = "Y",title = "")

## 该数据看着也比较像指数分布
## 拟合指数分布
fite <- fitdistr(data$y,"exponential")
fite
##       rate   
##   0.03409437 
##  (0.00186000)
ggplot(data,aes(y))+
  theme_bw(base_family = "STKaiti") +
  geom_histogram(aes(y = ..density..),bins = 80,color = "lightblue",fill = "green",alpha = 0.6) +
  geom_density(aes(y),color = "red",bw = 10) +
  labs(x = "Y",y = "Density",title = "密度图")

## 在双对数坐标系上的分析,以 e 为底####----------------------------------
data2 <- as.data.frame(apply(data[2:dim(data)[1],1:2], 2, log))

## 将数据可视化查看
ggplot(data = data2) +
  theme_bw(base_family = "STKaiti") +
  geom_point(aes(x,y),stat = "identity",color = "red",size = 1) +
  labs(x = "log(Time)",y = "log(Y)",title = "")

## 进行曲线拟合
##  f(x) = p1*x^3 + p2*x^2 + p3*x + p4
result <- nls(y ~ a*x^3 + b*x^2 + c*x + d,data = data2,
              start = list(a = 0.1,b = -1,c = 3,d = 4))
result
## Nonlinear regression model
##   model: y ~ a * x^3 + b * x^2 + c * x + d
##    data: data2
##        a        b        c        d 
##  0.08483 -1.02245  2.28166  4.84454 
##  residual sum-of-squares: 60.6
## 
## Number of iterations to convergence: 1 
## Achieved convergence tolerance: 6.965e-08
summary(result)
## 
## Formula: y ~ a * x^3 + b * x^2 + c * x + d
## 
## Parameters:
##    Estimate Std. Error t value Pr(>|t|)    
## a  0.084826   0.004914  17.264   <2e-16 ***
## b -1.022445   0.062023 -16.485   <2e-16 ***
## c  2.281665   0.245417   9.297   <2e-16 ***
## d  4.844538   0.313316  15.462   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4279 on 331 degrees of freedom
## 
## Number of iterations to convergence: 1 
## Achieved convergence tolerance: 6.965e-08
## 从结果中可以看中每个参数都是显著的


##图像可视化拟合结果
data2$prey <- predict(result)  ## 预测值
## 可视化查看结果
ggplot(data = data2) +
  theme_bw(base_family = "STKaiti") +
  geom_point(aes(x,y),stat = "identity",color = "red",size = 1) +
  geom_line(aes(x,prey),stat = "identity",color = "blue") +
  labs(x = "log(Time)",y = "log(Y)",title = "")

## y的密度图
ggplot(data2,aes(y))+
  theme_bw(base_family = "STKaiti") +
  geom_histogram(aes(y = ..density..),bins = 20,color = "lightblue",fill = "green",alpha = 0.6) +
  geom_density(aes(y),color = "red",bw = 0.5) +
  labs(x = "Y",y = "Density",title = "密度图")

## 在双对数坐标系上的分析,以10 为底####----------------------------------
data2 <- as.data.frame(apply(data[2:dim(data)[1],1:2], 2, log10))

## 将数据可视化查看
ggplot(data = data2) +
  theme_bw(base_family = "STKaiti") +
  geom_point(aes(x,y),stat = "identity",color = "red",size = 1) +
  labs(x = "log10(Time)",y = "log10(Y)",title = "")

## 进行曲线拟合
##  f(x) = p1*x^3 + p2*x^2 + p3*x + p4
result <- nls(y ~ a*x^3 + b*x^2 + c*x + d,data = data2,
              start = list(a = 0.1,b = -1,c = 3,d = 4))
result
## Nonlinear regression model
##   model: y ~ a * x^3 + b * x^2 + c * x + d
##    data: data2
##       a       b       c       d 
##  0.4497 -2.3543  2.2817  2.1040 
##  residual sum-of-squares: 11.43
## 
## Number of iterations to convergence: 1 
## Achieved convergence tolerance: 1.081e-07
summary(result)
## 
## Formula: y ~ a * x^3 + b * x^2 + c * x + d
## 
## Parameters:
##   Estimate Std. Error t value Pr(>|t|)    
## a  0.44974    0.02605  17.264   <2e-16 ***
## b -2.35427    0.14281 -16.485   <2e-16 ***
## c  2.28166    0.24542   9.297   <2e-16 ***
## d  2.10396    0.13607  15.462   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1858 on 331 degrees of freedom
## 
## Number of iterations to convergence: 1 
## Achieved convergence tolerance: 1.081e-07
## 从结果中可以看中每个参数都是显著的


##图像可视化拟合结果
data2$prey <- predict(result)  ## 预测值
## 可视化查看结果
ggplot(data = data2) +
  theme_bw(base_family = "STKaiti") +
  geom_point(aes(x,y),stat = "identity",color = "red",size = 1) +
  geom_line(aes(x,prey),stat = "identity",color = "blue") +
  labs(x = "log10(Time)",y = "log10(Y)",title = "")

## y的密度图
ggplot(data2,aes(y))+
  theme_bw(base_family = "STKaiti") +
  geom_histogram(aes(y = ..density..),bins = 20,color = "lightblue",fill = "green",alpha = 0.6) +
  geom_density(aes(y),color = "red",bw = 0.5) +
  labs(x = "Y",y = "Density",title = "密度图")