#Q1泊松分布与地震次数 #(a)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(cowplot)
##
## Attaching package: 'cowplot'
##
## The following object is masked from 'package:lubridate':
##
## stamp
library(ggrepel)
library(scales)
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
library(dplyr)
library(reshape2)
##
## Attaching package: 'reshape2'
##
## The following object is masked from 'package:tidyr':
##
## smiths
alldata <- read_xlsx("earthquake_2015_2024.xlsx")
colnames(alldata) <- c("id", "time", "lon", "lat", "center_depth", "level", "location","type")
alldata$level = as.numeric(alldata$level)
data = alldata[alldata$level >= 4 & alldata$id <= 1066,]
monthly_counts <- data %>%
mutate(
Year = year(time),
Month = month(time)
)
tempfile <- monthly_counts %>%
group_by(Month) %>%
summarise(
count = n(),
)
plot(tempfile$Month, tempfile$count, type="l")
#(b)
library(readxl)
library(dplyr)
alldata <- read_xlsx("earthquake_2015_2024.xlsx")
colnames(alldata) <- c("id", "time", "lon", "lat", "center_depth", "level", "location","type")
alldata$level = as.numeric(alldata$level)
data = alldata[alldata$level >= 4,]
monthly_counts <- data %>%
mutate(
Year = year(time),
Month = month(time)
)
tempfile <- monthly_counts %>%
group_by(Year ,Month) %>%
summarise(
frequency = n(),
)
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
tempfile1 <- tempfile %>%
group_by(frequency) %>%
summarise(
count = n(),
)
total <- sum(tempfile1$count)
probability <- tempfile1$count / total
Expected_value <- sum(tempfile1$frequency * probability)
cat("估计λ的值为:",Expected_value)
## 估计λ的值为: 14.525
#(c)
lambda <- 14.525
p1 <- ppois(0, lambda, lower.tail = TRUE, log.p = FALSE)
p2 <- ppois(4, lambda, lower.tail = TRUE, log.p = FALSE)
p3 <- ppois(20, lambda, lower.tail = FALSE, log.p = FALSE)
cat("根据泊松分布近似,一个月内没有发生震级超过四级的地震的概率为:",p1,"一个月发生小于5次震级超过四级的地震的概率为:",p2,"一个月发生超过20次震级超过四级的地震的概率为:",p3)
## 根据泊松分布近似,一个月内没有发生震级超过四级的地震的概率为: 4.918953e-07 一个月发生小于5次震级超过四级的地震的概率为: 0.00122303 一个月发生超过20次震级超过四级的地震的概率为: 0.0646632
#Q2正态概率与分位数 #前置运算
library(HistData)
library(dplyr)
data(Galton)
Galton$child = as.numeric(Galton$child)
Data1 <- Galton %>%
group_by(child) %>%
summarise(
count = n()
)
#(a)
totalnumber <- sum(Data1$count)
totalheight <- sum(Data1$child * Data1$count)
average <- totalheight/totalnumber
probability1 <- Data1$count/totalnumber
variance <- sum(probability1 * (Data1$child - average) * (Data1$child - average))
sd1 <- sqrt(variance)
cat("样本均值为:",average,"方差为",variance,"拟合的正态分布为X~N(",average,",",variance,")" )
## 样本均值为: 68.08847 方差为 6.333197 拟合的正态分布为X~N( 68.08847 , 6.333197 )
#(b)
hist(Galton$child,
main = "身高的频率分布直方图",
xlab = "身高",
ylab = "频率",
col = "lightblue",
border = "black")
curve(dnorm(x,mean = 68.09,sd = sd1),from = 60,to = 76,
main = "身高的正态分布密度曲线",
xlab = "身高",
ylab = "密度"
)
#(c)
a <- 66
b <- 71
c <- 65
d <- 72
e <- 74
p1 <- pnorm(b, mean = average, sd = sd1) - pnorm(a, mean = average, sd = sd1)
p2 <- pnorm(d, mean = average, sd = sd1) - pnorm(c, mean = average, sd = sd1)
p3 <- 1 - pnorm(e, mean = average, sd = sd1)
cat("介于区间[66,71]的概率为",p1,"介于区间[65,72]的概率为",p2,"身高大于74inch的概率为",p3)
## 介于区间[66,71]的概率为 0.6730484 介于区间[65,72]的概率为 0.8300788 身高大于74inch的概率为 0.009411215
#(d)
probs <- seq(0.05,0.95,by = 0.05)
expected1 <- qnorm(probs, mean = average, sd = sd1, lower.tail = FALSE, log.p = FALSE)
expected2 <- qnorm(probs, mean = average, sd = sd1, lower.tail = TRUE, log.p = FALSE)
#(e)
probs <- seq(0.05,0.95,by = 0.05)
experience <- quantile(Galton$child, probs)
plot(probs,experience,type = "l",col = "blue",lwd = 2,
xlab = "概率", ylab = "分位数值",
main = "正态分位数与经验分位数比较",
ylim = range(c(experience, expected2)))
lines(probs, expected2, col = "red", lwd = 2, lty = 2)
legend("topleft",
legend = c("经验分位数", "正态分位数"),
col = c("blue", "red"),
lwd = 2, lty = c(1, 2),
bty = "n")