数理漁業資源学 A0522006

Including Plots

You can also embed plots, for example:

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.

# Workspace reset removed: `rm(list = ls())` neither detaches packages nor
# restores options, so it does not give a clean slate. Run or knit this
# document in a fresh R session instead.

# Install `pacman` only when it is missing. `requireNamespace()` checks that
# the package is available without attaching it; the calls below reach it
# through the `pacman::` prefix.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}

## Loading required package: pacman

# Attach the packages used below, installing any that are missing.
pacman::p_load(
  "tidyverse",
  "gt",
  "skimr",
  "showtext"
)

# Print numbers in fixed notation rather than scientific notation.
options(scipen = 999)

# Register a Japanese-capable Google font under the family name "noto"
# (substitute another Japanese font if preferred), then let showtext
# handle text rendering in plots.
font_add_google(name = "Noto Sans JP", family = "noto")
showtext::showtext_auto()

# Read the CSV file into a tibble.
# NOTE(review): the column specification printed for this call reports
# Highest_Price and Lowest_Price as character -- confirm whether they should
# be parsed as numeric.
dat_hirame <- readr::read_csv(file = "qfr_2024_hirame_2.csv")

## Rows: 3000 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Date, Market, Gear_Type, Species, Highest_Price, Lowest_Price
## dbl (3): Number_of_Vessels, Landing_Amount, Average_Price
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Overview of the data: per-column summary of the raw table.
skimr::skim(dat_hirame)

Data summary
Name	dat_hirame
Number of rows	3000
Number of columns	9
_______________________
Column type frequency:
character	6
numeric	3
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	n_unique
Date	0	1.00	8	10	2634
Market	0	1.00	2	3	3
Gear_Type	0	1.00	3	3	3
Species	0	1.00	2	6	4
Highest_Price	31	0.99	1	5	301
Lowest_Price	31	0.99	1	4	309

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
Number_of_Vessels	0	1.00	4.60	3.75	1	2	4	6	32	▇▂▁▁▁
Landing_Amount	39	0.99	33.44	377.45	-100	5	14	31	20000	▇▁▁▁▁
Average_Price	31	0.99	1554.21	1121.89	-30000	924	1345	1932	9313	▁▁▁▇▆

# Summarise the target variable, Average_Price.
summary(dat_hirame[["Average_Price"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -30000     924    1345    1554    1932    9313      31

# Remove NAs: keep only the rows that have no missing value in any column,
# then show the result.
dat_hirame <- dat_hirame[which(complete.cases(dat_hirame)), ]
dat_hirame |> print()

## # A tibble: 2,943 × 9
##    Date  Market Gear_Type Number_of_Vessels Species Landing_Amount Highest_Price
##    <chr> <chr>  <chr>                 <dbl> <chr>            <dbl> <chr>        
##  1 2018… 宮古   その他                    1 ヒラメ               3 1000         
##  2 2008… 大船渡 底刺網                    7 ヒラメ              48 2800         
##  3 1997… 大船渡 その他                   12 ヒラメ              88 5500         
##  4 2008… 大船渡 底刺網                    5 ヒラメ              41 2000         
##  5 1995… 大船渡 その他                   11 ヒラメ              65 3500         
##  6 2017… 大船渡 底刺網                    2 ヒラメ              14 3000         
##  7 2010… 釜石   底刺網                    1 ヒラメ               9 1969         
##  8 1994… 大船渡 その他                    1 ヒラメ               1              
##  9 2020… 大船渡 底刺網                    2 ヒラメ               3 1690         
## 10 2010… 大船渡 底刺網                    8 ヒラメ              16 3500         
## # ℹ 2,933 more rows
## # ℹ 2 more variables: Average_Price <dbl>, Lowest_Price <chr>

# Distribution of the average price.
# Judging from summary() and the data frame, -30000 and -300 are treated as
# outliers, so only rows with Average_Price > 0 go into the histogram.
dat_hirame |>
  filter(Average_Price > 0) |>
  ggplot(aes(x = Average_Price)) +
  # `bins` is stated explicitly (30 is the stat_bin() default) so the bin
  # count is a visible, tunable choice and the "Pick better value with
  # `binwidth`" message is no longer emitted.
  geom_histogram(bins = 30, fill = "salmon", color = "black") +
  labs(title = "平均価格の分布", x = "価格（円）", y = "頻度") +
  theme(text = element_text(family = "noto"))

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Box plot of the positive average prices, used to inspect their spread and
# to spot outliers.
ggplot(
  data = dplyr::filter(dat_hirame, Average_Price > 0),
  mapping = aes(y = Average_Price)
) +
  geom_boxplot() +
  labs(y = "平均価格(円)", title = "平均価格の分布") +
  theme(text = element_text(family = "noto"))

# Same box plot with the range narrowed to prices above 0 and below 4000
# (both bounds exclusive).
ggplot(
  data = dplyr::filter(dat_hirame, Average_Price > 0 & Average_Price < 4000),
  mapping = aes(y = Average_Price)
) +
  geom_boxplot() +
  labs(y = "平均価格(円)", title = "平均価格の分布") +
  theme(text = element_text(family = "noto"))

# Upper limit for outlier removal by the 1.5 * IQR rule. Q1 and Q3 are the
# quartiles reported by the earlier summary of Average_Price. The matching
# lower limit would fall below 0, so 0 is used as the lower bound instead.
Q3 <- 1932
Q1 <- 924
IQR <- diff(c(Q1, Q3))
最大値 <- Q3 + 1.5 * IQR

# Remove outliers: keep rows whose Average_Price lies between 0 and the upper
# limit, both ends inclusive. The upper bound reuses `最大値` computed just
# before this step (3444) instead of repeating it as a literal, so the
# computed fence and the filter cannot drift apart.
dat_hirame <- dat_hirame |>
  filter(between(Average_Price, 0, 最大値))
# Display only: the result of this subsetting is not assigned. Rows with NA
# were already removed in the earlier NA-removal step, so this shows the
# filtered data as-is.
dat_hirame[complete.cases(dat_hirame), ]

## # A tibble: 2,808 × 9
##    Date  Market Gear_Type Number_of_Vessels Species Landing_Amount Highest_Price
##    <chr> <chr>  <chr>                 <dbl> <chr>            <dbl> <chr>        
##  1 2018… 宮古   その他                    1 ヒラメ               3 1000         
##  2 2008… 大船渡 底刺網                    7 ヒラメ              48 2800         
##  3 1997… 大船渡 その他                   12 ヒラメ              88 5500         
##  4 2008… 大船渡 底刺網                    5 ヒラメ              41 2000         
##  5 1995… 大船渡 その他                   11 ヒラメ              65 3500         
##  6 2017… 大船渡 底刺網                    2 ヒラメ              14 3000         
##  7 2010… 釜石   底刺網                    1 ヒラメ               9 1969         
##  8 1994… 大船渡 その他                    1 ヒラメ               1              
##  9 2020… 大船渡 底刺網                    2 ヒラメ               3 1690         
## 10 2010… 大船渡 底刺網                    8 ヒラメ              16 3500         
## # ℹ 2,798 more rows
## # ℹ 2 more variables: Average_Price <dbl>, Lowest_Price <chr>

# Show the data after outlier removal.
dat_hirame |> print()

## # A tibble: 2,808 × 9
##    Date  Market Gear_Type Number_of_Vessels Species Landing_Amount Highest_Price
##    <chr> <chr>  <chr>                 <dbl> <chr>            <dbl> <chr>        
##  1 2018… 宮古   その他                    1 ヒラメ               3 1000         
##  2 2008… 大船渡 底刺網                    7 ヒラメ              48 2800         
##  3 1997… 大船渡 その他                   12 ヒラメ              88 5500         
##  4 2008… 大船渡 底刺網                    5 ヒラメ              41 2000         
##  5 1995… 大船渡 その他                   11 ヒラメ              65 3500         
##  6 2017… 大船渡 底刺網                    2 ヒラメ              14 3000         
##  7 2010… 釜石   底刺網                    1 ヒラメ               9 1969         
##  8 1994… 大船渡 その他                    1 ヒラメ               1              
##  9 2020… 大船渡 底刺網                    2 ヒラメ               3 1690         
## 10 2010… 大船渡 底刺網                    8 ヒラメ              16 3500         
## # ℹ 2,798 more rows
## # ℹ 2 more variables: Average_Price <dbl>, Lowest_Price <chr>

# Compact view of the structure: each column's type and first values.
utils::str(dat_hirame)

## tibble [2,808 × 9] (S3: tbl_df/tbl/data.frame)
##  $ Date             : chr [1:2808] "2018-12-17" "2008-8-28" "1997-9-1" "2008-11-27" ...
##  $ Market           : chr [1:2808] "宮古" "大船渡" "大船渡" "大船渡" ...
##  $ Gear_Type        : chr [1:2808] "その他" "底刺網" "その他" "底刺網" ...
##  $ Number_of_Vessels: num [1:2808] 1 7 12 5 11 2 1 1 2 8 ...
##  $ Species          : chr [1:2808] "ヒラメ" "ヒラメ" "ヒラメ" "ヒラメ" ...
##  $ Landing_Amount   : num [1:2808] 3 48 88 41 65 14 9 1 3 16 ...
##  $ Highest_Price    : chr [1:2808] "1000" "2800" "5500" "2000" ...
##  $ Average_Price    : num [1:2808] 1000 1368 1922 1190 1506 ...
##  $ Lowest_Price     : chr [1:2808] "1000" "200" "1200" "500" ...

# Check the data again after cleaning.
dat_hirame |> skimr::skim()

Data summary
Name	dat_hirame
Number of rows	2808
Number of columns	9
_______________________
Column type frequency:
character	6
numeric	3
________________________
Group variables	None

Variable type: character

skim_variable	complete_rate	min	max	n_unique
Date	1	8	10	2461
Market	1	2	3	3
Gear_Type	1	3	3	3
Species	1	2	6	4
Highest_Price	1	1	4	289
Lowest_Price	1	1	4	296

Variable type: numeric

skim_variable	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
Number_of_Vessels	1	4.66	3.75	1	2	4	6	32	▇▂▁▁▁
Landing_Amount	1	34.62	387.53	-100	6	14	32	20000	▇▁▁▁▁
Average_Price	1	1427.09	696.22	15	902	1308	1830	3440	▃▇▆▂▁

数理漁業資源学 A0522006

Kei Oyama

2024-06-26

R Markdown

Including Plots