── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(randomForest)
Warning: package 'randomForest' was built under R version 4.4.2
randomForest 4.7-1.2
Type rfNews() to see new features/changes/bug fixes.
Attaching package: 'randomForest'
The following object is masked from 'package:dplyr':
combine
The following object is masked from 'package:ggplot2':
margin
library(neuralnet)
Warning: package 'neuralnet' was built under R version 4.4.2
Attaching package: 'neuralnet'
The following object is masked from 'package:dplyr':
compute
library(caret)
Warning: package 'caret' was built under R version 4.4.2
Loading required package: lattice
Attaching package: 'caret'
The following object is masked from 'package:purrr':
lift
library(survival)
Attaching package: 'survival'
The following object is masked from 'package:caret':
cluster
'data.frame': 500 obs. of 10 variables:
$ ID : chr "SH001" "SH002" "SH003" "SH004" ...
$ sex : chr "Female" "Female" "Female" "Male" ...
$ blotch: num 37.2 34.5 36.3 35.3 37.4 ...
$ BPM : int 148 158 125 161 138 126 166 135 132 127 ...
$ weight: num 74.7 73.4 71.8 104.6 67.1 ...
$ length: num 187 189 284 171 264 ...
$ air : num 37.7 35.7 34.8 36.2 33.6 ...
$ water : num 23.4 21.4 20.1 21.6 21.8 ...
$ meta : num 64.1 73.7 54.4 86.3 108 ...
$ depth : num 53.2 49.6 49.4 50.3 49 ...
# Print per-column summary statistics for the full `sharks` data frame.
summary(sharks)
ID sex blotch BPM
Length:500 Length:500 Min. :30.78 Min. :119.0
Class :character Class :character 1st Qu.:34.16 1st Qu.:129.0
Mode :character Mode :character Median :35.05 Median :142.0
Mean :35.13 Mean :141.8
3rd Qu.:36.05 3rd Qu.:153.2
Max. :40.08 Max. :166.0
weight length air water
Min. : 65.10 Min. :128.3 Min. :33.00 Min. :20.01
1st Qu.: 75.68 1st Qu.:172.0 1st Qu.:34.42 1st Qu.:21.55
Median : 87.82 Median :211.1 Median :35.43 Median :23.11
Mean : 87.94 Mean :211.0 Mean :35.54 Mean :23.02
3rd Qu.:100.40 3rd Qu.:251.8 3rd Qu.:36.71 3rd Qu.:24.37
Max. :110.94 Max. :291.0 Max. :38.00 Max. :25.99
meta depth
Min. : 50.03 Min. :44.64
1st Qu.: 67.39 1st Qu.:48.90
Median : 82.45 Median :50.14
Mean : 82.04 Mean :50.14
3rd Qu.: 95.97 3rd Qu.:51.35
Max. :112.45 Max. :56.83
# Show the compact structure (column names, types, first values) of `sharksub`.
str(sharksub)
'data.frame': 50 obs. of 4 variables:
$ ID : chr "SH269" "SH163" "SH008" "SH239" ...
$ sex : chr "Female" "Female" "Female" "Female" ...
$ blotch1: num 36.1 33.4 36.3 35 35.7 ...
$ blotch2: num 37.2 34.4 36.5 36 36.8 ...
# Print per-column summary statistics for `sharksub`.
summary(sharksub)
ID sex blotch1 blotch2
Length:50 Length:50 Min. :32.49 Min. :33.47
Class :character Class :character 1st Qu.:34.38 1st Qu.:35.31
Mode :character Mode :character Median :34.94 Median :35.94
Mean :35.03 Mean :35.96
3rd Qu.:35.90 3rd Qu.:36.78
Max. :37.07 Max. :38.18
# Scatter plot of water temperature against air temperature.
ggplot(sharks, aes(x = air, y = water)) +
  geom_point(color = "blue", alpha = 0.6) +
  labs(
    title = "AIR TEMP vs. WATER TEMP",
    x = "AIR TEMP (°C)",
    y = "WATER TEMP (°C)"
  ) +
  theme_minimal() +
  # Enlarge axis tick labels and axis titles.
  theme(
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 14)
  )
# Add a regression line: same scatter plot, overlaid with a linear-model fit
# and its standard-error band (se = TRUE).
ggplot(sharks, aes(x = air, y = water)) +
  geom_point(color = "blue", alpha = 0.6) +
  geom_smooth(method = "lm", color = "red", se = TRUE) +
  labs(
    title = "AIR TEMP vs. WATER TEMP",
    x = "AIR TEMP (°C)",
    y = "WATER TEMP (°C)"
  ) +
  theme_minimal() +
  # Enlarge axis tick labels and axis titles.
  theme(
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 14)
  )
`geom_smooth()` using formula = 'y ~ x'
# Add a LOESS smoothing curve: jittered scatter plot overlaid with a local
# regression fit.
ggplot(sharks, aes(x = air, y = water)) +
  # Points are jittered (width = 0.2); no seed is passed to position_jitter(),
  # so the exact point placement differs between renders.
  geom_point(position = position_jitter(width = 0.2), size = 2) +
  geom_smooth(method = "loess", color = "green") +
  labs(
    title = "AIR TEMP vs. WATER TEMP",
    x = "AIR TEMP (°C)",
    y = "WATER TEMP (°C)"
  ) +
  theme_minimal() +
  # Enlarge axis tick labels and axis titles.
  theme(
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 14)
  )
`geom_smooth()` using formula = 'y ~ x'
# Fix the RNG seed so the data partition and the forest are reproducible.
set.seed(123)

# Split `sharks` into training (80%) and test (remaining rows) sets, using
# caret's partitioning on the response `water`.
train_index <- createDataPartition(sharks$water, p = 0.8, list = FALSE)
train_data <- sharks[train_index, ]
test_data <- sharks[-train_index, ]

# Train the random forest model: regress water temperature on air temperature
# using 500 trees.
rf_model <- randomForest(
  water ~ air,
  data = train_data,
  ntree = 500,
  importance = TRUE
)

# Show the model summary (out-of-bag MSE and % variance explained).
print(rf_model)
Call:
randomForest(formula = water ~ air, data = train_data, ntree = 500, importance = TRUE)
Type of random forest: regression
Number of trees: 500
No. of variables tried at each split: 1
Mean of squared residuals: 3.600593
% Var explained: -30.09