2016. 12. 13 해양자료실
대상자료 : 201609_PH_HUSS.cnv
데이터 구조(R dataframe)
# Show the structure of df1: its dimensions plus each column's type and leading values.
str(df1)
'data.frame': 24866 obs. of 13 variables:
$ no : int 1 2 3 4 5 6 7 8 9 10 ...
$ datee : num 245 245 245 245 245 ...
$ lat : num 35 35 35 35 35 ...
$ long : num 129 129 129 129 129 ...
$ pressure : num 0 0 0 0 0 0 0 0 0 0 ...
$ stemp : num 25.3 25.3 25.3 25.3 25.3 ...
$ salinity : num 31.1 31.1 31.1 31.1 31.1 ...
$ density1 : num 20.3 20.3 20.3 20.3 20.3 ...
$ density2 : num 20.3 20.3 20.3 20.3 20.3 ...
$ soundv : num 1531 1531 1531 1531 1531 ...
$ sal_flag : chr "F" "F" "F" "F" ...
$ stemp_flag: chr "F" "F" "F" "F" ...
$ sa_flag : chr "F" "F" "F" "F" ...
# Label the first column "no"; the scatterplots further down use it as the x axis.
colnames(df1)[1] <- "no"

# Keep the row count and standalone copies of the two measured series;
# the flagging chunks further down read these copies.
maxr <- nrow(df1)
stemp <- df1[["stemp"]]
sal <- df1[["salinity"]]

# Preview the first rows, then print per-column summary statistics.
head(df1)
summary(df1)
no datee lat long pressure
Min. : 1 Min. :245.4 Min. :35.01 Min. :129.0 Min. :0
1st Qu.: 6217 1st Qu.:246.1 1st Qu.:36.10 1st Qu.:129.5 1st Qu.:0
Median :12434 Median :246.8 Median :36.21 Median :129.6 Median :0
Mean :12434 Mean :246.8 Mean :36.13 Mean :129.6 Mean :0
3rd Qu.:18650 3rd Qu.:247.6 3rd Qu.:36.27 3rd Qu.:129.8 3rd Qu.:0
Max. :24866 Max. :248.3 Max. :36.43 Max. :130.0 Max. :0
stemp salinity density1 density2
Min. :22.10 Min. :10.15 Min. : 4.667 Min. : 4.667
1st Qu.:24.72 1st Qu.:30.56 1st Qu.:20.009 1st Qu.:20.009
Median :24.95 Median :31.08 Median :20.328 Median :20.328
Mean :24.95 Mean :30.86 Mean :20.228 Mean :20.228
3rd Qu.:25.18 3rd Qu.:31.41 3rd Qu.:20.672 3rd Qu.:20.672
Max. :30.48 Max. :33.34 Max. :21.954 Max. :21.954
soundv sal_flag stemp_flag sa_flag
Min. :1507 Length:24866 Length:24866 Length:24866
1st Qu.:1529 Class :character Class :character Class :character
Median :1530 Mode :character Mode :character Mode :character
Mean :1530
3rd Qu.:1531
Max. :1543
Quantile values of Seawater temperature
# Print the default quantiles (0%, 25%, 50%, 75%, 100%) of the temperature column.
quantile(df1$stemp)
0% 25% 50% 75% 100%
22.0952 24.7208 24.9513 25.1791 30.4806
IQR Calculation
# Hinges and spread of seawater temperature. fivenum() returns
# (minimum, lower hinge, median, upper hinge, maximum).
stemp_hinges <- fivenum(df1$stemp)
UpperQ <- stemp_hinges[4]
LowerQ <- stemp_hinges[2]
# NOTE: this variable shares its name with stats::IQR(); later chunks read it.
IQR <- UpperQ - LowerQ

# Collect the readings lying more than 1.5 * IQR beyond either hinge.
# which() keeps NA comparisons out of the selection.
stemp_is_high <- df1$stemp > UpperQ + IQR * 1.5
stemp_is_low <- df1$stemp < LowerQ - IQR * 1.5
upperOutlier <- df1$stemp[which(stemp_is_high)]
lowerOutlier <- df1$stemp[which(stemp_is_low)]
IQR value and number of upper/lower outliers:
# Print the temperature IQR (upper hinge minus lower hinge) computed above.
IQR
[1] 0.4583
# Number of temperature readings above the upper 1.5 * IQR fence.
length(upperOutlier)
[1] 422
# Number of temperature readings below the lower 1.5 * IQR fence.
length(lowerOutlier)
[1] 686
IQR Outlier detect & insert flags..
# Flag every temperature reading outside the 1.5 * IQR fences:
# "T" marks an outlier, "F" a normal value.
# Vectorised instead of looping over 1:maxr, so that
#   - a zero-row data frame is handled (1:maxr would iterate over c(1, 0)),
#   - the data frame is written once rather than once per row,
#   - a missing reading yields an NA flag instead of aborting the `if`
#     with "missing value where TRUE/FALSE needed".
stemp_is_outlier <- stemp > UpperQ + IQR * 1.5 | stemp < LowerQ - IQR * 1.5
# as.character() keeps the column character-typed even when there are no rows.
df1$stemp_flag <- as.character(ifelse(stemp_is_outlier, "T", "F"))
Data visualization
# Base-graphics overview of the temperature series: horizontal boxplot,
# density-scaled histogram, and a scatterplot against the `no` column.
#par(mfrow=c(3,1))
boxplot(
  df1$stemp,
  horizontal = TRUE,
  col = "lightgrey",
  main = "Boxplot of Seawater temperature",
  xlab = "Seawater temperature",
  ylab = ""
)

hist(
  df1$stemp,
  breaks = 50,
  probability = TRUE,
  col = "lightgrey",
  main = "Histogram of Seawater temperature"
)

# Disabled: density curve overlay for the histogram.
#lines(density(df1$stemp), col="red", lwd=2)
plot(
  x = df1$no,
  y = df1$stemp,
  type = "p",
  pch = 21,
  main = "Scatterplot of Seawater temperature"
)

# Temperature scatterplot coloured by the temperature outlier flag.
# Uses stemp_flag (set by the temperature flagging chunk) rather than
# sal_flag, which describes the salinity series. The colours are named so
# that "T" is always red and "F" always black, even when only one of the
# two flag values occurs in the data.
ggplot(df1, aes(x = no, y = stemp, col = stemp_flag)) +
  geom_point(size = 1) +
  scale_color_manual(values = c("F" = "black", "T" = "red")) +
  ggtitle("Sea temperature Plot")

# Plotly: interactive version of the temperature scatterplot.
# Columns are passed as formulas so they are looked up in df1 instead of the
# calling environment, the trace type is stated explicitly (avoids the
# "No trace type specified" message), and points are coloured by the
# temperature flag rather than the salinity flag.
# NOTE(review): formula syntax assumes plotly >= 4.0 - confirm installed version.
plot_ly(
  df1,
  x = ~no,
  y = ~stemp,
  type = "scatter",
  mode = "markers",
  color = ~stemp_flag,
  colors = c("black", "red")
)
No trace type specified:
Based on info supplied, a 'scatter' trace seems appropriate.
Read more about this trace type -> https://plot.ly/r/reference/#scatter
No trace type specified:
Based on info supplied, a 'scatter' trace seems appropriate.
Read more about this trace type -> https://plot.ly/r/reference/#scatter
염분데이터 Outlier 분석 및 가시화
Quantile values of Salinity
# Print the default quantiles (0%, 25%, 50%, 75%, 100%) of the salinity column.
quantile(df1$salinity)
0% 25% 50% 75% 100%
10.14620 30.56385 31.08110 31.41320 33.33750
IQR Calculation
# Hinges and spread of salinity. fivenum() returns
# (minimum, lower hinge, median, upper hinge, maximum).
# These assignments replace the temperature values held in the same variables.
sal_hinges <- fivenum(df1$salinity)
UpperQ <- sal_hinges[4]
LowerQ <- sal_hinges[2]
# NOTE: this variable shares its name with stats::IQR(); later chunks read it.
IQR <- UpperQ - LowerQ

# Collect the readings lying more than 1.5 * IQR beyond either hinge.
# which() keeps NA comparisons out of the selection.
sal_is_high <- df1$salinity > UpperQ + IQR * 1.5
sal_is_low <- df1$salinity < LowerQ - IQR * 1.5
upperOutlier <- df1$salinity[which(sal_is_high)]
lowerOutlier <- df1$salinity[which(sal_is_low)]
IQR, Number of upperOutlier & lowerOutlier values :
# Print the salinity IQR (upper hinge minus lower hinge) computed above.
IQR
[1] 0.8494
# Number of salinity readings above the upper 1.5 * IQR fence.
length(upperOutlier)
[1] 2
# Number of salinity readings below the lower 1.5 * IQR fence.
length(lowerOutlier)
[1] 985
IQR Outlier detect & insert flags..
# Flag every salinity reading outside the 1.5 * IQR fences:
# "T" marks an outlier, "F" a normal value.
# Vectorised instead of looping over 1:maxr, so that
#   - a zero-row data frame is handled (1:maxr would iterate over c(1, 0)),
#   - the data frame is written once rather than once per row,
#   - a missing reading yields an NA flag instead of aborting the `if`
#     with "missing value where TRUE/FALSE needed".
sal_is_outlier <- sal > UpperQ + IQR * 1.5 | sal < LowerQ - IQR * 1.5
# as.character() keeps the column character-typed even when there are no rows.
df1$sal_flag <- as.character(ifelse(sal_is_outlier, "T", "F"))
Data visualization
# Base-graphics overview of the salinity series: horizontal boxplot,
# density-scaled histogram, and a scatterplot against the `no` column.
boxplot(
  df1$salinity,
  horizontal = TRUE,
  col = "lightgrey",
  main = "Boxplot of Salinity",
  xlab = "Salinity",
  ylab = ""
)

hist(
  df1$salinity,
  breaks = 50,
  probability = TRUE,
  col = "lightgrey",
  main = "Histogram of Salinity"
)

plot(
  x = df1$no,
  y = df1$salinity,
  type = "p",
  pch = 21,
  main = "Scatterplot of Salinity"
)

# Salinity scatterplot coloured by the salinity outlier flag.
# The colours are named so that "T" is always red and "F" always black;
# with unnamed values the mapping depends on which flag values are present.
ggplot(df1, aes(x = no, y = salinity, col = sal_flag)) +
  geom_point(size = 1) +
  scale_color_manual(values = c("F" = "black", "T" = "red")) +
  ggtitle("Salinity Plot")

# Plotly: interactive version of the salinity scatterplot.
# Columns are passed as formulas so they are looked up in df1 instead of the
# calling environment, and the trace type is stated explicitly (avoids the
# "No trace type specified" message).
# NOTE(review): formula syntax assumes plotly >= 4.0 - confirm installed version.
plot_ly(
  df1,
  x = ~no,
  y = ~salinity,
  type = "scatter",
  mode = "markers",
  color = ~sal_flag,
  colors = c("black", "red")
)
No trace type specified:
Based on info supplied, a 'scatter' trace seems appropriate.
Read more about this trace type -> https://plot.ly/r/reference/#scatter
No trace type specified:
Based on info supplied, a 'scatter' trace seems appropriate.
Read more about this trace type -> https://plot.ly/r/reference/#scatter
LS0tCnRpdGxlOiAiSVFS7J2EIOydtOyaqe2VnCDsiJjsmKgsIOyXvOu2hOuNsOydtO2EsCDrtoTshJ0g67CPIOyVhOybg+udvOydtOyWtCDqsoDstpwiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KIyMjIDIwMTYuIDEyLiAxMyDtlbTslpHsnpDro4zsi6QKIyMjIOuMgOyDgeyekOujjCA6IDIwMTYwOV9QSF9IVVNTLmNudgoKIyMjIOuNsOydtO2EsCDqtazsobAoUiBkYXRhZnJhbWUpCmBgYHtyfQpzdHIoZGYxKQpuYW1lcyhkZjEpWzFdPC0ibm8iCm1heHIgPC1ucm93KGRmMSkKc2FsPC1kZjEkc2FsaW5pdHkKc3RlbXA8LWRmMSRzdGVtcApoZWFkKGRmMSkKc3VtbWFyeShkZjEpCmBgYAojIyMgUXVhbnRpbGUgdmFsdWVzIG9mIFNlYXdhdGVyIHRlbXBlcmF0dXJlCmBgYHtyfQpxdWFudGlsZShkZjEkc3RlbXApCmBgYAojIyMgSVFSIENhbGN1YXRpb24KYGBge3J9ClVwcGVyUSA9IGZpdmVudW0oZGYxJHN0ZW1wKVs0XQpMb3dlclEgPSBmaXZlbnVtKGRmMSRzdGVtcClbMl0KSVFSID0gVXBwZXJRIC0gTG93ZXJRCnVwcGVyT3V0bGllciA9IGRmMSRzdGVtcFsgd2hpY2goIGRmMSRzdGVtcCA+IFVwcGVyUSArIElRUiAqIDEuNSkgXQpsb3dlck91dGxpZXIgPSBkZjEkc3RlbXBbIHdoaWNoKCBkZjEkc3RlbXAgPCBMb3dlclEgLSBJUVIgKiAxLjUpIF0KYGBgCiMjIyBJUVIgdmFsdWUgOgpgYGB7cn0KSVFSCmxlbmd0aCh1cHBlck91dGxpZXIpCmxlbmd0aChsb3dlck91dGxpZXIpCmBgYAoKIyMjIElRUiBPdXRsaWVyIGRldGVjdCAmIGluc2VydCBmbGFncy4uCmBgYHtyfQpmb3IgKGkgaW4gMTptYXhyKSB7CiAgaWYgKHN0ZW1wW2ldID4gVXBwZXJRICsgSVFSICogMS41IHwgc3RlbXBbaV0gPCBMb3dlclEgLSBJUVIgKiAxLjUpIHsKICAgIGRmMSRzdGVtcF9mbGFnW2ldIDwtIlQiCiAgfQogIGVsc2UgewogICAgZGYxJHN0ZW1wX2ZsYWdbaV0gPC0iRiIKICB9Cn0KYGBgCgojIyMgVmlzdWFsaXphdGlvbnMgZGF0YS4uCmBgYHtyfQojcGFyKG1mcm93PWMoMywxKSkKYm94cGxvdChkZjEkc3RlbXAsIGhvcml6b250YWwgPSBULCBjb2w9ImxpZ2h0Z3JleSIsIHhsYWI9IlNlYXdhdGVyIHRlbXBlcmF0dXJlIiwgeWxhYj0iIiwgbWFpbj0iQm94cGxvdCBvZiBTZWF3YXRlciB0ZW1wZXJhdHVyZSIpCmhpc3QoZGYxJHN0ZW1wLCBicmVha3M9NTAsIHByb2JhYmlsaXR5ID0gVFJVRSwgY29sPSJsaWdodGdyZXkiLCBtYWluPSJIaXN0b2dyYW0gb2YgU2Vhd2F0ZXIgdGVtcGVyYXR1cmUiKQojbGluZXMoZGVuc2l0eShkZjEkc3RlbXApLCBjb2w9InJlZCIsIGx3ZD0yKQpwbG90KGRmMSRubywgIGRmMSRzdGVtcCwgdHlwZT0icCIsIG1haW49IlNjYXR0ZXJwbG90IG9mIFNlYXdhdGVyIHRlbXBlcmF0dXJlIiwgcGNoPTIxKQpgYGAKCmBgYHtyfQpnZ3Bsb3QoZGYxLCBhZXMoeD1ubywgeT1zdGVtcCwgY29sPXNhbF9mbGFnKSkgKyAgZ2VvbV9wb2ludChzaXplPTEpICsKICBzY2FsZV9jb2xvcl9tYW51YWwodmFsdWVzPWMoImJsYWNrIiwgInJlZCIpKSAr
Z2d0aXRsZSgiU2Vhd2F0ZXIgdGVtcGVyYXR1cmUgUGxvdCIpCmBgYAoKYGBge3J9CiNQbG95bHkKcGxvdF9seShkZjEsIHg9bm8sIHk9c3RlbXAsIG1vZGU9Im1hcmtlcnMiLCBjb2xvciA9IHNhbF9mbGFnLCBjb2xvcnM9YygiYmxhY2siLCAicmVkIikpCmBgYAoKIyDsl7zrtoTrjbDsnbTthLAgT3V0bGllciDrtoTshJ0g67CPIOqwgOyLnO2ZlAoKIyMjIFF1YW50aWxlIHZhbHVlcyBvZiBTZWF3YXRlciB0ZW1wZXJhdHVyZQpgYGB7cn0KcXVhbnRpbGUoZGYxJHNhbGluaXR5KQpgYGAKIyMjIElRUiBDYWxjdWF0aW9uCmBgYHtyfQpVcHBlclEgPSBmaXZlbnVtKGRmMSRzYWxpbml0eSlbNF0KTG93ZXJRID0gZml2ZW51bShkZjEkc2FsaW5pdHkpWzJdCklRUiA9IFVwcGVyUSAtIExvd2VyUQp1cHBlck91dGxpZXIgPSBkZjEkc2FsaW5pdHlbIHdoaWNoKCBkZjEkc2FsaW5pdHkgPiBVcHBlclEgKyBJUVIgKiAxLjUpIF0KbG93ZXJPdXRsaWVyID0gZGYxJHNhbGluaXR5WyB3aGljaCggZGYxJHNhbGluaXR5IDwgTG93ZXJRIC0gSVFSICogMS41KSBdCmBgYAojIyMgSVFSLCBOdW1iZXIgb2YgdXBwZXJPdXRsaWVyICYgbG93ZXJPdXRsaWVyIHZhbHVlcyA6CmBgYHtyfQpJUVIKbGVuZ3RoKHVwcGVyT3V0bGllcikKbGVuZ3RoKGxvd2VyT3V0bGllcikKYGBgCgojIyMgSVFSIE91dGxpZXIgZGV0ZWN0ICYgaW5zZXJ0IGZsYWdzLi4KYGBge3J9CmZvciAoaSBpbiAxOm1heHIpIHsKICBpZiAoc2FsW2ldID4gVXBwZXJRICsgSVFSICogMS41IHwgc2FsW2ldIDwgTG93ZXJRIC0gSVFSICogMS41KSB7CiAgICBkZjEkc2FsX2ZsYWdbaV0gPC0iVCIKICB9CiAgZWxzZSB7CiAgICBkZjEkc2FsX2ZsYWdbaV0gPC0iRiIKICB9Cn0KYGBgCgojIFZpc3VhbGl6YXRpb24gZGF0YS4uLi4KYGBge3J9CmJveHBsb3QoZGYxJHNhbGluaXR5LCBob3Jpem9udGFsID0gVCwgY29sPSJsaWdodGdyZXkiLCB4bGFiPSJTYWxpbml0eSIsIHlsYWI9IiIsIG1haW49IkJveHBsb3Qgb2YgU2FsaW5pdHkiKQpoaXN0KGRmMSRzYWxpbml0eSwgYnJlYWtzPTUwLCBwcm9iYWJpbGl0eSA9IFRSVUUsIGNvbD0ibGlnaHRncmV5IiwgbWFpbj0iSGlzdG9ncmFtIG9mIFNhbGluaXR5IikKcGxvdChkZjEkbm8sICBkZjEkc2FsaW5pdHksIHR5cGU9InAiLCBtYWluPSJTY2F0dGVycGxvdCBvZiBTYWxpbml0eSIsIHBjaD0yMSkKYGBgCgpgYGB7cn0KZ2dwbG90KGRmMSwgYWVzKHg9bm8sIHk9c2FsaW5pdHksIGNvbD1zYWxfZmxhZykpICsgIGdlb21fcG9pbnQoc2l6ZT0xKSArCiAgc2NhbGVfY29sb3JfbWFudWFsKHZhbHVlcz1jKCJibGFjayIsICJyZWQiKSkrZ2d0aXRsZSgiU2FsaW5pdHkgUGxvdCIpCmBgYAoKYGBge3J9CiNQbG95bHkKcGxvdF9seShkZjEsIHg9bm8sIHk9c2FsaW5pdHksIG1vZGU9Im1hcmtlcnMiLCBjb2xvciA9IHNhbF9mbGFnLCBjb2xvcnM9YygiYmxhY2siLCAicmVkIikpCmBgYAoKCg==