2016. 12. 13 해양자료실

대상자료 : 201609_PH_HUSS.cnv

데이터 구조(R dataframe)

str(df1)
'data.frame':   24866 obs. of  13 variables:
 $ no        : int  1 2 3 4 5 6 7 8 9 10 ...
 $ datee     : num  245 245 245 245 245 ...
 $ lat       : num  35 35 35 35 35 ...
 $ long      : num  129 129 129 129 129 ...
 $ pressure  : num  0 0 0 0 0 0 0 0 0 0 ...
 $ stemp     : num  25.3 25.3 25.3 25.3 25.3 ...
 $ salinity  : num  31.1 31.1 31.1 31.1 31.1 ...
 $ density1  : num  20.3 20.3 20.3 20.3 20.3 ...
 $ density2  : num  20.3 20.3 20.3 20.3 20.3 ...
 $ soundv    : num  1531 1531 1531 1531 1531 ...
 $ sal_flag  : chr  "F" "F" "F" "F" ...
 $ stemp_flag: chr  "F" "F" "F" "F" ...
 $ sa_flag   : chr  "F" "F" "F" "F" ...
# Name the first column "no" and cache the values reused by later chunks.
colnames(df1)[1] <- "no"
maxr <- nrow(df1)         # number of observations
stemp <- df1[["stemp"]]   # copy of the stemp column
sal <- df1[["salinity"]]  # copy of the salinity column
# Preview the first rows, then print per-column summary statistics.
head(df1)
summary(df1)
       no            datee            lat             long          pressure
 Min.   :    1   Min.   :245.4   Min.   :35.01   Min.   :129.0   Min.   :0  
 1st Qu.: 6217   1st Qu.:246.1   1st Qu.:36.10   1st Qu.:129.5   1st Qu.:0  
 Median :12434   Median :246.8   Median :36.21   Median :129.6   Median :0  
 Mean   :12434   Mean   :246.8   Mean   :36.13   Mean   :129.6   Mean   :0  
 3rd Qu.:18650   3rd Qu.:247.6   3rd Qu.:36.27   3rd Qu.:129.8   3rd Qu.:0  
 Max.   :24866   Max.   :248.3   Max.   :36.43   Max.   :130.0   Max.   :0  
     stemp          salinity        density1         density2     
 Min.   :22.10   Min.   :10.15   Min.   : 4.667   Min.   : 4.667  
 1st Qu.:24.72   1st Qu.:30.56   1st Qu.:20.009   1st Qu.:20.009  
 Median :24.95   Median :31.08   Median :20.328   Median :20.328  
 Mean   :24.95   Mean   :30.86   Mean   :20.228   Mean   :20.228  
 3rd Qu.:25.18   3rd Qu.:31.41   3rd Qu.:20.672   3rd Qu.:20.672  
 Max.   :30.48   Max.   :33.34   Max.   :21.954   Max.   :21.954  
     soundv       sal_flag          stemp_flag          sa_flag         
 Min.   :1507   Length:24866       Length:24866       Length:24866      
 1st Qu.:1529   Class :character   Class :character   Class :character  
 Median :1530   Mode  :character   Mode  :character   Mode  :character  
 Mean   :1530                                                           
 3rd Qu.:1531                                                           
 Max.   :1543                                                           

Quantile values of Seawater temperature

quantile(df1$stemp)
     0%     25%     50%     75%    100% 
22.0952 24.7208 24.9513 25.1791 30.4806 

IQR Calculation

# fivenum() returns (min, lower hinge, median, upper hinge, max); the hinges
# are used as the quartile estimates here.
stemp_hinges <- fivenum(df1$stemp)
LowerQ <- stemp_hinges[2]
UpperQ <- stemp_hinges[4]
# NOTE: this variable shares its name with stats::IQR(); later lines read it
# by this name, so it is kept.
IQR <- UpperQ - LowerQ
# Values lying more than 1.5 * IQR outside the hinges.
# which() keeps only positions where the comparison is TRUE (NA is dropped).
lowerOutlier <- df1$stemp[which(df1$stemp < LowerQ - 1.5 * IQR)]
upperOutlier <- df1$stemp[which(df1$stemp > UpperQ + 1.5 * IQR)]

IQR value :

IQR
[1] 0.4583
length(upperOutlier)
[1] 422
length(lowerOutlier)
[1] 686

IQR outlier detection & flag assignment

# Flag temperature outliers: "T" when a value lies more than 1.5 * IQR outside
# the hinges, otherwise "F".
# Vectorised so the whole column is assigned in one step. A missing
# temperature produces an NA flag instead of aborting with
# "missing value where TRUE/FALSE needed", and an empty data frame is handled
# without error (a `1:maxr` loop would iterate over 1 and 0 when maxr is 0).
stemp_is_outlier <- stemp > UpperQ + IQR * 1.5 | stemp < LowerQ - IQR * 1.5
df1$stemp_flag <- ifelse(stemp_is_outlier, "T", "F")

Data visualization

#par(mfrow=c(3,1))
# Horizontal boxplot of seawater temperature.
boxplot(
  df1$stemp,
  horizontal = TRUE,
  col = "lightgrey",
  xlab = "Seawater temperature",
  ylab = "",
  main = "Boxplot of Seawater temperature"
)

# Histogram on the density scale, asking for about 50 breaks.
hist(
  df1$stemp,
  breaks = 50,
  probability = TRUE,
  col = "lightgrey",
  main = "Histogram of Seawater temperature"
)

#lines(density(df1$stemp), col="red", lwd=2)
# Temperature against observation number, drawn as points.
plot(
  df1$no,
  df1$stemp,
  type = "p",
  pch = 21,
  main = "Scatterplot of Seawater temperature"
)

# Temperature by observation number, coloured by the temperature outlier flag.
# Colour follows stemp_flag (not sal_flag, the salinity flag) so the outliers
# flagged from the temperature IQR are the ones highlighted.
# Colours are keyed by flag value so "T" is red even when only one flag value
# is present in the data.
ggplot(df1, aes(x = no, y = stemp, col = stemp_flag)) +
  geom_point(size = 1) +
  scale_color_manual(values = c("F" = "black", "T" = "red")) +
  ggtitle("Sea temperature Plot")

# Plotly
# Formulas (~column) make plotly read the columns from df1 itself rather than
# from whatever objects of the same name exist on the search path. Giving the
# trace type explicitly avoids the "No trace type specified" message.
plot_ly(
  df1,
  x = ~no,
  y = ~stemp,
  type = "scatter",
  mode = "markers",
  color = ~stemp_flag,
  colors = c("black", "red")
)
No trace type specified:
  Based on info supplied, a 'scatter' trace seems appropriate.
  Read more about this trace type -> https://plot.ly/r/reference/#scatter
No trace type specified:
  Based on info supplied, a 'scatter' trace seems appropriate.
  Read more about this trace type -> https://plot.ly/r/reference/#scatter

염분데이터 Outlier 분석 및 가시화

Quantile values of Salinity

quantile(df1$salinity)
      0%      25%      50%      75%     100% 
10.14620 30.56385 31.08110 31.41320 33.33750 

IQR Calculation

# fivenum() returns (min, lower hinge, median, upper hinge, max); the hinges
# are used as the quartile estimates here.
sal_hinges <- fivenum(df1$salinity)
LowerQ <- sal_hinges[2]
UpperQ <- sal_hinges[4]
# NOTE: this variable shares its name with stats::IQR(); later lines read it
# by this name, so it is kept.
IQR <- UpperQ - LowerQ
# Values lying more than 1.5 * IQR outside the hinges.
# which() keeps only positions where the comparison is TRUE (NA is dropped).
lowerOutlier <- df1$salinity[which(df1$salinity < LowerQ - 1.5 * IQR)]
upperOutlier <- df1$salinity[which(df1$salinity > UpperQ + 1.5 * IQR)]

IQR, Number of upperOutlier & lowerOutlier values :

IQR
[1] 0.8494
length(upperOutlier)
[1] 2
length(lowerOutlier)
[1] 985

IQR outlier detection & flag assignment

# Flag salinity outliers: "T" when a value lies more than 1.5 * IQR outside
# the hinges, otherwise "F".
# Vectorised so the whole column is assigned in one step. A missing salinity
# produces an NA flag instead of aborting with
# "missing value where TRUE/FALSE needed", and an empty data frame is handled
# without error (a `1:maxr` loop would iterate over 1 and 0 when maxr is 0).
sal_is_outlier <- sal > UpperQ + IQR * 1.5 | sal < LowerQ - IQR * 1.5
df1$sal_flag <- ifelse(sal_is_outlier, "T", "F")

Data visualization

# Horizontal boxplot of salinity.
boxplot(
  df1$salinity,
  horizontal = TRUE,
  col = "lightgrey",
  xlab = "Salinity",
  ylab = "",
  main = "Boxplot of Salinity"
)

# Histogram on the density scale, asking for about 50 breaks.
hist(
  df1$salinity,
  breaks = 50,
  probability = TRUE,
  col = "lightgrey",
  main = "Histogram of Salinity"
)

# Salinity against observation number, drawn as points.
plot(
  df1$no,
  df1$salinity,
  type = "p",
  pch = 21,
  main = "Scatterplot of Salinity"
)

# Salinity by observation number, coloured by the salinity outlier flag.
# Colours are keyed by flag value so "T" is red even when only one flag value
# is present in the data.
ggplot(df1, aes(x = no, y = salinity, col = sal_flag)) +
  geom_point(size = 1) +
  scale_color_manual(values = c("F" = "black", "T" = "red")) +
  ggtitle("Salinity Plot")

# Plotly
# Formulas (~column) make plotly read the columns from df1 itself rather than
# from whatever objects of the same name exist on the search path. Giving the
# trace type explicitly avoids the "No trace type specified" message.
plot_ly(
  df1,
  x = ~no,
  y = ~salinity,
  type = "scatter",
  mode = "markers",
  color = ~sal_flag,
  colors = c("black", "red")
)
No trace type specified:
  Based on info supplied, a 'scatter' trace seems appropriate.
  Read more about this trace type -> https://plot.ly/r/reference/#scatter
No trace type specified:
  Based on info supplied, a 'scatter' trace seems appropriate.
  Read more about this trace type -> https://plot.ly/r/reference/#scatter
LS0tCnRpdGxlOiAiSVFS7J2EIOydtOyaqe2VnCDsiJjsmKgsIOyXvOu2hOuNsOydtO2EsCDrtoTshJ0g67CPIOyVhOybg+udvOydtOyWtCDqsoDstpwiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KIyMjIDIwMTYuIDEyLiAxMyDtlbTslpHsnpDro4zsi6QKIyMjIOuMgOyDgeyekOujjCA6IDIwMTYwOV9QSF9IVVNTLmNudgoKIyMjIOuNsOydtO2EsCDqtazsobAoUiBkYXRhZnJhbWUpCmBgYHtyfQpzdHIoZGYxKQpuYW1lcyhkZjEpWzFdPC0ibm8iCm1heHIgPC1ucm93KGRmMSkKc2FsPC1kZjEkc2FsaW5pdHkKc3RlbXA8LWRmMSRzdGVtcApoZWFkKGRmMSkKc3VtbWFyeShkZjEpCmBgYAojIyMgUXVhbnRpbGUgdmFsdWVzIG9mIFNlYXdhdGVyIHRlbXBlcmF0dXJlCmBgYHtyfQpxdWFudGlsZShkZjEkc3RlbXApCmBgYAojIyMgSVFSIENhbGN1YXRpb24KYGBge3J9ClVwcGVyUSA9IGZpdmVudW0oZGYxJHN0ZW1wKVs0XQpMb3dlclEgPSBmaXZlbnVtKGRmMSRzdGVtcClbMl0KSVFSID0gVXBwZXJRIC0gTG93ZXJRCnVwcGVyT3V0bGllciA9IGRmMSRzdGVtcFsgd2hpY2goIGRmMSRzdGVtcCA+IFVwcGVyUSArIElRUiAqIDEuNSkgXQpsb3dlck91dGxpZXIgPSBkZjEkc3RlbXBbIHdoaWNoKCBkZjEkc3RlbXAgPCBMb3dlclEgLSBJUVIgKiAxLjUpIF0KYGBgCiMjIyBJUVIgdmFsdWUgOgpgYGB7cn0KSVFSCmxlbmd0aCh1cHBlck91dGxpZXIpCmxlbmd0aChsb3dlck91dGxpZXIpCmBgYAoKIyMjIElRUiBPdXRsaWVyIGRldGVjdCAmIGluc2VydCBmbGFncy4uCmBgYHtyfQpmb3IgKGkgaW4gMTptYXhyKSB7CiAgaWYgKHN0ZW1wW2ldID4gVXBwZXJRICsgSVFSICogMS41IHwgc3RlbXBbaV0gPCBMb3dlclEgLSBJUVIgKiAxLjUpIHsKICAgIGRmMSRzdGVtcF9mbGFnW2ldIDwtIlQiCiAgfQogIGVsc2UgewogICAgZGYxJHN0ZW1wX2ZsYWdbaV0gPC0iRiIKICB9Cn0KYGBgCgojIyMgVmlzdWFsaXphdGlvbnMgZGF0YS4uCmBgYHtyfQojcGFyKG1mcm93PWMoMywxKSkKYm94cGxvdChkZjEkc3RlbXAsIGhvcml6b250YWwgPSBULCBjb2w9ImxpZ2h0Z3JleSIsIHhsYWI9IlNlYXdhdGVyIHRlbXBlcmF0dXJlIiwgeWxhYj0iIiwgbWFpbj0iQm94cGxvdCBvZiBTZWF3YXRlciB0ZW1wZXJhdHVyZSIpCmhpc3QoZGYxJHN0ZW1wLCBicmVha3M9NTAsIHByb2JhYmlsaXR5ID0gVFJVRSwgY29sPSJsaWdodGdyZXkiLCBtYWluPSJIaXN0b2dyYW0gb2YgU2Vhd2F0ZXIgdGVtcGVyYXR1cmUiKQojbGluZXMoZGVuc2l0eShkZjEkc3RlbXApLCBjb2w9InJlZCIsIGx3ZD0yKQpwbG90KGRmMSRubywgIGRmMSRzdGVtcCwgdHlwZT0icCIsIG1haW49IlNjYXR0ZXJwbG90IG9mIFNlYXdhdGVyIHRlbXBlcmF0dXJlIiwgcGNoPTIxKQpgYGAKCmBgYHtyfQpnZ3Bsb3QoZGYxLCBhZXMoeD1ubywgeT1zdGVtcCwgY29sPXNhbF9mbGFnKSkgKyAgZ2VvbV9wb2ludChzaXplPTEpICsKICBzY2FsZV9jb2xvcl9tYW51YWwodmFsdWVzPWMoImJsYWNrIiwgInJlZCIpKSAr
Z2d0aXRsZSgiU2Vhd2F0ZXIgdGVtcGVyYXR1cmUgUGxvdCIpCmBgYAoKYGBge3J9CiNQbG95bHkKcGxvdF9seShkZjEsIHg9bm8sIHk9c3RlbXAsIG1vZGU9Im1hcmtlcnMiLCBjb2xvciA9IHNhbF9mbGFnLCBjb2xvcnM9YygiYmxhY2siLCAicmVkIikpCmBgYAoKIyDsl7zrtoTrjbDsnbTthLAgT3V0bGllciDrtoTshJ0g67CPIOqwgOyLnO2ZlAoKIyMjIFF1YW50aWxlIHZhbHVlcyBvZiBTZWF3YXRlciB0ZW1wZXJhdHVyZQpgYGB7cn0KcXVhbnRpbGUoZGYxJHNhbGluaXR5KQpgYGAKIyMjIElRUiBDYWxjdWF0aW9uCmBgYHtyfQpVcHBlclEgPSBmaXZlbnVtKGRmMSRzYWxpbml0eSlbNF0KTG93ZXJRID0gZml2ZW51bShkZjEkc2FsaW5pdHkpWzJdCklRUiA9IFVwcGVyUSAtIExvd2VyUQp1cHBlck91dGxpZXIgPSBkZjEkc2FsaW5pdHlbIHdoaWNoKCBkZjEkc2FsaW5pdHkgPiBVcHBlclEgKyBJUVIgKiAxLjUpIF0KbG93ZXJPdXRsaWVyID0gZGYxJHNhbGluaXR5WyB3aGljaCggZGYxJHNhbGluaXR5IDwgTG93ZXJRIC0gSVFSICogMS41KSBdCmBgYAojIyMgSVFSLCBOdW1iZXIgb2YgdXBwZXJPdXRsaWVyICYgbG93ZXJPdXRsaWVyIHZhbHVlcyA6CmBgYHtyfQpJUVIKbGVuZ3RoKHVwcGVyT3V0bGllcikKbGVuZ3RoKGxvd2VyT3V0bGllcikKYGBgCgojIyMgSVFSIE91dGxpZXIgZGV0ZWN0ICYgaW5zZXJ0IGZsYWdzLi4KYGBge3J9CmZvciAoaSBpbiAxOm1heHIpIHsKICBpZiAoc2FsW2ldID4gVXBwZXJRICsgSVFSICogMS41IHwgc2FsW2ldIDwgTG93ZXJRIC0gSVFSICogMS41KSB7CiAgICBkZjEkc2FsX2ZsYWdbaV0gPC0iVCIKICB9CiAgZWxzZSB7CiAgICBkZjEkc2FsX2ZsYWdbaV0gPC0iRiIKICB9Cn0KYGBgCgojIFZpc3VhbGl6YXRpb24gZGF0YS4uLi4KYGBge3J9CmJveHBsb3QoZGYxJHNhbGluaXR5LCBob3Jpem9udGFsID0gVCwgY29sPSJsaWdodGdyZXkiLCB4bGFiPSJTYWxpbml0eSIsIHlsYWI9IiIsIG1haW49IkJveHBsb3Qgb2YgU2FsaW5pdHkiKQpoaXN0KGRmMSRzYWxpbml0eSwgYnJlYWtzPTUwLCBwcm9iYWJpbGl0eSA9IFRSVUUsIGNvbD0ibGlnaHRncmV5IiwgbWFpbj0iSGlzdG9ncmFtIG9mIFNhbGluaXR5IikKcGxvdChkZjEkbm8sICBkZjEkc2FsaW5pdHksIHR5cGU9InAiLCBtYWluPSJTY2F0dGVycGxvdCBvZiBTYWxpbml0eSIsIHBjaD0yMSkKYGBgCgpgYGB7cn0KZ2dwbG90KGRmMSwgYWVzKHg9bm8sIHk9c2FsaW5pdHksIGNvbD1zYWxfZmxhZykpICsgIGdlb21fcG9pbnQoc2l6ZT0xKSArCiAgc2NhbGVfY29sb3JfbWFudWFsKHZhbHVlcz1jKCJibGFjayIsICJyZWQiKSkrZ2d0aXRsZSgiU2FsaW5pdHkgUGxvdCIpCmBgYAoKYGBge3J9CiNQbG95bHkKcGxvdF9seShkZjEsIHg9bm8sIHk9c2FsaW5pdHksIG1vZGU9Im1hcmtlcnMiLCBjb2xvciA9IHNhbF9mbGFnLCBjb2xvcnM9YygiYmxhY2siLCAicmVkIikpCmBgYAoKCg==