# Setup ----
# The workspace is intentionally NOT cleared with rm(list = ls()): that call
# deletes the caller's objects when this script is source()d, yet leaves
# attached packages and options untouched, so it does not give a clean
# session. Restart R before running if a fresh session is needed.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#install.packages("GGally")
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
#install.packages("AppliedPredictiveModeling")
#install.packages("ggplot2")
#library(ggplot2)
# Load the abalone data ----
# The CSV location can be overridden with the ABALONE_CSV environment
# variable so the script runs on other machines; when it is unset or empty,
# the original author's local path is used.
abalone_path <- Sys.getenv("ABALONE_CSV")
if (!nzchar(abalone_path)) {
  abalone_path <- "C:/Users/cu_dv/Documents/Abalone_DATA.csv"
}
# Fail early with an actionable message instead of a bare read error.
if (!file.exists(abalone_path)) {
  stop(
    "Abalone data file not found: ", abalone_path,
    " (set the ABALONE_CSV environment variable to its location).",
    call. = FALSE
  )
}
ABALONE <- read_csv(abalone_path)
## Rows: 4177 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Sex
## dbl (8): Length, Diameter, Height, Whole weight, Shucked weight, Viscera wei...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the first rows and column types of the loaded tibble.
print(ABALONE)
## # A tibble: 4,177 × 9
##    Sex   Length Diameter Height `Whole weight` `Shucked weight` `Viscera weight`
##    <chr>  <dbl>    <dbl>  <dbl>          <dbl>            <dbl>            <dbl>
##  1 M      0.455    0.365  0.095          0.514           0.224            0.101 
##  2 M      0.35     0.265  0.09           0.226           0.0995           0.0485
##  3 F      0.53     0.42   0.135          0.677           0.256            0.142 
##  4 M      0.44     0.365  0.125          0.516           0.216            0.114 
##  5 I      0.33     0.255  0.08           0.205           0.0895           0.0395
##  6 I      0.425    0.3    0.095          0.352           0.141            0.0775
##  7 F      0.53     0.415  0.15           0.778           0.237            0.142 
##  8 F      0.545    0.425  0.125          0.768           0.294            0.150 
##  9 M      0.475    0.37   0.125          0.509           0.216            0.112 
## 10 F      0.55     0.44   0.15           0.894           0.314            0.151 
## # ℹ 4,167 more rows
## # ℹ 2 more variables: `Shell weight` <dbl>, Rings <dbl>
# Variable Descriptions
# Sex: Categorical, indicating Male (M), Female (F), or Infant (I).
# Physical measurements (continuous):
#   Length: longest shell measurement
#   Diameter: measurement perpendicular to the length
#   Height: height with the meat inside the shell
# Weights (continuous):
#   Whole weight: weight of the whole abalone
#   Shucked weight: weight of the meat
#   Viscera weight: gut weight (after bleeding)
#   Shell weight: weight of the shell after being dried
# Rings: Integer, the number of rings on the shell; adding 1.5 gives the approximate age in years.

# Histograms of the continuous variables ----
# One histogram per measurement. The title and x-axis label use the column
# name rather than hist()'s default deparsed expression
# (e.g. "ABALONE$`Whole weight`"), which is hard to read on a plot.
continuous_vars <- c(
  "Length", "Diameter", "Height", "Whole weight",
  "Shucked weight", "Viscera weight", "Shell weight"
)
for (var in continuous_vars) {
  hist(
    ABALONE[[var]],
    main = paste("Histogram of", var),
    xlab = var
  )
}

# Frequency table of the categorical variable (Sex)
table(ABALONE[["Sex"]])
## 
##    F    I    M 
## 1307 1342 1528
# Summary statistics of the continuous variables
# Five-number summary plus mean for Length.
summary(ABALONE[["Length"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.075   0.450   0.545   0.524   0.615   0.815
# Five-number summary plus mean for Diameter.
summary(ABALONE[["Diameter"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0550  0.3500  0.4250  0.4079  0.4800  0.6500
# Five-number summary plus mean for Height.
# NOTE(review): the recorded output shows a minimum Height of 0, which looks
# like a data-entry problem -- confirm before modelling.
summary(ABALONE[["Height"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.1150  0.1400  0.1395  0.1650  1.1300
# Five-number summary plus mean for Whole weight.
summary(ABALONE[["Whole weight"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0020  0.4415  0.7995  0.8287  1.1530  2.8255
# Five-number summary plus mean for Shucked weight.
summary(ABALONE[["Shucked weight"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0010  0.1860  0.3360  0.3594  0.5020  1.4880
# Five-number summary plus mean for Viscera weight.
summary(ABALONE[["Viscera weight"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0005  0.0935  0.1710  0.1806  0.2530  0.7600
# Five-number summary plus mean for Shell weight.
summary(ABALONE[["Shell weight"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0015  0.1300  0.2340  0.2388  0.3290  1.0050
# ggplot(ABALONE,aes(x=Sex)) +geom_bar()+
#   ggtitle("FrequencyoftheSeasons")+coord_flip()

# Scatterplot of Length (x-axis) against Height (y-axis)
ggplot(ABALONE, aes(Length, Height)) +
  geom_point() +
  labs(
    title = "Relationship between Length and Height",
    x = "Length",
    y = "Height"
  )

# Boxplot of Whole weight, one box per Sex category
ggplot(data = ABALONE, mapping = aes(x = Sex, y = `Whole weight`)) +
  geom_boxplot()

# Scatterplot matrices of columns 2-8 (Length through Shell weight):
# first with base graphics, then with GGally's ggpairs().
pairs(ABALONE[2:8])

ggpairs(ABALONE[2:8])

# Parallel coordinate plot of columns 2-8, grouped by Sex
ggparcoord(
  data = ABALONE,
  columns = 2:8,
  groupColumn = "Sex"
)