# Load the image-segmentation data set.
# stringsAsFactors = TRUE keeps NAME a factor on R >= 4.0 as well (where the
# read.csv default became FALSE); the later levels() call on NAME and the
# recorded str() output ("Factor w/ 7 levels") rely on that.
seg_data <- read.csv(
  "C:/Users/ramya/Downloads/segmentation2.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)

# View() opens a data viewer, which only makes sense in an interactive session.
if (interactive()) {
  View(seg_data)
}

# Replace the header names with the 20 names used throughout the script.
# The third name used to be "REGION.CENTROID.ROW " (with a trailing space),
# which is why the output recorded from that run shows it padded / back-tick
# quoted; the stray space is removed here so the column can be referenced
# like every other column.
colnames(seg_data) <- c(
  "NAME", "REGION.CENTROID.COL", "REGION.CENTROID.ROW",
  "REGION.PIXEL.COUNT", "SHORT.LINE.DENSITY.5", "SHORT.LINE.DENSITY.2",
  "VEDGE.MEAN", "VEDGE.SD", "HEDGE.MEAN", "HEDGE.SD",
  "INTENSITY.MEAN", "RAWRED.MEAN", "RAWBLUE.MEAN", "RAWGREEN.MEAN",
  "EXRED.MEAN", "EXBLUE.MEAN", "EXGREEN.MEAN", "VALUE.MEAN",
  "SATURATION.MEAN", "HUE.MEAN"
)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Quick structural inspection of seg_data. The `##` lines are the console
# output recorded when the script was originally run.

# Column names.
colnames(seg_data)
##  [1] "NAME"                 "REGION.CENTROID.COL"  "REGION.CENTROID.ROW "
##  [4] "REGION.PIXEL.COUNT"   "SHORT.LINE.DENSITY.5" "SHORT.LINE.DENSITY.2"
##  [7] "VEDGE.MEAN"           "VEDGE.SD"             "HEDGE.MEAN"          
## [10] "HEDGE.SD"             "INTENSITY.MEAN"       "RAWRED.MEAN"         
## [13] "RAWBLUE.MEAN"         "RAWGREEN.MEAN"        "EXRED.MEAN"          
## [16] "EXBLUE.MEAN"          "EXGREEN.MEAN"         "VALUE.MEAN"          
## [19] "SATURATION.MEAN"      "HUE.MEAN"

# Compact per-column type / preview listing (base R).
utils::str(seg_data)
## 'data.frame':    210 obs. of  20 variables:
##  $ NAME                : Factor w/ 7 levels "BRICKFACE","CEMENT",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ REGION.CENTROID.COL : int  140 188 105 34 39 16 26 14 11 85 ...
##  $ REGION.CENTROID.ROW : int  125 133 139 137 111 128 67 110 108 101 ...
##  $ REGION.PIXEL.COUNT  : int  9 9 9 9 9 9 9 9 9 9 ...
##  $ SHORT.LINE.DENSITY.5: num  0 0 0 0 0 ...
##  $ SHORT.LINE.DENSITY.2: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ VEDGE.MEAN          : num  0.278 0.333 0.278 0.5 0.722 ...
##  $ VEDGE.SD            : num  0.063 0.267 0.107 0.167 0.374 ...
##  $ HEDGE.MEAN          : num  0.667 0.5 0.833 1.111 0.889 ...
##  $ HEDGE.SD            : num  0.3111 0.0778 0.5222 0.4741 0.4296 ...
##  $ INTENSITY.MEAN      : num  6.19 6.67 6.11 5.85 6.04 ...
##  $ RAWRED.MEAN         : num  7.33 8.33 7.56 7.78 7 ...
##  $ RAWBLUE.MEAN        : num  7.67 7.78 7.22 6.44 7.67 ...
##  $ RAWGREEN.MEAN       : num  3.56 3.89 3.56 3.33 3.44 ...
##  $ EXRED.MEAN          : num  3.44 5 4.33 5.78 2.89 ...
##  $ EXBLUE.MEAN         : num  4.44 3.33 3.33 1.78 4.89 ...
##  $ EXGREEN.MEAN        : num  -7.89 -8.33 -7.67 -7.56 -7.78 ...
##  $ VALUE.MEAN          : num  7.78 8.44 7.56 7.78 7.89 ...
##  $ SATURATION.MEAN     : num  0.546 0.539 0.533 0.574 0.563 ...
##  $ HUE.MEAN            : num  -1.122 -0.925 -0.966 -0.744 -1.176 ...

# The dplyr counterpart of str().
dplyr::glimpse(seg_data)
## Observations: 210
## Variables: 20
## $ NAME                   <fct> BRICKFACE, BRICKFACE, BRICKFACE, BRICKF...
## $ REGION.CENTROID.COL    <int> 140, 188, 105, 34, 39, 16, 26, 14, 11, ...
## $ `REGION.CENTROID.ROW ` <int> 125, 133, 139, 137, 111, 128, 67, 110, ...
## $ REGION.PIXEL.COUNT     <int> 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, ...
## $ SHORT.LINE.DENSITY.5   <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000...
## $ SHORT.LINE.DENSITY.2   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ VEDGE.MEAN             <dbl> 0.2777779, 0.3333333, 0.2777778, 0.5000...
## $ VEDGE.SD               <dbl> 0.06296301, 0.26666674, 0.10740744, 0.1...
## $ HEDGE.MEAN             <dbl> 0.6666667, 0.5000000, 0.8333333, 1.1111...
## $ HEDGE.SD               <dbl> 0.31111118, 0.07777774, 0.52222216, 0.4...
## $ INTENSITY.MEAN         <dbl> 6.185185, 6.666666, 6.111111, 5.851852,...
## $ RAWRED.MEAN            <dbl> 7.333334, 8.333334, 7.555555, 7.777778,...
## $ RAWBLUE.MEAN           <dbl> 7.666666, 7.777778, 7.222222, 6.444445,...
## $ RAWGREEN.MEAN          <dbl> 3.555556, 3.888889, 3.555556, 3.333333,...
## $ EXRED.MEAN             <dbl> 3.4444444, 5.0000000, 4.3333335, 5.7777...
## $ EXBLUE.MEAN            <dbl> 4.4444447, 3.3333333, 3.3333333, 1.7777...
## $ EXGREEN.MEAN           <dbl> -7.888889, -8.333333, -7.666666, -7.555...
## $ VALUE.MEAN             <dbl> 7.777778, 8.444445, 7.555555, 7.777778,...
## $ SATURATION.MEAN        <dbl> 0.5456349, 0.5385802, 0.5326279, 0.5736...
## $ HUE.MEAN               <dbl> -1.1218182, -0.9248173, -0.9659458, -0....

# Number of rows and columns.
c(nrow(seg_data), ncol(seg_data))
## [1] 210  20

# Distinct class labels stored in the NAME factor.
levels(seg_data[["NAME"]])
## [1] "BRICKFACE" "CEMENT"    "FOLIAGE"   "GRASS"     "PATH"      "SKY"      
## [7] "WINDOW"
# Remove every row that has a missing value in any column.
seg_data <- stats::na.omit(seg_data)
library(ggplot2)

# Bars of INTENSITY.MEAN per class, restricted to rows with intensity <= 80.
seg_intensity_mean <- dplyr::filter(seg_data, INTENSITY.MEAN <= 80)
ggplot(
  seg_intensity_mean,
  aes(x = NAME, y = INTENSITY.MEAN, colour = NAME)
) +
  geom_col() +
  expand_limits(y = 0)

# Same chart for the remaining rows (intensity > 80).
seg_intensity_meanabove80 <- dplyr::filter(seg_data, INTENSITY.MEAN > 80)
ggplot(
  seg_intensity_meanabove80,
  aes(x = NAME, y = INTENSITY.MEAN, colour = NAME)
) +
  geom_col() +
  expand_limits(y = 0)

# Box plots of each raw colour channel, one box per class.
ggplot(data = seg_data, mapping = aes(x = NAME, y = RAWRED.MEAN)) +
  geom_boxplot()

ggplot(data = seg_data, mapping = aes(x = NAME, y = RAWGREEN.MEAN)) +
  geom_boxplot()

ggplot(data = seg_data, mapping = aes(x = NAME, y = RAWBLUE.MEAN)) +
  geom_boxplot()

# Dot plot of HUE.MEAN over all rows.
ggplot(data = seg_data, mapping = aes(x = HUE.MEAN)) +
  geom_dotplot(binwidth = 0.5, dotsize = 0.4)

# Overlaid, semi-transparent densities of the centroid column, one per class.
ggplot(
  data = seg_data,
  mapping = aes(x = REGION.CENTROID.COL, fill = as.factor(NAME))
) +
  geom_density(alpha = 0.4)

# Counts of the two short-line-density measures, one panel per class.
ggplot(data = seg_data, mapping = aes(x = SHORT.LINE.DENSITY.5)) +
  geom_bar() +
  facet_wrap(~NAME)

ggplot(data = seg_data, mapping = aes(x = SHORT.LINE.DENSITY.2)) +
  geom_bar() +
  facet_wrap(~NAME)

# Vertical vs. horizontal edge strength for the bright regions only
# (INTENSITY.MEAN > 100), each point labelled with its row name and the
# point cloud summarised by an ellipse.
intensity_mean <- seg_data %>%
  filter(INTENSITY.MEAN > 100)
ggplot(intensity_mean, aes(x = VEDGE.MEAN, y = HEDGE.MEAN)) +
  geom_point() +
  geom_text(label = rownames(intensity_mean)) +
  stat_ellipse() +
  ggtitle("vedge mean vs hedge mean")

# INTENSITY.MEAN per class, coloured by class.
# This plot used to be printed as `p + stat_ellipse()`, but an ellipse needs
# two continuous axes; with the discrete NAME on y the layer never rendered
# and only produced
#   "Computation failed in `stat_ellipse()`: missing value where TRUE/FALSE
#   needed".
# The failing layer is dropped, so the rendered plot is the same without the
# warning.
p <- ggplot(seg_data, aes(INTENSITY.MEAN, NAME, color = NAME)) +
  geom_point()
p

# HUE.MEAN against SATURATION.MEAN drawn as a violin.
# NOTE(review): x is continuous here, so there is no grouping for the violin;
# a `group = ...` aesthetic or a different geom is probably intended - confirm.
ggplot(seg_data, aes(x = SATURATION.MEAN, y = HUE.MEAN)) +
  geom_violin()

# Distribution of HUE.MEAN per class, filled by REGION.PIXEL.COUNT.
ggplot(seg_data, aes(x = NAME, y = HUE.MEAN, fill = REGION.PIXEL.COUNT)) +
  geom_violin()

# Overlaid histograms of SATURATION.MEAN, one per class.
# The previous binwidth of 1 is wider than the values themselves (the recorded
# samples are all around 0.5), which collapsed every class into one or two
# bars; 0.05 gives a readable distribution.
p <- ggplot(seg_data, aes(x = SATURATION.MEAN, fill = NAME, color = NAME)) +
  geom_histogram(position = "identity", alpha = 0.5, binwidth = 0.05)
p

library(caTools)

# Keep only the modelling columns. They are selected by name (previously by
# position 11:14) so the selection does not silently change if columns move.
seg_data <- seg_data[
  ,
  c("INTENSITY.MEAN", "RAWRED.MEAN", "RAWBLUE.MEAN", "RAWGREEN.MEAN")
]

# sample.split() draws at random; fix the seed so the split is reproducible.
set.seed(123)
split <- sample.split(seg_data$INTENSITY.MEAN, SplitRatio = 0.8)
training_set <- subset(seg_data, split == TRUE)
test_set <- subset(seg_data, split == FALSE)

# View() opens a data viewer, which only makes sense in an interactive session.
if (interactive()) {
  View(training_set)
  View(test_set)
}

# Standardise both sets (scale() returns a numeric matrix).
# The test set must be transformed with the training set's centre and scale;
# scaling it with its own mean / sd puts the two sets on different scales.
training_set <- scale(training_set)
test_set <- scale(
  test_set,
  center = attr(training_set, "scaled:center"),
  scale = attr(training_set, "scaled:scale")
)
# Simple linear regression of INTENSITY.MEAN on RAWRED.MEAN, fitted on the
# full seg_data (not on the train / test split).
lin_reg <- lm(formula = INTENSITY.MEAN ~ RAWRED.MEAN, data = seg_data)
summary(lin_reg)
## 
## Call:
## lm(formula = INTENSITY.MEAN ~ RAWRED.MEAN, data = seg_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.4857 -1.1756  0.0926  1.1669  8.3629 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.279105   0.220424   5.803  2.4e-08 ***
## RAWRED.MEAN 1.086271   0.004553 238.593  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.339 on 208 degrees of freedom
## Multiple R-squared:  0.9964, Adjusted R-squared:  0.9963 
## F-statistic: 5.693e+04 on 1 and 208 DF,  p-value: < 2.2e-16

# Observed points (red) with the fitted line (blue).
# The data frame is handed to ggplot() and columns are named bare inside
# aes(); the previous `aes(x = seg_data$...)` form bypassed the plot data and
# labelled the axes with the full `seg_data$...` expressions.
ggplot(seg_data, aes(x = RAWRED.MEAN, y = INTENSITY.MEAN)) +
  geom_point(color = "red") +
  geom_line(
    aes(y = predict(lin_reg, newdata = seg_data)),
    color = "blue"
  ) +
  ggtitle("linear model")

# Base-graphics scatter of the same two variables.
plot(seg_data$RAWRED.MEAN, seg_data$INTENSITY.MEAN)

# Add power terms of RAWRED.MEAN as extra predictor columns.
# NOTE(review): the name suffixes do not match the exponents (MEAN1 holds ^4,
# MEAN50 holds ^30). The names are kept because they appear in the recorded
# summary below - confirm which of name / exponent is intended.
seg_data$RAWRED.MEAN1 <- seg_data$RAWRED.MEAN^4
seg_data$RAWRED.MEAN8 <- seg_data$RAWRED.MEAN^8
seg_data$RAWRED.MEAN50 <- seg_data$RAWRED.MEAN^30

# Regress INTENSITY.MEAN on every other column of seg_data.
# NOTE(review): `~ .` also pulls in RAWBLUE.MEAN and RAWGREEN.MEAN. The
# recorded fit gives each raw channel a coefficient of 1/3 with R-squared 1,
# so the power terms contribute essentially nothing - confirm whether a
# polynomial in RAWRED.MEAN alone was intended.
ploy_reg <- lm(formula = INTENSITY.MEAN ~ ., data = seg_data)
summary(ploy_reg)
## 
## Call:
## lm(formula = INTENSITY.MEAN ~ ., data = seg_data)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -7.023e-06 -4.112e-07  1.040e-08  3.740e-07  9.396e-06 
## 
## Coefficients:
##                 Estimate Std. Error    t value Pr(>|t|)    
## (Intercept)   -9.382e-09  2.201e-07 -4.300e-02   0.9660    
## RAWRED.MEAN    3.333e-01  5.197e-08  6.414e+06   <2e-16 ***
## RAWBLUE.MEAN   3.333e-01  3.096e-08  1.077e+07   <2e-16 ***
## RAWGREEN.MEAN  3.333e-01  3.460e-08  9.634e+06   <2e-16 ***
## RAWRED.MEAN1   2.635e-14  1.472e-14  1.791e+00   0.0749 .  
## RAWRED.MEAN8  -1.566e-22  5.884e-23 -2.662e+00   0.0084 ** 
## RAWRED.MEAN50  3.262e-70  3.356e-70  9.720e-01   0.3322    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.837e-06 on 203 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 1.544e+16 on 6 and 203 DF,  p-value: < 2.2e-16

# Observed points (red) with the model's predictions joined as a line (blue).
# The data frame is handed to ggplot() and columns are named bare inside
# aes(); the previous `aes(x = seg_data$...)` form bypassed the plot data and
# labelled the axes with the full `seg_data$...` expressions.
ggplot(seg_data, aes(x = RAWRED.MEAN, y = INTENSITY.MEAN)) +
  geom_point(color = "red") +
  geom_line(
    aes(y = predict(ploy_reg, newdata = seg_data)),
    color = "blue"
  ) +
  ggtitle("polynomial regression")

# Base-graphics scatter of the same two variables.
plot(seg_data$RAWRED.MEAN, seg_data$INTENSITY.MEAN)