# Load the image-segmentation data and inspect its structure.
# Lines starting with "## " are console output recorded from an earlier run.
#
# stringsAsFactors = TRUE is requested explicitly: since R 4.0 read.csv() no
# longer converts strings to factors by default, and levels(seg_data$NAME)
# further down needs NAME to be a factor.
seg_data <- read.csv(
  "C:/Users/ramya/Downloads/segmentation2.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)
# The data viewer is only useful in an interactive session.
if (interactive()) {
  View(seg_data)
}
# Replace the header with clean names. "REGION.CENTROID.ROW" previously carried
# a trailing space, which made the column awkward to reference by name.
colnames(seg_data) <- c(
  "NAME", "REGION.CENTROID.COL", "REGION.CENTROID.ROW", "REGION.PIXEL.COUNT",
  "SHORT.LINE.DENSITY.5", "SHORT.LINE.DENSITY.2", "VEDGE.MEAN", "VEDGE.SD",
  "HEDGE.MEAN", "HEDGE.SD", "INTENSITY.MEAN", "RAWRED.MEAN", "RAWBLUE.MEAN",
  "RAWGREEN.MEAN", "EXRED.MEAN", "EXBLUE.MEAN", "EXGREEN.MEAN", "VALUE.MEAN",
  "SATURATION.MEAN", "HUE.MEAN"
)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
names(seg_data)
## [1] "NAME" "REGION.CENTROID.COL" "REGION.CENTROID.ROW"
## [4] "REGION.PIXEL.COUNT" "SHORT.LINE.DENSITY.5" "SHORT.LINE.DENSITY.2"
## [7] "VEDGE.MEAN" "VEDGE.SD" "HEDGE.MEAN"
## [10] "HEDGE.SD" "INTENSITY.MEAN" "RAWRED.MEAN"
## [13] "RAWBLUE.MEAN" "RAWGREEN.MEAN" "EXRED.MEAN"
## [16] "EXBLUE.MEAN" "EXGREEN.MEAN" "VALUE.MEAN"
## [19] "SATURATION.MEAN" "HUE.MEAN"
str(seg_data)
## 'data.frame': 210 obs. of 20 variables:
## $ NAME : Factor w/ 7 levels "BRICKFACE","CEMENT",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ REGION.CENTROID.COL : int 140 188 105 34 39 16 26 14 11 85 ...
## $ REGION.CENTROID.ROW : int 125 133 139 137 111 128 67 110 108 101 ...
## $ REGION.PIXEL.COUNT : int 9 9 9 9 9 9 9 9 9 9 ...
## $ SHORT.LINE.DENSITY.5: num 0 0 0 0 0 ...
## $ SHORT.LINE.DENSITY.2: num 0 0 0 0 0 0 0 0 0 0 ...
## $ VEDGE.MEAN : num 0.278 0.333 0.278 0.5 0.722 ...
## $ VEDGE.SD : num 0.063 0.267 0.107 0.167 0.374 ...
## $ HEDGE.MEAN : num 0.667 0.5 0.833 1.111 0.889 ...
## $ HEDGE.SD : num 0.3111 0.0778 0.5222 0.4741 0.4296 ...
## $ INTENSITY.MEAN : num 6.19 6.67 6.11 5.85 6.04 ...
## $ RAWRED.MEAN : num 7.33 8.33 7.56 7.78 7 ...
## $ RAWBLUE.MEAN : num 7.67 7.78 7.22 6.44 7.67 ...
## $ RAWGREEN.MEAN : num 3.56 3.89 3.56 3.33 3.44 ...
## $ EXRED.MEAN : num 3.44 5 4.33 5.78 2.89 ...
## $ EXBLUE.MEAN : num 4.44 3.33 3.33 1.78 4.89 ...
## $ EXGREEN.MEAN : num -7.89 -8.33 -7.67 -7.56 -7.78 ...
## $ VALUE.MEAN : num 7.78 8.44 7.56 7.78 7.89 ...
## $ SATURATION.MEAN : num 0.546 0.539 0.533 0.574 0.563 ...
## $ HUE.MEAN : num -1.122 -0.925 -0.966 -0.744 -1.176 ...
glimpse(seg_data)
## Observations: 210
## Variables: 20
## $ NAME <fct> BRICKFACE, BRICKFACE, BRICKFACE, BRICKF...
## $ REGION.CENTROID.COL <int> 140, 188, 105, 34, 39, 16, 26, 14, 11, ...
## $ REGION.CENTROID.ROW <int> 125, 133, 139, 137, 111, 128, 67, 110, ...
## $ REGION.PIXEL.COUNT <int> 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, ...
## $ SHORT.LINE.DENSITY.5 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000...
## $ SHORT.LINE.DENSITY.2 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ VEDGE.MEAN <dbl> 0.2777779, 0.3333333, 0.2777778, 0.5000...
## $ VEDGE.SD <dbl> 0.06296301, 0.26666674, 0.10740744, 0.1...
## $ HEDGE.MEAN <dbl> 0.6666667, 0.5000000, 0.8333333, 1.1111...
## $ HEDGE.SD <dbl> 0.31111118, 0.07777774, 0.52222216, 0.4...
## $ INTENSITY.MEAN <dbl> 6.185185, 6.666666, 6.111111, 5.851852,...
## $ RAWRED.MEAN <dbl> 7.333334, 8.333334, 7.555555, 7.777778,...
## $ RAWBLUE.MEAN <dbl> 7.666666, 7.777778, 7.222222, 6.444445,...
## $ RAWGREEN.MEAN <dbl> 3.555556, 3.888889, 3.555556, 3.333333,...
## $ EXRED.MEAN <dbl> 3.4444444, 5.0000000, 4.3333335, 5.7777...
## $ EXBLUE.MEAN <dbl> 4.4444447, 3.3333333, 3.3333333, 1.7777...
## $ EXGREEN.MEAN <dbl> -7.888889, -8.333333, -7.666666, -7.555...
## $ VALUE.MEAN <dbl> 7.777778, 8.444445, 7.555555, 7.777778,...
## $ SATURATION.MEAN <dbl> 0.5456349, 0.5385802, 0.5326279, 0.5736...
## $ HUE.MEAN <dbl> -1.1218182, -0.9248173, -0.9659458, -0....
dim(seg_data)
## [1] 210 20
levels(seg_data$NAME)
## [1] "BRICKFACE" "CEMENT" "FOLIAGE" "GRASS" "PATH" "SKY"
## [7] "WINDOW"
# Discard every row that contains a missing value before plotting.
seg_data <- na.omit(seg_data)
library(ggplot2)

# Column chart of INTENSITY.MEAN per class, restricted to rows whose mean
# intensity is at most 80. The y axis is forced to include 0.
seg_intensity_mean <- filter(seg_data, INTENSITY.MEAN <= 80)
ggplot(
  seg_intensity_mean,
  aes(x = NAME, y = INTENSITY.MEAN, color = NAME)
) +
  geom_col() +
  expand_limits(y = 0)

# Same chart for the remaining rows (mean intensity above 80).
seg_intensity_meanabove80 <- filter(seg_data, INTENSITY.MEAN > 80)
ggplot(
  seg_intensity_meanabove80,
  aes(x = NAME, y = INTENSITY.MEAN, color = NAME)
) +
  geom_col() +
  expand_limits(y = 0)

# Box plots of each raw colour channel mean, one box per class.
ggplot(data = seg_data, mapping = aes(x = NAME, y = RAWRED.MEAN)) +
  geom_boxplot()

ggplot(data = seg_data, mapping = aes(x = NAME, y = RAWGREEN.MEAN)) +
  geom_boxplot()

ggplot(data = seg_data, mapping = aes(x = NAME, y = RAWBLUE.MEAN)) +
  geom_boxplot()

# Dot plot of HUE.MEAN over all rows.
ggplot(data = seg_data, mapping = aes(x = HUE.MEAN)) +
  geom_dotplot(binwidth = 0.5, dotsize = 0.4)

# Overlaid, semi-transparent densities of the centroid column, one per class.
ggplot(
  data = seg_data,
  mapping = aes(x = REGION.CENTROID.COL, fill = as.factor(NAME))
) +
  geom_density(alpha = 0.4)

# Counts of each short-line-density value, one panel per class.
ggplot(data = seg_data, mapping = aes(x = SHORT.LINE.DENSITY.5)) +
  geom_bar() +
  facet_wrap(~NAME)

ggplot(data = seg_data, mapping = aes(x = SHORT.LINE.DENSITY.2)) +
  geom_bar() +
  facet_wrap(~NAME)

# Rows with mean intensity above 100: vertical vs horizontal edge means,
# each point labelled with its row name, plus a data ellipse.
intensity_mean <- filter(seg_data, INTENSITY.MEAN > 100)
ggplot(intensity_mean, aes(x = VEDGE.MEAN, y = HEDGE.MEAN)) +
  geom_point() +
  geom_text(label = rownames(intensity_mean)) +
  stat_ellipse() +
  ggtitle("vedge mean vs hedge mean")

# Mean intensity per class, one row of points per class, coloured by class.
# A stat_ellipse() layer used to be added here, but the recorded run showed it
# always failing ("Computation failed in `stat_ellipse()`: missing value where
# TRUE/FALSE needed") and drawing nothing -- presumably because the y aesthetic
# is the discrete NAME factor. The layer is dropped; the points are unchanged.
p <- ggplot(seg_data, aes(INTENSITY.MEAN, NAME, color = NAME)) +
  geom_point()
p

# Violin of HUE.MEAN against SATURATION.MEAN.
# NOTE(review): x is continuous here, so no per-class grouping is applied --
# confirm this is the intended plot.
ggplot(
  data = seg_data,
  mapping = aes(x = SATURATION.MEAN, y = HUE.MEAN)
) +
  geom_violin()

# Violin of HUE.MEAN per class, filled by REGION.PIXEL.COUNT.
ggplot(
  data = seg_data,
  mapping = aes(x = NAME, y = HUE.MEAN, fill = REGION.PIXEL.COUNT)
) +
  geom_violin()

# Overlaid, semi-transparent histograms of SATURATION.MEAN, one per class.
# binwidth was 1, which is wider than the data: the recorded str() output shows
# values around 0.5, so almost every row fell into the same one or two bins.
# 0.05 assumes the variable lies roughly in [0, 1] -- TODO confirm the range.
p <- ggplot(seg_data, aes(x = SATURATION.MEAN, fill = NAME, color = NAME)) +
  geom_histogram(position = "identity", alpha = 0.5, binwidth = 0.05)
p

library(caTools)
# Keep only columns 11-14: INTENSITY.MEAN and the three raw colour means.
seg_data <- seg_data[, 11:14]

# Fix the RNG seed so the random 80/20 split is reproducible between runs.
set.seed(123)
split <- sample.split(seg_data$INTENSITY.MEAN, SplitRatio = 0.8)
training_set <- subset(seg_data, split)
test_set <- subset(seg_data, !split)

# The data viewer is only useful in an interactive session.
if (interactive()) {
  View(training_set)
  View(test_set)
}

# Standardise both sets with the TRAINING mean and sd. Scaling the test set
# with its own statistics would put the two sets on different scales.
# scale() returns a numeric matrix, so both objects are matrices from here on.
training_set <- scale(training_set)
test_set <- scale(
  test_set,
  center = attr(training_set, "scaled:center"),
  scale = attr(training_set, "scaled:scale")
)
# NOTE(review): the models fitted further down use data = seg_data, not
# training_set / test_set -- confirm whether the split is meant to be used.
# Simple linear regression of mean intensity on the raw red mean, fitted on
# all rows of seg_data. Lines starting with "## " are recorded console output.
lin_reg <- lm(formula = INTENSITY.MEAN ~ RAWRED.MEAN, data = seg_data)
summary(lin_reg)
##
## Call:
## lm(formula = INTENSITY.MEAN ~ RAWRED.MEAN, data = seg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.4857 -1.1756 0.0926 1.1669 8.3629
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.279105 0.220424 5.803 2.4e-08 ***
## RAWRED.MEAN 1.086271 0.004553 238.593 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.339 on 208 degrees of freedom
## Multiple R-squared: 0.9964, Adjusted R-squared: 0.9963
## F-statistic: 5.693e+04 on 1 and 208 DF, p-value: < 2.2e-16

# Observed points (red) with the fitted line (blue). The data frame is passed
# through `data` and columns are referenced by bare name inside aes() rather
# than as seg_data$..., which also yields clean axis labels.
ggplot(seg_data, aes(x = RAWRED.MEAN)) +
  geom_point(aes(y = INTENSITY.MEAN), color = "red") +
  geom_line(aes(y = predict(lin_reg, newdata = seg_data)), color = "blue") +
  ggtitle("linear model")

# Base-graphics scatter of the same two variables.
plot(seg_data$RAWRED.MEAN, seg_data$INTENSITY.MEAN)

# Add power terms of the raw red mean. Each column is now named after the
# exponent actually applied (previously "MEAN1" held ^4 and "MEAN50" held ^30).
seg_data$RAWRED.MEAN4 <- seg_data$RAWRED.MEAN^4
seg_data$RAWRED.MEAN8 <- seg_data$RAWRED.MEAN^8
seg_data$RAWRED.MEAN30 <- seg_data$RAWRED.MEAN^30

# Regress mean intensity on every other column of seg_data.
# NOTE(review): `~ .` also includes RAWBLUE.MEAN and RAWGREEN.MEAN, so this is
# not a polynomial in RAWRED.MEAN alone; the recorded fit below gives each raw
# channel a coefficient of 0.3333 and R-squared = 1. Confirm this is intended.
# Lines starting with "## " are recorded console output, with the power terms
# relabelled to match the new column names (values unchanged).
ploy_reg <- lm(formula = INTENSITY.MEAN ~ ., data = seg_data)
summary(ploy_reg)
##
## Call:
## lm(formula = INTENSITY.MEAN ~ ., data = seg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.023e-06 -4.112e-07 1.040e-08 3.740e-07 9.396e-06
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.382e-09 2.201e-07 -4.300e-02 0.9660
## RAWRED.MEAN 3.333e-01 5.197e-08 6.414e+06 <2e-16 ***
## RAWBLUE.MEAN 3.333e-01 3.096e-08 1.077e+07 <2e-16 ***
## RAWGREEN.MEAN 3.333e-01 3.460e-08 9.634e+06 <2e-16 ***
## RAWRED.MEAN4 2.635e-14 1.472e-14 1.791e+00 0.0749 .
## RAWRED.MEAN8 -1.566e-22 5.884e-23 -2.662e+00 0.0084 **
## RAWRED.MEAN30 3.262e-70 3.356e-70 9.720e-01 0.3322
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.837e-06 on 203 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 1.544e+16 on 6 and 203 DF, p-value: < 2.2e-16

# Observed points (red) with the model's predictions joined by a line (blue).
# Columns are referenced by bare name inside aes() rather than as seg_data$...
ggplot(seg_data, aes(x = RAWRED.MEAN)) +
  geom_point(aes(y = INTENSITY.MEAN), color = "red") +
  geom_line(aes(y = predict(ploy_reg, newdata = seg_data)), color = "blue") +
  ggtitle("polynomial regression")

# Base-graphics scatter of the same two variables.
plot(seg_data$RAWRED.MEAN, seg_data$INTENSITY.MEAN)
