Load Data

'data.frame':   4500 obs. of  11 variables:
 $ X                             : int  1 2 3 4 5 6 7 8 9 10 ...
 $ id                            : int  71 96 103 106 109 118 120 188 191 211 ...
 $ outliersPro                   : num  0.14002 0.06416 0.00631 0.00589 0.15473 ...
 $ estimated_rate_Before_Handling: num  1.316 0.444 0.296 0.511 1.309 ...
 $ estimated_rate_After_Q_b_F_C  : num  2.262 0.5 0.297 0.515 3.781 ...
 $ estimated_rate_After_mean     : num  3.96 0.623 0.302 0.524 6.258 ...
 $ estimated_rate_After_median   : num  4.473 0.648 0.303 0.525 6.83 ...
 $ Before_P_value                : num  0 0 1 0.404 0 ...
 $ Q_b_F_C_P_value               : num  0 0 1 0.579 0 ...
 $ mean_P_value                  : num  0 0 1 0.662 0.997 0 0 0 1 0.483 ...
 $ median_P_value                : num  0 0 1 0.637 0 ...

Univariate Visualization

Outliers Proportion

p1 <- results %>%   ggplot(aes(y = (outliersPro ))) +
  geom_boxplot() + labs(title = "Outliers Proportion")

p2 <- results %>%   ggplot(aes(x = (outliersPro ))) +
  geom_histogram() + labs(title = "Outliers Proportion")

gridExtra::grid.arrange(p1,p2 ,  ncol = 2 )
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Estimated Rate Before Handling

p1 <- results %>%   ggplot(aes(y = (estimated_rate_Before_Handling ))) +
  geom_boxplot() + labs(title = "Estimated Rate Before Handling")

p2 <- results %>%   ggplot(aes(x = (estimated_rate_Before_Handling ))) +
  geom_histogram() + labs(title = "Estimated Rate Before Handling" , subtitle = "scale X >> Log(10) "           ) + scale_x_log10()

gridExtra::grid.arrange(p1,p2 ,  ncol = 2 )
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

qqnorm((results$estimated_rate_Before_Handling), pch = 1, frame = FALSE)
qqline((results$estimated_rate_Before_Handling), col = "steelblue", lwd = 2)

Estimated Rate After Handling {Mean}

p1 <- results %>%   ggplot(aes(y = (estimated_rate_After_mean      ))) +
  geom_boxplot() + labs(title = "Estimated Rate After Handling {Mean}")

p2 <- results %>%   ggplot(aes(x = (estimated_rate_After_mean      ))) +
  geom_histogram()  + labs(title = "Estimated Rate After Handling {Mean}"
                          , subtitle = "scale X >> Log(10) "
                           ) + scale_x_log10()
  

gridExtra::grid.arrange(p1,p2 ,  ncol = 2 )
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

results %>%   select(estimated_rate_Before_Handling , 
                     estimated_rate_After_Q_b_F_C , 
                     estimated_rate_After_mean , 
                     estimated_rate_After_median 
                     ) %>% 
  gather("Method" , "estimatedRate" , estimated_rate_Before_Handling , estimated_rate_After_Q_b_F_C , estimated_rate_After_mean ,estimated_rate_After_median  ) %>% 
  ggplot(aes(x = (estimatedRate))) + 
     geom_histogram( aes( fill  = as.factor(Method))) + 
   labs(title = "Estimated Rate After Handling "
                          , subtitle = "scale X >> Log(10) " ) + scale_x_log10()+
  

  facet_wrap(.~Method)
Error in select(., estimated_rate_Before_Handling, estimated_rate_After_Q_b_F_C,  : 
  unused arguments (estimated_rate_Before_Handling, estimated_rate_After_Q_b_F_C, estimated_rate_After_mean, estimated_rate_After_median)

Relation Between Outliers Proportion and P-value Before


p1 = results %>%
  mutate(fitted = ifelse(Before_P_value > 0.05,TRUE,FALSE)) %>% 
  ggplot(aes(x = results$outliersPro , y= Before_P_value)) + 
  geom_point(aes(color = fitted , alpha = .9)) +
    labs(title = "Outliers Proportion And P_value Before"
                             ) 

p2 = results %>%
  mutate(fitted = ifelse(Before_P_value > 0.05,TRUE,FALSE)) %>% 
  ggplot(aes(x= fitted)) + 
  geom_bar(aes(fill=fitted)) +
    labs(title = "Outliers Proportion And P_value Before"
                             )

gridExtra::grid.arrange(p1,p2 ,  ncol = 2 )

Relation Between Outliers Proportion and P-value After {Mean}


p1 = results %>%
  mutate(fitted = ifelse(mean_P_value > 0.05,TRUE,FALSE)) %>% 
  ggplot(aes(x = results$outliersPro , y= results$mean_P_value)) + 
  geom_point(aes(color = fitted , alpha = .5) ) +
    labs(title = "Outliers Proportion And P_value After"
                             )  

p2 = results %>%
  mutate(fitted = ifelse(mean_P_value > 0.05,TRUE,FALSE)) %>% 
  ggplot(aes(x= fitted)) + 
  geom_bar(aes(fill=fitted)) +
    labs(title = "Outliers Proportion And P_value After"
                             )

gridExtra::grid.arrange(p1,p2 ,  ncol = 2 )

Chi-Squared Test for Association Between { Fitted Value Before And Fitted Value After {Mean} }


ddd <-results %>%
  mutate(fittedBefore = ifelse(Before_P_value > 0.05,TRUE,FALSE)) %>%
  mutate(fittedAfter = ifelse(mean_P_value > 0.05,TRUE,FALSE)) 

 
library(MASS)
tb1<-table(ddd$fittedBefore, ddd$fittedAfter)
tb1
       
        FALSE TRUE
  FALSE  1494 1590
  TRUE      4 1412
chisq.test(tb1)

    Pearson's Chi-squared test with Yates' continuity correction

data:  tb1
X-squared = 1011.4, df = 1, p-value < 2.2e-16

Relation Between Outliers Proportion and P-value After {Median}


p1 = results %>%
  mutate(fitted = ifelse(median_P_value   > 0.05,TRUE,FALSE)) %>% 
  ggplot(aes(x = results$outliersPro , y= results$median_P_value)) + 
  geom_point(aes(color = fitted , alpha = .5) ) +
    labs(title = "Outliers Proportion And P_value After"
                             )  

p2 = results %>%
  mutate(fitted = ifelse(median_P_value > 0.05,TRUE,FALSE)) %>% 
  ggplot(aes(x= fitted)) + 
  geom_bar(aes(fill=fitted)) +
    labs(title = "Outliers Proportion And P_value After"
                             )

gridExtra::grid.arrange(p1,p2 ,  ncol = 2 )



ddd <-results %>%
  mutate(fittedBefore = ifelse(Before_P_value > 0.05,TRUE,FALSE)) %>%
  mutate(fittedAfter = ifelse(median_P_value > 0.05,TRUE,FALSE)) 

 
library(MASS)
tb1<-table(ddd$fittedBefore, ddd$fittedAfter)
tb1
       
        FALSE TRUE
  FALSE  1761 1323
  TRUE      4 1412
chisq.test(tb1)

    Pearson's Chi-squared test with Yates' continuity correction

data:  tb1
X-squared = 1311.8, df = 1, p-value < 2.2e-16

Relation Between Outliers Proportion and P-value After {Q_b_F_C_P}


p1 = results %>%
  mutate(fitted = ifelse(Q_b_F_C_P_value   > 0.05,TRUE,FALSE)) %>% 
  ggplot(aes(x = results$outliersPro , y= results$Q_b_F_C_P_value)) + 
  geom_point(aes(color = fitted , alpha = .5) ) +
    labs(title = "Outliers Proportion And P_value After"
                             )  

p2 = results %>%
  mutate(fitted = ifelse(Q_b_F_C_P_value > 0.05,TRUE,FALSE)) %>% 
  ggplot(aes(x= fitted)) + 
  geom_bar(aes(fill=fitted)) +
    labs(title = "Outliers Proportion And P_value After"
                             )

gridExtra::grid.arrange(p1,p2 ,  ncol = 2 )



ddd <-results %>%
  mutate(fittedBefore = ifelse(Before_P_value > 0.05,TRUE,FALSE)) %>%
  mutate(fittedAfter = ifelse(Q_b_F_C_P_value > 0.05,TRUE,FALSE)) 

 

library(MASS)
tb1<-table(ddd$fittedBefore, ddd$fittedAfter)
tb1
       
        FALSE TRUE
  FALSE  1988 1096
  TRUE      0 1416
chisq.test(tb1)

    Pearson's Chi-squared test with Yates' continuity correction

data:  tb1
X-squared = 1632.5, df = 1, p-value < 2.2e-16
---
title: "Application Exponential Distribution"
output: html_notebook
---
 
 

```{r error=FALSE , warning=FALSE,echo=FALSE}
 library(tidyverse)  
 library(lubridate)
 library(plotly)
 library(hrbrthemes)
```

 
# Load Data 
```{r error=FALSE , warning=FALSE,echo=FALSE}
# Read results.csv (path is resolved against the current working directory)
# into `results`, then print its structure to check column names and types.
results <- read.csv(file = "results.csv")
str(object = results)
```
# Univariate Visualization

## Outliers Proportion
```{r}
# Boxplot (left) and histogram (right) of the outlier proportion column.
outlier_box <- ggplot(results, aes(y = outliersPro)) +
  geom_boxplot() +
  labs(title = "Outliers Proportion")

# bins is stated explicitly (30 is the value stat_bin() falls back to), so the
# picture is unchanged but knitting no longer prints the
# "Pick better value with `binwidth`" message.
outlier_hist <- ggplot(results, aes(x = outliersPro)) +
  geom_histogram(bins = 30) +
  labs(title = "Outliers Proportion")

gridExtra::grid.arrange(outlier_box, outlier_hist, ncol = 2)

```


## Estimated Rate Before Handling
```{r}
# Boxplot on the raw scale (left) and histogram on a log10 x-axis (right) of
# estimated_rate_Before_Handling.
rate_before_box <- ggplot(results, aes(y = estimated_rate_Before_Handling)) +
  geom_boxplot() +
  labs(title = "Estimated Rate Before Handling")

# Explicit bins = 30 keeps the default binning while silencing the stat_bin()
# message that is otherwise printed on every knit.
rate_before_hist <- ggplot(results, aes(x = estimated_rate_Before_Handling)) +
  geom_histogram(bins = 30) +
  labs(
    title = "Estimated Rate Before Handling",
    subtitle = "scale X >> Log(10) "
  ) +
  scale_x_log10()

gridExtra::grid.arrange(rate_before_box, rate_before_hist, ncol = 2)

```

```{r}
# Normal Q-Q plot of estimated_rate_Before_Handling with a reference line
# drawn over it.
rate_before <- results$estimated_rate_Before_Handling
qqnorm(rate_before, pch = 1, frame = FALSE)
qqline(rate_before, col = "steelblue", lwd = 2)

```



## Estimated Rate After Handling {Mean}
```{r}
# Boxplot on the raw scale (left) and histogram on a log10 x-axis (right) of
# estimated_rate_After_mean.
rate_mean_box <- ggplot(results, aes(y = estimated_rate_After_mean)) +
  geom_boxplot() +
  labs(title = "Estimated Rate After Handling {Mean}")

# Explicit bins = 30 keeps the default binning while silencing the stat_bin()
# message that is otherwise printed on every knit.
rate_mean_hist <- ggplot(results, aes(x = estimated_rate_After_mean)) +
  geom_histogram(bins = 30) +
  labs(
    title = "Estimated Rate After Handling {Mean}",
    subtitle = "scale X >> Log(10) "
  ) +
  scale_x_log10()

gridExtra::grid.arrange(rate_mean_box, rate_mean_hist, ncol = 2)

```



```{r}
# One log10-scaled histogram of the estimated rate per handling method,
# faceted by method.
#
# select() is namespace-qualified on purpose: MASS is attached further down in
# this notebook and also exports select(). When that version masks dplyr's,
# this chunk stops with "unused arguments".
results %>%
  dplyr::select(
    estimated_rate_Before_Handling,
    estimated_rate_After_Q_b_F_C,
    estimated_rate_After_mean,
    estimated_rate_After_median
  ) %>%
  # Long format: one row per (method, estimate) pair. pivot_longer() replaces
  # the superseded gather(); row order differs but a histogram ignores it.
  pivot_longer(
    cols = everything(),
    names_to = "Method",
    values_to = "estimatedRate"
  ) %>%
  ggplot(aes(x = estimatedRate)) +
  # bins = 30 is the stat_bin() fallback, stated explicitly to avoid the
  # per-knit message.
  geom_histogram(aes(fill = as.factor(Method)), bins = 30) +
  labs(
    title = "Estimated Rate After Handling ",
    subtitle = "scale X >> Log(10) "
  ) +
  scale_x_log10() +
  facet_wrap(. ~ Method)


```



# Relation Between Outliers Proportion and P-value Before 
```{r}

# Flag every row by whether Before_P_value exceeds 0.05, then draw the p-value
# against the outlier proportion (left) and the row count per flag (right).
# The comparison already yields TRUE/FALSE/NA, so no ifelse() wrapper is needed.
before_flags <- results %>%
  mutate(fitted = Before_P_value > 0.05)

# Columns are named bare inside aes() so they are looked up in the data frame
# given to ggplot() instead of reaching back into the global `results`.
# alpha is a fixed setting outside aes(); mapping a constant inside aes() adds
# a meaningless alpha legend and does not apply the requested opacity.
p1 <- ggplot(before_flags, aes(x = outliersPro, y = Before_P_value)) +
  geom_point(aes(color = fitted), alpha = 0.9) +
  labs(title = "Outliers Proportion And P_value Before")

p2 <- ggplot(before_flags, aes(x = fitted)) +
  geom_bar(aes(fill = fitted)) +
  labs(title = "Outliers Proportion And P_value Before")

gridExtra::grid.arrange(p1, p2, ncol = 2)
```


# Relation Between Outliers Proportion and P-value After {Mean} 
```{r}

# Flag every row by whether mean_P_value exceeds 0.05, then draw the p-value
# against the outlier proportion (left) and the row count per flag (right).
# The comparison already yields TRUE/FALSE/NA, so no ifelse() wrapper is needed.
mean_flags <- results %>%
  mutate(fitted = mean_P_value > 0.05)

# Columns are named bare inside aes() so they are looked up in the data frame
# given to ggplot() instead of reaching back into the global `results`.
# alpha is a fixed setting outside aes(); mapping a constant inside aes() adds
# a meaningless alpha legend and does not apply the requested opacity.
p1 <- ggplot(mean_flags, aes(x = outliersPro, y = mean_P_value)) +
  geom_point(aes(color = fitted), alpha = 0.5) +
  labs(title = "Outliers Proportion And P_value After")

p2 <- ggplot(mean_flags, aes(x = fitted)) +
  geom_bar(aes(fill = fitted)) +
  labs(title = "Outliers Proportion And P_value After")

gridExtra::grid.arrange(p1, p2, ncol = 2)

```


Chi-Squared Test for Association Between { Fitted Value Before And Fitted Value After {Mean} }
```{r}

# Cross-tabulate the "p-value > 0.05" flag from Before_P_value against the
# same flag from mean_P_value, print the table, and run a chi-squared test
# on it.
fit_flags <- results %>%
  mutate(
    fittedBefore = Before_P_value > 0.05,
    fittedAfter = mean_P_value > 0.05
  )

# NOTE(review): nothing visible in this chunk appears to need MASS
# (chisq.test() lives in stats), and attaching it masks dplyr::select() --
# confirm whether this library() call can be dropped.
library(MASS)
tb1 <- table(fit_flags$fittedBefore, fit_flags$fittedAfter)
tb1
chisq.test(tb1)

```



# Relation Between Outliers Proportion and P-value After {Median} 
```{r}

# Flag every row by whether median_P_value exceeds 0.05, then draw the p-value
# against the outlier proportion (left) and the row count per flag (right).
# The comparison already yields TRUE/FALSE/NA, so no ifelse() wrapper is needed.
median_flags <- results %>%
  mutate(fitted = median_P_value > 0.05)

# Columns are named bare inside aes() so they are looked up in the data frame
# given to ggplot() instead of reaching back into the global `results`.
# alpha is a fixed setting outside aes(); mapping a constant inside aes() adds
# a meaningless alpha legend and does not apply the requested opacity.
p1 <- ggplot(median_flags, aes(x = outliersPro, y = median_P_value)) +
  geom_point(aes(color = fitted), alpha = 0.5) +
  labs(title = "Outliers Proportion And P_value After")

p2 <- ggplot(median_flags, aes(x = fitted)) +
  geom_bar(aes(fill = fitted)) +
  labs(title = "Outliers Proportion And P_value After")

gridExtra::grid.arrange(p1, p2, ncol = 2)


# Cross-tabulate the "p-value > 0.05" flag from Before_P_value against the
# same flag from median_P_value, print the table, and run a chi-squared test
# on it.
fit_flags <- results %>%
  mutate(
    fittedBefore = Before_P_value > 0.05,
    fittedAfter = median_P_value > 0.05
  )

# NOTE(review): nothing visible in this chunk appears to need MASS
# (chisq.test() lives in stats), and attaching it masks dplyr::select() --
# confirm whether this library() call can be dropped.
library(MASS)
tb1 <- table(fit_flags$fittedBefore, fit_flags$fittedAfter)
tb1
chisq.test(tb1)


```




# Relation Between Outliers Proportion and P-value After {Q_b_F_C_P} 
```{r}

# Flag every row by whether Q_b_F_C_P_value exceeds 0.05, then draw the
# p-value against the outlier proportion (left) and the row count per flag
# (right). The comparison already yields TRUE/FALSE/NA, so no ifelse() wrapper
# is needed.
qbfc_flags <- results %>%
  mutate(fitted = Q_b_F_C_P_value > 0.05)

# Columns are named bare inside aes() so they are looked up in the data frame
# given to ggplot() instead of reaching back into the global `results`.
# alpha is a fixed setting outside aes(); mapping a constant inside aes() adds
# a meaningless alpha legend and does not apply the requested opacity.
p1 <- ggplot(qbfc_flags, aes(x = outliersPro, y = Q_b_F_C_P_value)) +
  geom_point(aes(color = fitted), alpha = 0.5) +
  labs(title = "Outliers Proportion And P_value After")

p2 <- ggplot(qbfc_flags, aes(x = fitted)) +
  geom_bar(aes(fill = fitted)) +
  labs(title = "Outliers Proportion And P_value After")

gridExtra::grid.arrange(p1, p2, ncol = 2)


# Cross-tabulate the "p-value > 0.05" flag from Before_P_value against the
# same flag from Q_b_F_C_P_value, print the table, and run a chi-squared test
# on it.
fit_flags <- results %>%
  mutate(
    fittedBefore = Before_P_value > 0.05,
    fittedAfter = Q_b_F_C_P_value > 0.05
  )

# NOTE(review): nothing visible in this chunk appears to need MASS
# (chisq.test() lives in stats), and attaching it masks dplyr::select() --
# confirm whether this library() call can be dropped.
library(MASS)
tb1 <- table(fit_flags$fittedBefore, fit_flags$fittedAfter)
tb1
chisq.test(tb1)



```




 


