'data.frame': 4500 obs. of 11 variables:
$ X : int 1 2 3 4 5 6 7 8 9 10 ...
$ id : int 71 96 103 106 109 118 120 188 191 211 ...
$ outliersPro : num 0.14002 0.06416 0.00631 0.00589 0.15473 ...
$ estimated_rate_Before_Handling: num 1.316 0.444 0.296 0.511 1.309 ...
$ estimated_rate_After_Q_b_F_C : num 2.262 0.5 0.297 0.515 3.781 ...
$ estimated_rate_After_mean : num 3.96 0.623 0.302 0.524 6.258 ...
$ estimated_rate_After_median : num 4.473 0.648 0.303 0.525 6.83 ...
$ Before_P_value : num 0 0 1 0.404 0 ...
$ Q_b_F_C_P_value : num 0 0 1 0.579 0 ...
$ mean_P_value : num 0 0 1 0.662 0.997 0 0 0 1 0.483 ...
$ median_P_value : num 0 0 1 0.637 0 ...
# Distribution of the outlier proportion: box plot next to a histogram.
# The column is mapped as a bare name; wrapping it in parentheses makes the
# default axis label read "(outliersPro)".
p1 <- ggplot(results, aes(y = outliersPro)) +
  geom_boxplot() +
  labs(title = "Outliers Proportion")
p2 <- ggplot(results, aes(x = outliersPro)) +
  geom_histogram() +
  labs(title = "Outliers Proportion")
# Draw both panels side by side.
gridExtra::grid.arrange(p1, p2, ncol = 2)
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Distribution of the estimated rate before handling: box plot on the raw
# scale next to a histogram whose x axis is log10-scaled.
# The column is mapped as a bare name; wrapping it in parentheses makes the
# default axis label read "(estimated_rate_Before_Handling)".
p1 <- ggplot(results, aes(y = estimated_rate_Before_Handling)) +
  geom_boxplot() +
  labs(title = "Estimated Rate Before Handling")
p2 <- ggplot(results, aes(x = estimated_rate_Before_Handling)) +
  geom_histogram() +
  labs(
    title = "Estimated Rate Before Handling",
    subtitle = "scale X >> Log(10) "
  ) +
  scale_x_log10()
# Draw both panels side by side.
gridExtra::grid.arrange(p1, p2, ncol = 2)
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Normal Q-Q plot of the estimated rate before handling, with a reference
# line drawn on top. local() keeps the helper variable out of the workspace.
local({
  rate_before <- results[["estimated_rate_Before_Handling"]]
  qqnorm(rate_before, pch = 1, frame = FALSE)
  qqline(rate_before, col = "steelblue", lwd = 2)
})
# Distribution of the estimated rate after the mean-based handling: box plot
# on the raw scale next to a histogram whose x axis is log10-scaled.
# The column is mapped as a bare name; wrapping it in parentheses makes the
# default axis label read "(estimated_rate_After_mean)".
p1 <- ggplot(results, aes(y = estimated_rate_After_mean)) +
  geom_boxplot() +
  labs(title = "Estimated Rate After Handling {Mean}")
p2 <- ggplot(results, aes(x = estimated_rate_After_mean)) +
  geom_histogram() +
  labs(
    title = "Estimated Rate After Handling {Mean}",
    subtitle = "scale X >> Log(10) "
  ) +
  scale_x_log10()
# Draw both panels side by side.
gridExtra::grid.arrange(p1, p2, ncol = 2)
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histograms of the four estimated-rate columns (before handling and after
# each handling method), one facet per column, on a log10 x axis.
#
# select() is namespace-qualified on purpose: once MASS is attached,
# MASS::select() masks dplyr::select() and the unqualified call fails with
# an "unused arguments" error.
results %>%
  dplyr::select(
    estimated_rate_Before_Handling,
    estimated_rate_After_Q_b_F_C,
    estimated_rate_After_mean,
    estimated_rate_After_median
  ) %>%
  # Only the four rate columns remain, so gather() can stack all of them
  # without repeating the column list.
  gather(key = "Method", value = "estimatedRate") %>%
  ggplot(aes(x = estimatedRate)) +
  geom_histogram(aes(fill = as.factor(Method))) +
  labs(
    title = "Estimated Rate After Handling ",
    subtitle = "scale X >> Log(10) "
  ) +
  scale_x_log10() +
  facet_wrap(. ~ Method)
Error in select(., estimated_rate_Before_Handling, estimated_rate_After_Q_b_F_C, :
unused arguments (estimated_rate_Before_Handling, estimated_rate_After_Q_b_F_C, estimated_rate_After_mean, estimated_rate_After_median)
# Outlier proportion against the p-value before handling, with each row
# flagged by whether that p-value exceeds 0.05 (`fitted`).
# Columns are mapped by bare name so the aesthetics come from the piped data
# (including `fitted`) rather than from the global `results`, and the
# constant alpha is set outside aes() so it is not turned into a legend.
p1 <- results %>%
  mutate(fitted = Before_P_value > 0.05) %>%
  ggplot(aes(x = outliersPro, y = Before_P_value)) +
  geom_point(aes(color = fitted), alpha = 0.9) +
  labs(title = "Outliers Proportion And P_value Before")
# Number of rows with and without a p-value above 0.05.
p2 <- results %>%
  mutate(fitted = Before_P_value > 0.05) %>%
  ggplot(aes(x = fitted)) +
  geom_bar(aes(fill = fitted)) +
  labs(title = "Outliers Proportion And P_value Before")
# Draw both panels side by side.
gridExtra::grid.arrange(p1, p2, ncol = 2)
# Outlier proportion against the p-value after the mean-based handling, with
# each row flagged by whether that p-value exceeds 0.05 (`fitted`).
# Columns are mapped by bare name so the aesthetics come from the piped data
# (including `fitted`) rather than from the global `results`, and the
# constant alpha is set outside aes() so it is not turned into a legend.
p1 <- results %>%
  mutate(fitted = mean_P_value > 0.05) %>%
  ggplot(aes(x = outliersPro, y = mean_P_value)) +
  geom_point(aes(color = fitted), alpha = 0.5) +
  labs(title = "Outliers Proportion And P_value After")
# Number of rows with and without a p-value above 0.05.
p2 <- results %>%
  mutate(fitted = mean_P_value > 0.05) %>%
  ggplot(aes(x = fitted)) +
  geom_bar(aes(fill = fitted)) +
  labs(title = "Outliers Proportion And P_value After")
# Draw both panels side by side.
gridExtra::grid.arrange(p1, p2, ncol = 2)
Chi-squared Test for Association Between Fitted Values Before and Fitted Values After {Mean}
# Flag each row by whether its p-value exceeds 0.05, before handling and
# after the mean-based handling, then cross-tabulate the two flags.
ddd <- mutate(
  results,
  fittedBefore = Before_P_value > 0.05,
  fittedAfter = mean_P_value > 0.05
)
# NOTE(review): nothing visible in this section calls into MASS (table() is
# base, chisq.test() is stats); if dplyr is attached, attaching MASS masks
# dplyr::select() - confirm whether this is still needed.
library(MASS)
# Rows: fittedBefore, columns: fittedAfter.
tb1 <- table(ddd[["fittedBefore"]], ddd[["fittedAfter"]])
tb1
FALSE TRUE
FALSE 1494 1590
TRUE 4 1412
# Pearson chi-squared test on the before/after contingency table `tb1`.
stats::chisq.test(tb1)
Pearson's Chi-squared test with Yates' continuity correction
data: tb1
X-squared = 1011.4, df = 1, p-value < 2.2e-16
# Outlier proportion against the p-value after the median-based handling,
# with each row flagged by whether that p-value exceeds 0.05 (`fitted`).
# Columns are mapped by bare name so the aesthetics come from the piped data
# (including `fitted`) rather than from the global `results`, and the
# constant alpha is set outside aes() so it is not turned into a legend.
p1 <- results %>%
  mutate(fitted = median_P_value > 0.05) %>%
  ggplot(aes(x = outliersPro, y = median_P_value)) +
  geom_point(aes(color = fitted), alpha = 0.5) +
  labs(title = "Outliers Proportion And P_value After")
# Number of rows with and without a p-value above 0.05.
p2 <- results %>%
  mutate(fitted = median_P_value > 0.05) %>%
  ggplot(aes(x = fitted)) +
  geom_bar(aes(fill = fitted)) +
  labs(title = "Outliers Proportion And P_value After")
# Draw both panels side by side.
gridExtra::grid.arrange(p1, p2, ncol = 2)
# Flag each row by whether its p-value exceeds 0.05, before handling and
# after the median-based handling, then cross-tabulate the two flags.
ddd <- mutate(
  results,
  fittedBefore = Before_P_value > 0.05,
  fittedAfter = median_P_value > 0.05
)
# NOTE(review): nothing visible in this section calls into MASS (table() is
# base, chisq.test() is stats); if dplyr is attached, attaching MASS masks
# dplyr::select() - confirm whether this is still needed.
library(MASS)
# Rows: fittedBefore, columns: fittedAfter.
tb1 <- table(ddd[["fittedBefore"]], ddd[["fittedAfter"]])
tb1
FALSE TRUE
FALSE 1761 1323
TRUE 4 1412
# Pearson chi-squared test on the before/after contingency table `tb1`.
stats::chisq.test(tb1)
Pearson's Chi-squared test with Yates' continuity correction
data: tb1
X-squared = 1311.8, df = 1, p-value < 2.2e-16
# Outlier proportion against the p-value after the Q_b_F_C handling, with
# each row flagged by whether that p-value exceeds 0.05 (`fitted`).
# Columns are mapped by bare name so the aesthetics come from the piped data
# (including `fitted`) rather than from the global `results`, and the
# constant alpha is set outside aes() so it is not turned into a legend.
p1 <- results %>%
  mutate(fitted = Q_b_F_C_P_value > 0.05) %>%
  ggplot(aes(x = outliersPro, y = Q_b_F_C_P_value)) +
  geom_point(aes(color = fitted), alpha = 0.5) +
  labs(title = "Outliers Proportion And P_value After")
# Number of rows with and without a p-value above 0.05.
p2 <- results %>%
  mutate(fitted = Q_b_F_C_P_value > 0.05) %>%
  ggplot(aes(x = fitted)) +
  geom_bar(aes(fill = fitted)) +
  labs(title = "Outliers Proportion And P_value After")
# Draw both panels side by side.
gridExtra::grid.arrange(p1, p2, ncol = 2)
# Flag each row by whether its p-value exceeds 0.05, before handling and
# after the Q_b_F_C handling, then cross-tabulate the two flags.
ddd <- mutate(
  results,
  fittedBefore = Before_P_value > 0.05,
  fittedAfter = Q_b_F_C_P_value > 0.05
)
# NOTE(review): nothing visible in this section calls into MASS (table() is
# base, chisq.test() is stats); if dplyr is attached, attaching MASS masks
# dplyr::select() - confirm whether this is still needed.
library(MASS)
# Rows: fittedBefore, columns: fittedAfter.
tb1 <- table(ddd[["fittedBefore"]], ddd[["fittedAfter"]])
tb1
FALSE TRUE
FALSE 1988 1096
TRUE 0 1416
# Pearson chi-squared test on the before/after contingency table `tb1`.
stats::chisq.test(tb1)
Pearson's Chi-squared test with Yates' continuity correction
data: tb1
X-squared = 1632.5, df = 1, p-value < 2.2e-16