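Load the required libraries (the code below uses `tibble()` and the dplyr verbs `group_by()` and `summarise()` with the `%>%` pipe).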
Create a dataset with 100 rows and two columns: a categorical variable (`category`) and a numeric variable (`numeric_variable`).
# Build the example data set with two columns:
#   category         - drawn with replacement from "A", "B", "C"
#   numeric_variable - normal draws with mean 50 and standard deviation 10
# NOTE(review): `num_rows` is not defined in the visible part of the file; it
# is presumably set earlier (the printed output shows 100 rows) -- confirm.
# No seed is set in the visible code, so the drawn values differ between runs.
data <- tibble(
  category = sample(x = LETTERS[1:3], size = num_rows, replace = TRUE),
  numeric_variable = rnorm(n = num_rows, mean = 50, sd = 10)
)
# Show the compact structure of `data`: its dimensions and each column's type.
str(data)
## tibble [100 × 2] (S3: tbl_df/tbl/data.frame)
## $ category : chr [1:100] "B" "A" "A" "A" ...
## $ numeric_variable: num [1:100] 62.6 51.9 35.7 64.3 44.6 ...
# Print per-column summaries of `data` (length/class for the character column,
# quartiles and mean for the numeric column).
summary(data)
## category numeric_variable
## Length:100 Min. :22.73
## Class :character 1st Qu.:45.13
## Mode :character Median :50.26
## Mean :51.16
## 3rd Qu.:59.80
## Max. :75.00
# Per-category summary of `numeric_variable`: one row per `category` holding
# the number of rows, the mean and the median of the numeric column.
data_summary <- data %>%
  group_by(category) %>%
  summarise(
    count = n(),
    mean_variable = mean(numeric_variable),
    median_variable = median(numeric_variable)
  )
Print the summary. Using ggplot, we then plot graphs of the different columns, filled with different colours and annotated with labels (`labs`).
## # A tibble: 3 × 4
## category count mean_variable median_variable
## <chr> <int> <dbl> <dbl>
## 1 A 35 51.8 50.3
## 2 B 41 50.0 49.1
## 3 C 24 52.2 50.7