---
title: "Home Literacy"
author: "B. Brossette"
toc: true
number-sections: true
highlight-style: pygments
format:
  html:
    code-fold: true
    code-tools: true
editor: visual
---
```{r}
#| label: package
#| code-summary: "Librairies Loading"
#| message: false
# Packages used throughout the document.
#
# The workspace is intentionally NOT cleared with rm(list = ls()) here:
# rendering already runs in a fresh R session, and when the chunk is run
# interactively that call would wipe the user's objects without detaching
# packages or resetting options, so it never gave a truly clean state.
#
# Attach order matters: plyr is attached before dplyr so that dplyr's verbs
# (mutate, summarise, ...) mask plyr's versions of the same name.
library(readr)
library(stringr)
library(plyr)
library(dplyr)
library(tidyverse)
library(lme4)
library(car)
library(emmeans)
library(ggthemes)
library(data.table)
library(corrplot)
library(Hmisc)
library(lavaan)
library(lavaanPlot)
library(semPlot)
library(effectsize)
library(chron)
library(knitr)
```
# Préparation des données
```{r}
#| message: false
# Build `df`: one row per recorded activity, enriched with the profile of the
# user who performed it, with test-school and teacher records removed.

# --- Activity records -------------------------------------------------------
activity <- read_delim(
  "../src/home_literacy/activity_student_record.csv",
  delim = ";", escape_double = FALSE, trim_ws = TRUE
)

activity <- activity %>%
  mutate(
    # Separator positions are computed per row. str_locate() returns a
    # start/end matrix with one row per input string; indexing it with `[1]`
    # would reuse the first record's position for every record.
    ref_sep = str_locate(user.__ref__, "/")[, "start"],
    date_sep = str_locate(date, " ")[, "start"],
    # The uid is everything after the first "/" of the user reference.
    uid = str_sub(user.__ref__, ref_sep + 1),
    # `date` is split on its first space: calendar day before, time after.
    day = as.Date(str_sub(date, 1, date_sep - 1), format = "%Y-%m-%d"),
    hour = times(str_sub(date, date_sep + 1)),
    # duration / 1000 / 60 -- presumably milliseconds to minutes (the report
    # labels this value "en min"). Kept numeric: converting it to text with
    # format() rounds the values and forces as.numeric() at every use.
    # Existing as.numeric(length) calls keep working on a numeric column.
    length = (duration / 1000) / 60
  ) %>%
  select(
    uid, day, hour, length, activity_type, concentration,
    difficulty, interaction, perceived_time, pleasure
  )

# --- User profiles ----------------------------------------------------------
users <- read_delim(
  "../src/home_literacy/users_list.csv",
  delim = ";", escape_double = FALSE, trim_ws = TRUE
) %>%
  select(uid, user_category, email, grade, school, teacher_name)

# --- Join, then drop test-school and teacher ("T") records -------------------
df <- activity %>%
  left_join(users, by = "uid") %>%
  # NOTE(review): `!=` evaluates to NA for missing values and filter() drops
  # NA rows, so activities with no matching user (or with a missing school /
  # user_category) are removed as well -- confirm this is intended.
  filter(school != "TEST_SCHOOL", user_category != "T")
```
# Statistiques générales
- Nombre d'activités enregistrées : `r length(df$uid)`
- Durée d'une activité (en min) : M = `r mean(as.numeric(df$length))` ; MED = `r median(as.numeric(df$length))` ; SD = `r sd(as.numeric(df$length))`
- Nombre d'utilisateurs actifs : `r length(unique(df$uid))`
```{r}
#| message: false
# Per-user activity summary: one row per (grade, uid) pair with the number of
# activities, the first and last activity day, the span between them, and the
# mean / shortest / longest activity duration.
describe <- df %>%
  # Convert the duration once up front instead of inside every statistic.
  mutate(duration_num = as.numeric(length)) %>%
  group_by(grade, uid) %>%
  summarise(
    nb_act = n(),
    first_activity = min(day),
    last_activity = max(day),
    period_activity = last_activity - first_activity,
    mean_duration = mean(duration_num),
    shortest = min(duration_num),
    longest = max(duration_num)
  )

# Show the users with the longest span between first and last activity first.
by_longest_period <- order(describe$period_activity, decreasing = TRUE)
kable(describe[by_longest_period, ])
```