# Packages ----
# One library() call per line: R cannot parse several calls written on the
# same line without a separator.
library(tidyverse)
library(lubridate)
library(ggplot2)
# Load raw data ----
# Directory holding the raw CSV exports. Change this single value to run the
# script against another copy of the data.
data_dir <- "/home/m/Datanalyst/dossier1"

# Each table is delivered as two files (".1" and ".2") that are stacked in the
# next step.
activity1 <- read_csv(file.path(data_dir, "dailyActivity_merged.1.csv"))
activity2 <- read_csv(file.path(data_dir, "dailyActivity_merged.2.csv"))
sleep1 <- read_csv(file.path(data_dir, "minuteSleep_merged.1.csv"))
sleep2 <- read_csv(file.path(data_dir, "minuteSleep_merged.2.csv"))
weight1 <- read_csv(file.path(data_dir, "weightLogInfo_merged.1.csv"))
weight2 <- read_csv(file.path(data_dir, "weightLogInfo_merged.2.csv"))
# Combine the two parts of each table ----
# bind_rows() stacks the ".1" and ".2" files row-wise, matching columns by
# name.
dailyActivity <- bind_rows(activity1, activity2)
minuteSleep <- bind_rows(sleep1, sleep2)
weightLogInfo <- bind_rows(weight1, weight2)
# Clean daily activity ----
# Stage 1: switch every column to snake_case.
dailyActivity <- dailyActivity %>%
  rename(
    id = Id,
    date = ActivityDate,
    total_steps = TotalSteps,
    total_distance = TotalDistance,
    tracker_distance = TrackerDistance,
    logged_activities_distance = LoggedActivitiesDistance,
    very_active_distance = VeryActiveDistance,
    moderately_active_distance = ModeratelyActiveDistance,
    light_active_distance = LightActiveDistance,
    sedentary_active_distance = SedentaryActiveDistance,
    very_active_minutes = VeryActiveMinutes,
    fairly_active_minutes = FairlyActiveMinutes,
    lightly_active_minutes = LightlyActiveMinutes,
    sedentary_minutes = SedentaryMinutes,
    calories = Calories
  )

# Stage 2: parse the month-first date, store the id as text, then drop exact
# duplicate rows and rows without a positive calorie count or with negative
# steps.
dailyActivity <- dailyActivity %>%
  mutate(
    date = mdy(date),
    id = as.character(id)
  ) %>%
  distinct() %>%
  filter(calories > 0, total_steps >= 0)

# Stage 3: derived columns. Active time is the sum of the three "active"
# minute counters; day_of_week is the labelled weekday of `date`.
dailyActivity <- dailyActivity %>%
  mutate(
    total_active_minutes =
      very_active_minutes + fairly_active_minutes + lightly_active_minutes,
    total_hours_active = total_active_minutes / 60,
    day_of_week = wday(date, label = TRUE)
  )
# Clean minute-level sleep records ----
# Renames the columns, parses the timestamp, stores the id as text, derives
# the calendar day of each record and drops exact duplicate rows.
#
# NOTE(review): the activity table's dates are parsed month-first (mdy). If
# these timestamps come from the same export and are month-first as well,
# ymd_hms() alone returns NA for every row and the later join on `day`
# matches nothing. The mdy_hms() fallback covers that case; input that
# ymd_hms() already parses is left exactly as before.
minuteSleep <- minuteSleep %>%
  rename(
    id = Id,
    sleep_minutes = value
  ) %>%
  mutate(
    date = coalesce(
      ymd_hms(date, quiet = TRUE),
      mdy_hms(date, quiet = TRUE)
    ),
    id = as.character(id),
    day = as_date(date)
  ) %>%
  distinct()

# The parsers run quietly above, so report unparsed timestamps explicitly
# instead of letting NA days flow silently into the join.
if (anyNA(minuteSleep$date)) {
  warning(
    "minuteSleep: ", sum(is.na(minuteSleep$date)),
    " timestamp(s) are missing or could not be parsed",
    call. = FALSE
  )
}
# Daily sleep total per user ----
# One row per (id, day) holding the sum of `sleep_minutes`, ignoring NAs; the
# result is returned ungrouped.
#
# NOTE(review): `sleep_minutes` is the raw `value` column of the minute-level
# file. If that column is a per-minute sleep-state code rather than a
# duration, this sum is not a number of minutes and the rows in the wanted
# state should be counted instead -- confirm against the data dictionary.
sleep_summary <- ungroup(
  summarise(
    group_by(minuteSleep, id, day),
    total_sleep_minutes = sum(sleep_minutes, na.rm = TRUE)
  )
)
# Clean weight log ----
# Renames the columns, parses the log timestamp, stores the id as text, drops
# exact duplicate rows, keeps only the columns used later and derives the
# calendar day of each log entry.
#
# NOTE(review): the activity table's dates are parsed month-first (mdy). If
# these timestamps come from the same export and are month-first as well,
# ymd_hms() alone returns NA for every row and the later join on `day`
# matches nothing. The mdy_hms() fallback covers that case; input that
# ymd_hms() already parses is left exactly as before.
weightLogInfo <- weightLogInfo %>%
  rename(
    id = Id,
    date = Date,
    weight_kg = WeightKg,
    bmi = BMI
  ) %>%
  mutate(
    date = coalesce(
      ymd_hms(date, quiet = TRUE),
      mdy_hms(date, quiet = TRUE)
    ),
    id = as.character(id)
  ) %>%
  distinct() %>%
  select(id, date, weight_kg, bmi) %>%
  mutate(day = as_date(date))

# The parsers run quietly above, so report unparsed timestamps explicitly
# instead of letting NA days flow silently into the join.
if (anyNA(weightLogInfo$date)) {
  warning(
    "weightLogInfo: ", sum(is.na(weightLogInfo$date)),
    " timestamp(s) are missing or could not be parsed",
    call. = FALSE
  )
}
# Attach daily sleep totals to activity ----
# Left join: every activity row is kept; days without a sleep record get NA in
# total_sleep_minutes. The activity `date` is matched against the sleep `day`.
activity_sleep <- dailyActivity %>%
  left_join(sleep_summary, by = c("id" = "id", "date" = "day"))
# Attach weight logs and finish the analysis table ----
# The weight log can hold several entries for the same user and day; joining
# it as-is would duplicate the matching activity row once per entry and bias
# every later mean and correlation. Only the most recent entry of each
# (id, day) is therefore kept before the join. With at most one entry per
# user-day the result is the same as a plain join.
final_data <- activity_sleep %>%
  left_join(
    weightLogInfo %>%
      arrange(id, day, desc(date)) %>%
      distinct(id, day, .keep_all = TRUE),
    by = c("id", "date" = "day")
  ) %>%
  mutate(
    total_sleep_hours = total_sleep_minutes / 60,
    # A recorded value of 0 is treated as "not measured".
    bmi = na_if(bmi, 0),
    weight_kg = na_if(weight_kg, 0)
  )
# Export the cleaned table ----
# Written to the current working directory.
write_csv(final_data, "fitbit_data_clean.csv")

# cat() adds no line break of its own; the trailing "\n" keeps consecutive
# status messages on separate lines.
cat("✅ Nettoyage terminé ! Fichier exporté : fitbit_data_clean.csv\n")
cat("📊 Création des visualisations…\n")
# Plot: steps vs. calories ----
# Scatter of daily steps against calories, coloured by weekday, with a single
# black linear trend line. The explicit print() draws the plot even when the
# script is run through source(), where top-level values are not auto-printed.
print(
  ggplot(final_data, aes(x = total_steps, y = calories, color = day_of_week)) +
    geom_point(alpha = 0.6) +
    geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "black") +
    labs(
      title = "Corrélation entre le nombre de pas et les calories brûlées",
      x = "Nombre total de pas",
      y = "Calories brûlées"
    ) +
    theme_minimal()
)
# Plot: sleep vs. calories ----
# Scatter of nightly sleep hours against calories with a red linear trend
# line. Days without sleep data have NA hours and are left out by ggplot2.
# The explicit print() draws the plot even when the script is run through
# source(), where top-level values are not auto-printed.
print(
  ggplot(final_data, aes(x = total_sleep_hours, y = calories)) +
    geom_point(alpha = 0.6, color = "blue") +
    geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "red") +
    labs(
      title = "Impact du sommeil sur la dépense calorique",
      x = "Heures de sommeil",
      y = "Calories brûlées"
    ) +
    theme_minimal()
)
# Weekday averages ----
# One row per weekday with the mean step count and mean calories (NAs
# ignored).
daily_avg <- summarise(
  group_by(final_data, day_of_week),
  avg_steps = mean(total_steps, na.rm = TRUE),
  avg_calories = mean(calories, na.rm = TRUE)
)
# Plot: mean steps per weekday ----
# Bar chart of the weekday averages. The legend is hidden because the fill
# colour repeats the x axis. The explicit print() draws the plot even when the
# script is run through source(), where top-level values are not auto-printed.
print(
  ggplot(daily_avg, aes(x = day_of_week, y = avg_steps, fill = day_of_week)) +
    geom_col() +
    labs(
      title = "Nombre moyen de pas selon le jour de la semaine",
      x = "Jour",
      y = "Pas moyens"
    ) +
    theme_minimal() +
    theme(legend.position = "none")
)
# Progress messages ----
# cat() adds no line break of its own; the trailing "\n" keeps each message on
# its own line.
cat("✅ Visualisations prêtes !\n")
cat("📈 Analyse statistique en cours…\n")
# Correlation input ----
# Keep only the numeric measures of interest and the rows where all of them
# are present.
corr_data <- drop_na(
  select(
    final_data,
    total_steps,
    total_hours_active,
    total_sleep_hours,
    calories,
    weight_kg,
    bmi
  )
)
# Correlation matrix ----
# Correlations between the selected measures (cor() defaults), printed rounded
# to two decimals. The "\n" keeps the heading off the matrix's first line.
cor_matrix <- cor(corr_data)
cat("📊 Matrice de corrélation :\n")
print(round(cor_matrix, 2))
# Overall averages ----
# Single-row table with the mean of each key measure over all users and days
# (NAs ignored). The "\n" keeps the heading off the table's first line.
cat("📈 Statistiques globales :\n")
summary_stats <- final_data %>%
  summarise(
    moy_steps = mean(total_steps, na.rm = TRUE),
    moy_calories = mean(calories, na.rm = TRUE),
    moy_sleep = mean(total_sleep_hours, na.rm = TRUE),
    moy_active = mean(total_hours_active, na.rm = TRUE),
    moy_bmi = mean(bmi, na.rm = TRUE)
  )
print(summary_stats)
# Per-user averages ----
# One row per user id with the mean of each key measure (NAs ignored).
user_summary <- summarise(
  group_by(final_data, id),
  mean_steps = mean(total_steps, na.rm = TRUE),
  mean_calories = mean(calories, na.rm = TRUE),
  mean_sleep = mean(total_sleep_hours, na.rm = TRUE),
  mean_active = mean(total_hours_active, na.rm = TRUE),
  mean_bmi = mean(bmi, na.rm = TRUE)
)
# Show per-user averages ----
# The "\n" keeps the heading off the table's first line.
cat("👤 Moyennes par utilisateur :\n")
print(head(user_summary, 10)) # show the first 10 users
# Most active users ----
# The five users with the highest mean daily step count, highest first.
top_active <- head(arrange(user_summary, desc(mean_steps)), 5)
# Show the top five and finish ----
# cat() adds no line break of its own; the trailing "\n" keeps the heading off
# the table and ends the final message cleanly.
cat("🏆 Top 5 des utilisateurs les plus actifs :\n")
print(top_active)
cat("✅ Analyse complète terminée avec succès !\n")