Bellabeat is a high-tech manufacturer of health-focused products for women. Collecting data on activity, sleep, stress, and reproductive health has allowed Bellabeat to empower women with knowledge about their own health and habits. Since it was founded in 2013, Bellabeat has grown rapidly and quickly positioned itself as a tech-driven wellness company for women. Bellabeat is a successful small company, but it has the potential to become a larger player in the global smart device market. Urška Sršen, co-founder and Chief Creative Officer of Bellabeat, believes that analyzing smart device fitness data could help unlock new growth opportunities for the company. She therefore wants us to analyze smart device usage data to gain insight into how consumers use non-Bellabeat smart devices, and then to select one Bellabeat product to apply these insights to in our presentation.
# Load the daily activity export into a data.table.
# `file =` is named explicitly because the path contains spaces: fread()'s
# first argument (`input`) treats a string with spaces as a shell command
# when no such file exists, instead of failing with a clear "file not found".
daily <- fread(
  file = "D:/med tour Easy files/useffu data/Case study files/Fitabase Data 3.12.16-4.11.16/dailyActivity_merged.csv"
)
# Per-column summary statistics as a quick sanity check of the import.
summary(daily)
## Id ActivityDate TotalSteps TotalDistance
## Min. :1503960366 Length:457 Min. : 0 Min. : 0.000
## 1st Qu.:2347167796 Class :character 1st Qu.: 1988 1st Qu.: 1.410
## Median :4057192912 Mode :character Median : 5986 Median : 4.090
## Mean :4628594643 Mean : 6547 Mean : 4.664
## 3rd Qu.:6391747486 3rd Qu.:10198 3rd Qu.: 7.160
## Max. :8877689391 Max. :28497 Max. :27.530
## TrackerDistance LoggedActivitiesDistance VeryActiveDistance
## Min. : 0.00 Min. :0.0000 Min. : 0.000
## 1st Qu.: 1.28 1st Qu.:0.0000 1st Qu.: 0.000
## Median : 4.09 Median :0.0000 Median : 0.000
## Mean : 4.61 Mean :0.1794 Mean : 1.181
## 3rd Qu.: 7.11 3rd Qu.:0.0000 3rd Qu.: 1.310
## Max. :27.53 Max. :6.7271 Max. :21.920
## ModeratelyActiveDistance LightActiveDistance SedentaryActiveDistance
## Min. :0.0000 Min. : 0.00 Min. :0.000000
## 1st Qu.:0.0000 1st Qu.: 0.87 1st Qu.:0.000000
## Median :0.0200 Median : 2.93 Median :0.000000
## Mean :0.4786 Mean : 2.89 Mean :0.001904
## 3rd Qu.:0.6700 3rd Qu.: 4.46 3rd Qu.:0.000000
## Max. :6.4000 Max. :12.51 Max. :0.100000
## VeryActiveMinutes FairlyActiveMinutes LightlyActiveMinutes SedentaryMinutes
## Min. : 0.00 Min. : 0.00 Min. : 0.0 Min. : 32.0
## 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 64.0 1st Qu.: 728.0
## Median : 0.00 Median : 1.00 Median :181.0 Median :1057.0
## Mean : 16.62 Mean : 13.07 Mean :170.1 Mean : 995.3
## 3rd Qu.: 25.00 3rd Qu.: 16.00 3rd Qu.:257.0 3rd Qu.:1285.0
## Max. :202.00 Max. :660.00 Max. :720.0 Max. :1440.0
## Calories
## Min. : 0
## 1st Qu.:1776
## Median :2062
## Mean :2189
## 3rd Qu.:2667
## Max. :4562
# Sort by participant, then by activity date. The sort keys are named
# explicitly: ordering by the whole table (`order(daily)`) is not a
# well-defined sort key.
# NOTE(review): ActivityDate is a character column, so dates sort as text
# within each participant (e.g. "4/10/2016" before "4/2/2016"). Parse it to
# Date first if chronological order is required.
sorted_df <- daily[order(Id, ActivityDate)]
# Show the first rows to confirm the ordering.
head(sorted_df)
## Id ActivityDate TotalSteps TotalDistance TrackerDistance
## <i64> <char> <int> <num> <num>
## 1: 1503960366 3/25/2016 11004 7.11 7.11
## 2: 1503960366 3/26/2016 17609 11.55 11.55
## 3: 1503960366 3/27/2016 12736 8.53 8.53
## 4: 1503960366 3/28/2016 13231 8.93 8.93
## 5: 1503960366 3/29/2016 12041 7.85 7.85
## 6: 1503960366 3/30/2016 10970 7.16 7.16
## LoggedActivitiesDistance VeryActiveDistance ModeratelyActiveDistance
## <num> <num> <num>
## 1: 0 2.57 0.46
## 2: 0 6.92 0.73
## 3: 0 4.66 0.16
## 4: 0 3.19 0.79
## 5: 0 2.16 1.09
## 6: 0 2.36 0.51
## LightActiveDistance SedentaryActiveDistance VeryActiveMinutes
## <num> <num> <int>
## 1: 4.07 0 33
## 2: 3.91 0 89
## 3: 3.71 0 56
## 4: 4.95 0 39
## 5: 4.61 0 28
## 6: 4.29 0 30
## FairlyActiveMinutes LightlyActiveMinutes SedentaryMinutes Calories
## <int> <int> <int> <int>
## 1: 12 205 804 1819
## 2: 17 274 588 2154
## 3: 5 268 605 1944
## 4: 20 224 1080 1932
## 5: 28 243 763 1886
## 6: 13 223 1174 1820
# One row per participant Id present in the sorted activity table.
distinct_Id <- distinct(sorted_df, Id)
distinct_Id
## Id
## <i64>
## 1: 1503960366
## 2: 1624580081
## 3: 1644430081
## 4: 1844505072
## 5: 1927972279
## 6: 2022484408
## 7: 2026352035
## 8: 2320127002
## 9: 2347167796
## 10: 2873212765
## 11: 2891001357
## 12: 3372868164
## 13: 3977333714
## 14: 4020332650
## 15: 4057192912
## 16: 4319703577
## 17: 4388161847
## 18: 4445114986
## 19: 4558609924
## 20: 4702921684
## 21: 5553957443
## 22: 5577150313
## 23: 6117666160
## 24: 6290855005
## 25: 6391747486
## 26: 6775888955
## 27: 6962181067
## 28: 7007744171
## 29: 7086361926
## 30: 8053475328
## 31: 8253242879
## 32: 8378563200
## 33: 8583815059
## 34: 8792009665
## 35: 8877689391
## Id
# One row per distinct ActivityDate value, in order of first appearance.
distinct_ad <- distinct(sorted_df, ActivityDate)
distinct_ad
## ActivityDate
## <char>
## 1: 3/25/2016
## 2: 3/26/2016
## 3: 3/27/2016
## 4: 3/28/2016
## 5: 3/29/2016
## 6: 3/30/2016
## 7: 3/31/2016
## 8: 4/1/2016
## 9: 4/10/2016
## 10: 4/11/2016
## 11: 4/12/2016
## 12: 4/2/2016
## 13: 4/3/2016
## 14: 4/4/2016
## 15: 4/5/2016
## 16: 4/6/2016
## 17: 4/7/2016
## 18: 4/8/2016
## 19: 4/9/2016
## 20: 3/12/2016
## 21: 3/13/2016
## 22: 3/14/2016
## 23: 3/15/2016
## 24: 3/16/2016
## 25: 3/17/2016
## 26: 3/18/2016
## 27: 3/19/2016
## 28: 3/20/2016
## 29: 3/21/2016
## 30: 3/22/2016
## 31: 3/23/2016
## 32: 3/24/2016
## ActivityDate
# Keep only the daily activity rows recorded for participant 2347167796,
# then display the resulting subset.
filtered_df <- sorted_df[which(sorted_df$Id == 2347167796), ]
print(filtered_df)
## Id ActivityDate TotalSteps TotalDistance TrackerDistance
## <i64> <char> <int> <num> <num>
## 1: 2347167796 3/29/2016 10272 6.79 6.79
## 2: 2347167796 3/30/2016 10533 7.10 7.10
## 3: 2347167796 3/31/2016 6760 4.47 4.47
## 4: 2347167796 4/1/2016 8328 5.51 5.51
## 5: 2347167796 4/10/2016 10078 6.83 6.83
## 6: 2347167796 4/11/2016 10001 6.61 6.61
## 7: 2347167796 4/12/2016 0 0.00 0.00
## 8: 2347167796 4/2/2016 15459 10.22 10.22
## 9: 2347167796 4/3/2016 7485 4.95 4.95
## 10: 2347167796 4/4/2016 10254 6.80 6.80
## 11: 2347167796 4/5/2016 10114 6.82 6.82
## 12: 2347167796 4/6/2016 11107 7.34 7.34
## 13: 2347167796 4/7/2016 10320 6.85 6.85
## 14: 2347167796 4/8/2016 10209 6.75 6.75
## 15: 2347167796 4/9/2016 16081 10.63 10.63
## LoggedActivitiesDistance VeryActiveDistance ModeratelyActiveDistance
## <num> <num> <num>
## 1: 0 0.16 3.12
## 2: 0 1.77 2.06
## 3: 0 0.00 0.00
## 4: 0 0.00 2.00
## 5: 0 1.02 0.12
## 6: 0 0.33 2.93
## 7: 0 0.00 0.00
## 8: 0 3.59 0.81
## 9: 0 0.00 0.00
## 10: 0 1.42 1.23
## 11: 0 1.64 0.48
## 12: 0 0.90 2.76
## 13: 0 0.68 1.23
## 14: 0 0.16 0.35
## 15: 0 1.25 1.82
## LightActiveDistance SedentaryActiveDistance VeryActiveMinutes
## <num> <num> <int>
## 1: 3.50 0 2
## 2: 3.27 0 21
## 3: 4.47 0 0
## 4: 3.50 0 0
## 5: 5.69 0 12
## 6: 3.36 0 5
## 7: 0.00 0 0
## 8: 5.82 0 51
## 9: 4.95 0 0
## 10: 4.16 0 21
## 11: 4.68 0 18
## 12: 3.68 0 14
## 13: 4.94 0 15
## 14: 6.23 0 2
## 15: 7.56 0 16
## FairlyActiveMinutes LightlyActiveMinutes SedentaryMinutes Calories
## <int> <int> <int> <int>
## 1: 58 208 700 2041
## 2: 35 255 615 2187
## 3: 0 250 613 1929
## 4: 33 212 804 1935
## 5: 3 303 463 2164
## 6: 52 196 788 2009
## 7: 0 0 425 399
## 8: 16 327 583 2438
## 9: 0 324 491 2035
## 10: 25 231 638 2099
## 11: 9 269 696 2096
## 12: 46 196 759 2058
## 13: 32 315 1002 2338
## 14: 6 316 711 2104
## 15: 32 401 970 2488
# Mean minutes per day spent at each activity level for the selected
# participant, returned as a one-row table with one column per level.
actmin_df <- filtered_df[, .(
  VeryActiveMinutes = mean(VeryActiveMinutes),
  FairlyActiveMinutes = mean(FairlyActiveMinutes),
  LightlyActiveMinutes = mean(LightlyActiveMinutes),
  SedentaryMinutes = mean(SedentaryMinutes)
)]
actmin_df
## VeryActiveMinutes FairlyActiveMinutes LightlyActiveMinutes SedentaryMinutes
## <num> <num> <num> <num>
## 1: 11.8 23.13333 253.5333 683.8667
# Reshape the one-row table of mean minutes into long format (one row per
# activity level) so each level can be drawn as its own bar.
active_minutes_in_a_day <- melt(
  actmin_df,
  measure.vars = c(
    "VeryActiveMinutes", "FairlyActiveMinutes",
    "LightlyActiveMinutes", "SedentaryMinutes"
  ),
  variable.name = "ActivityLevel",
  value.name = "AverageMinutes"
)

# Bar chart of the average minutes per activity level, labelled with the
# rounded value above each bar.
ggplot(
  active_minutes_in_a_day,
  aes(x = ActivityLevel, y = AverageMinutes, fill = ActivityLevel)
) +
  geom_col() +
  geom_text(aes(label = round(AverageMinutes, 1)), vjust = -0.5) +
  labs(
    title = "Average Daily Activity Minutes by Type",
    subtitle = "Activity level in a day",
    x = "Activity Level",
    y = "Average Minutes"
  ) +
  theme_minimal() +
  # "middle" is not a legend position ggplot2 accepts. The fill legend only
  # repeats the x-axis categories, so it is hidden.
  theme(legend.position = "none")
# Mean distance per day covered at each activity level for the selected
# participant, returned as a one-row table with one column per level.
actdis_df <- filtered_df[, .(
  VeryActiveDistance = mean(VeryActiveDistance),
  ModeratelyActiveDistance = mean(ModeratelyActiveDistance),
  LightActiveDistance = mean(LightActiveDistance),
  SedentaryActiveDistance = mean(SedentaryActiveDistance)
)]
actdis_df
## VeryActiveDistance ModeratelyActiveDistance LightActiveDistance
## <num> <num> <num>
## 1: 0.8613333 1.260667 4.387333
## SedentaryActiveDistance
## <num>
## 1: 0
# Long format: one row per activity level with its mean daily distance.
active_distance_in_a_day <- melt(
  actdis_df,
  measure.vars = c(
    "VeryActiveDistance", "ModeratelyActiveDistance",
    "LightActiveDistance", "SedentaryActiveDistance"
  ),
  variable.name = "ActivityLevel",
  value.name = "AverageDistance"
)

# Share of the total mean distance contributed by each level, in percent.
active_distance_in_a_day[, Percentage := AverageDistance / sum(AverageDistance) * 100]

# Single stacked bar: each segment is one activity level, labelled with its
# percentage at the segment's midpoint.
ggplot(
  active_distance_in_a_day,
  aes(x = "Activity", y = Percentage, fill = ActivityLevel)
) +
  geom_col(width = 0.5) +
  geom_text(
    aes(label = paste0(round(Percentage, 1), "%")),
    position = position_stack(vjust = 0.5),
    color = "white",
    size = 4
  ) +
  labs(
    title = "Proportion of Daily Activity by Type",
    x = NULL,
    y = "Percentage"
  ) +
  theme_minimal() +
  # The x axis carries a single dummy category, so its text and ticks are
  # removed; the legend title is dropped as well.
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    legend.title = element_blank()
  )
# Scatter plot of daily calories against daily total steps for the selected
# participant. Points are used rather than a line because each row is an
# independent daily observation; joining them in x order suggests a sequence
# that does not exist. No constant is mapped inside aes(), which would only
# add a meaningless one-entry legend.
ggplot(filtered_df, aes(x = Calories, y = TotalSteps)) +
  geom_point() +
  labs(title = "Correlation of steps & Calories") +
  theme_minimal()
##### The plot shows a positive correlation between total steps and calories burnt.
# Scatter plot of daily calories against daily total distance for the
# selected participant. Points are used rather than a line because each row
# is an independent daily observation; joining them in x order suggests a
# sequence that does not exist. No constant is mapped inside aes(), which
# would only add a meaningless one-entry legend.
ggplot(filtered_df, aes(x = Calories, y = TotalDistance)) +
  geom_point() +
  labs(title = "Correlation of Distance & Calories") +
  theme_minimal()
##### The plot shows a positive correlation between total distance and calories burnt.
# Load the minute-level sleep log into a data.table.
# `file =` is named explicitly because the path contains spaces: fread()'s
# first argument (`input`) treats a string with spaces as a shell command
# when no such file exists, instead of failing with a clear "file not found".
sleep <- fread(
  file = "D:/med tour Easy files/useffu data/Case study files/Fitabase Data 3.12.16-4.11.16/minuteSleep_merged.csv"
)
# Show the head and tail of the imported table.
print(sleep)
## Id date value log_id
## <i64> <char> <int> <i64>
## 1: 1503960366 3/13/2016 2:39 1 11114919637
## 2: 1503960366 3/13/2016 2:40 1 11114919637
## 3: 1503960366 3/13/2016 2:41 1 11114919637
## 4: 1503960366 3/13/2016 2:42 1 11114919637
## 5: 1503960366 3/13/2016 2:43 1 11114919637
## ---
## 198555: 8792009665 4/9/2016 18:38 1 11357751881
## 198556: 8792009665 4/9/2016 18:39 1 11357751881
## 198557: 8792009665 4/9/2016 18:40 1 11357751881
## 198558: 8792009665 4/9/2016 18:41 1 11357751881
## 198559: 8792009665 4/9/2016 18:42 1 11357751881
# Restrict the sleep log to participant 2347167796 and display the subset.
sleep_filtered <- sleep[which(sleep$Id == 2347167796), ]
sleep_filtered
## Id date value log_id
## <i64> <char> <int> <i64>
## 1: 2347167796 3/11/2016 23:43 3 11103744323
## 2: 2347167796 3/11/2016 23:44 3 11103744323
## 3: 2347167796 3/11/2016 23:45 2 11103744323
## 4: 2347167796 3/11/2016 23:46 1 11103744323
## 5: 2347167796 3/11/2016 23:47 1 11103744323
## ---
## 13480: 2347167796 4/11/2016 6:34 2 11363107879
## 13481: 2347167796 4/11/2016 6:35 3 11363107879
## 13482: 2347167796 4/11/2016 6:36 2 11363107879
## 13483: 2347167796 4/11/2016 6:37 2 11363107879
## 13484: 2347167796 4/11/2016 6:38 2 11363107879
# Parse the "month/day/year hour:minute" text into POSIXct.
# The time zone is pinned to UTC so the recorded wall-clock values are kept
# as written. With the session's local zone, any timestamp that falls in a
# daylight-saving gap would become NA, and as.Date() (which defaults to UTC)
# could report a different calendar day than the one shown.
sleep_filtered$date <- as.POSIXct(
  sleep_filtered$date,
  format = "%m/%d/%Y %H:%M",
  tz = "UTC"
)
# Split the timestamp into separate date and time-of-day text columns.
sleep_filtered$Date_only <- format(sleep_filtered$date, "%Y/%m/%d")
sleep_filtered$Time_only <- format(sleep_filtered$date, "%H:%M:%S")
sleep_filtered
## Id date value log_id Date_only Time_only
## <i64> <POSc> <int> <i64> <char> <char>
## 1: 2347167796 2016-03-11 23:43:00 3 11103744323 2016/03/11 23:43:00
## 2: 2347167796 2016-03-11 23:44:00 3 11103744323 2016/03/11 23:44:00
## 3: 2347167796 2016-03-11 23:45:00 2 11103744323 2016/03/11 23:45:00
## 4: 2347167796 2016-03-11 23:46:00 1 11103744323 2016/03/11 23:46:00
## 5: 2347167796 2016-03-11 23:47:00 1 11103744323 2016/03/11 23:47:00
## ---
## 13480: 2347167796 2016-04-11 06:34:00 2 11363107879 2016/04/11 06:34:00
## 13481: 2347167796 2016-04-11 06:35:00 3 11363107879 2016/04/11 06:35:00
## 13482: 2347167796 2016-04-11 06:36:00 2 11363107879 2016/04/11 06:36:00
## 13483: 2347167796 2016-04-11 06:37:00 2 11363107879 2016/04/11 06:37:00
## 13484: 2347167796 2016-04-11 06:38:00 2 11363107879 2016/04/11 06:38:00
# Make sure we are working with a data.table before using `:=` and `by`.
sleep_filtered <- as.data.table(sleep_filtered)
# Replace the text date with a real Date. The date is taken from the
# timestamp formatted in its own time zone: as.Date() on a POSIXct defaults
# to UTC, which can assign a record to the neighbouring calendar day when the
# timestamp carries a non-UTC zone.
sleep_filtered[, Date_only := as.Date(format(date, "%Y-%m-%d"))]
# Mean of the per-minute `value` codes for each calendar day.
daily_mean <- sleep_filtered[
  ,
  .(Mean_Value = mean(value, na.rm = TRUE)),
  by = Date_only
]
daily_mean
## Date_only Mean_Value
## <Date> <num>
## 1: 2016-03-11 1.043228
## 2: 2016-03-12 1.094188
## 3: 2016-03-13 1.082258
## 4: 2016-03-14 1.121795
## 5: 2016-03-15 1.079855
## 6: 2016-03-16 1.108384
## 7: 2016-03-17 1.090186
## 8: 2016-03-18 1.272517
## 9: 2016-03-19 1.100000
## 10: 2016-03-20 1.100855
## 11: 2016-03-21 1.088843
## 12: 2016-03-22 1.074570
## 13: 2016-03-23 1.131846
## 14: 2016-03-24 1.126273
## 15: 2016-03-25 1.000000
## 16: 2016-03-27 1.130802
## 17: 2016-03-28 1.133929
## 18: 2016-03-29 1.042373
## 19: 2016-03-30 1.126459
## 20: 2016-03-31 1.081456
## 21: 2016-04-01 1.094556
## 22: 2016-04-02 1.116832
## 23: 2016-04-03 1.083200
## 24: 2016-04-04 1.139048
## 25: 2016-04-05 1.176339
## 26: 2016-04-06 1.084211
## 27: 2016-04-07 1.083744
## 28: 2016-04-08 1.013333
## 29: 2016-04-09 1.099715
## 30: 2016-04-10 1.116844
## 31: 2016-04-11 1.130435
## Date_only Mean_Value
# Line chart of the daily mean sleep value over the tracking period.
ggplot(daily_mean, aes(x = Date_only, y = Mean_Value)) +
  geom_line(color = "Blue") +
  ggtitle("Sleep value fluctuation through the days of tracking") +
  theme_minimal()
# Load the heart-rate log into a data.table.
# `file =` is named explicitly because the path contains spaces: fread()'s
# first argument (`input`) treats a string with spaces as a shell command
# when no such file exists, instead of failing with a clear "file not found".
heart <- fread(
  file = "D:/med tour Easy files/useffu data/Case study files/Fitabase Data 3.12.16-4.11.16/heartrate_seconds_merged.csv"
)
# Show the head and tail of the imported table.
print(heart)
## Id Time Value
## <i64> <char> <int>
## 1: 2022484408 4/1/2016 7:54 93
## 2: 2022484408 4/1/2016 7:54 91
## 3: 2022484408 4/1/2016 7:54 96
## 4: 2022484408 4/1/2016 7:54 98
## 5: 2022484408 4/1/2016 7:54 100
## ---
## 1048571: 8792009665 4/7/2016 14:20 80
## 1048572: 8792009665 4/7/2016 14:21 80
## 1048573: 8792009665 4/7/2016 14:21 79
## 1048574: 8792009665 4/7/2016 14:21 79
## 1048575: 8792009665 4/7/2016 14:21 80
# Restrict the heart-rate log to participant 2347167796 and display it.
heart_filtered <- heart[which(heart$Id == 2347167796), ]
heart_filtered
## Id Time Value
## <i64> <char> <int>
## 1: 2347167796 3/29/2016 0:00 69
## 2: 2347167796 3/29/2016 0:00 68
## 3: 2347167796 3/29/2016 0:00 69
## 4: 2347167796 3/29/2016 0:00 69
## 5: 2347167796 3/29/2016 0:00 69
## ---
## 120800: 2347167796 4/11/2016 21:45 81
## 120801: 2347167796 4/11/2016 21:45 83
## 120802: 2347167796 4/11/2016 21:45 84
## 120803: 2347167796 4/11/2016 21:45 86
## 120804: 2347167796 4/11/2016 21:46 86
# Parse the "month/day/year hour:minute" text into POSIXct.
# The time zone is pinned to UTC so the recorded wall-clock values are kept
# as written. With the session's local zone, any timestamp that falls in a
# daylight-saving gap would become NA, and as.Date() (which defaults to UTC)
# could report a different calendar day than the one shown.
heart_filtered$Time <- as.POSIXct(
  heart_filtered$Time,
  format = "%m/%d/%Y %H:%M",
  tz = "UTC"
)
# Split the timestamp into separate date and time-of-day text columns.
heart_filtered$Date_only <- format(heart_filtered$Time, "%Y/%m/%d")
heart_filtered$Time_only <- format(heart_filtered$Time, "%H:%M:%S")
heart_filtered
## Id Time Value Date_only Time_only
## <i64> <POSc> <int> <char> <char>
## 1: 2347167796 2016-03-29 00:00:00 69 2016/03/29 00:00:00
## 2: 2347167796 2016-03-29 00:00:00 68 2016/03/29 00:00:00
## 3: 2347167796 2016-03-29 00:00:00 69 2016/03/29 00:00:00
## 4: 2347167796 2016-03-29 00:00:00 69 2016/03/29 00:00:00
## 5: 2347167796 2016-03-29 00:00:00 69 2016/03/29 00:00:00
## ---
## 120800: 2347167796 2016-04-11 21:45:00 81 2016/04/11 21:45:00
## 120801: 2347167796 2016-04-11 21:45:00 83 2016/04/11 21:45:00
## 120802: 2347167796 2016-04-11 21:45:00 84 2016/04/11 21:45:00
## 120803: 2347167796 2016-04-11 21:45:00 86 2016/04/11 21:45:00
## 120804: 2347167796 2016-04-11 21:46:00 86 2016/04/11 21:46:00
# Make sure we are working with a data.table before using `:=` and `by`.
heart_filtered <- as.data.table(heart_filtered)
# Replace the text date with a real Date. The date is taken from the
# timestamp formatted in its own time zone: as.Date() on a POSIXct defaults
# to UTC, which moves early-morning readings to the previous calendar day
# when the timestamp carries a zone ahead of UTC (and creates a spurious
# extra day at the start of the series).
heart_filtered[, Date_only := as.Date(format(Time, "%Y-%m-%d"))]
# Mean heart-rate reading for each calendar day.
daily_mean <- heart_filtered[
  ,
  .(Mean_Value = mean(Value, na.rm = TRUE)),
  by = Date_only
]
daily_mean
## Date_only Mean_Value
## <Date> <num>
## 1: 2016-03-28 64.79339
## 2: 2016-03-29 70.02429
## 3: 2016-03-30 73.92168
## 4: 2016-03-31 70.10231
## 5: 2016-04-01 71.88555
## 6: 2016-04-02 81.00350
## 7: 2016-04-03 74.40492
## 8: 2016-04-04 75.59180
## 9: 2016-04-05 76.25823
## 10: 2016-04-06 76.08930
## 11: 2016-04-07 83.38784
## 12: 2016-04-08 77.32405
## 13: 2016-04-09 83.99412
## 14: 2016-04-10 78.04957
## 15: 2016-04-11 75.49036
# Heart rate across the day: average the readings that share the same
# time of day (over all tracked days), then plot time on the x axis and the
# mean reading on the y axis. Plotting the raw character `Time_only` on the
# y axis, as before, puts the time on the wrong axis and treats every
# distinct time string as a separate category.
hr_by_time <- heart_filtered[
  ,
  .(Mean_Value = mean(Value, na.rm = TRUE)),
  by = Time_only
]
# Convert "HH:MM:SS" to a date-time so the axis is continuous. Only the time
# part is meaningful; the date part is filled in by as.POSIXct() and ignored.
hr_by_time[, Time_of_day := as.POSIXct(Time_only, format = "%H:%M:%S", tz = "UTC")]
ggplot(hr_by_time, aes(x = Time_of_day, y = Mean_Value)) +
  geom_line(color = "Red") +
  scale_x_datetime(date_labels = "%H:%M") +
  labs(
    title = "Heart rate fluctuation throughout the day",
    x = "Time of day",
    y = "Mean heart rate"
  ) +
  theme_minimal()
# Load the pre-joined minute-level table (heart rate, intensity, steps, METs
# and calories) into a data.table.
# `file =` is named explicitly because the path contains spaces: fread()'s
# first argument (`input`) treats a string with spaces as a shell command
# when no such file exists, instead of failing with a clear "file not found".
HMISC <- fread(
  file = "D:/med tour Easy files/useffu data/Case study files/Fitabase Data 3.12.16-4.11.16/newly joined data/heart_MISC_minutes_data.csv"
)
# Show the head and tail of the imported table.
print(HMISC)
## Id Activity_date Activity_time Value Intensity Steps METs
## <i64> <char> <char> <int> <int> <int> <int>
## 1: 2347167796 3/29/2016 00:55:00.000000 UTC 69 1 5 24
## 2: 2347167796 3/29/2016 00:55:00.000000 UTC 70 1 5 24
## 3: 2347167796 3/29/2016 00:55:00.000000 UTC 71 1 5 24
## 4: 2347167796 3/29/2016 00:55:00.000000 UTC 72 1 5 24
## 5: 2347167796 3/29/2016 00:55:00.000000 UTC 73 1 5 24
## ---
## 2400: 2347167796 3/29/2016 22:03:00.000000 UTC 62 1 5 24
## 2401: 2347167796 3/29/2016 22:03:00.000000 UTC 64 1 5 24
## 2402: 2347167796 3/29/2016 22:03:00.000000 UTC 65 1 5 24
## 2403: 2347167796 3/29/2016 22:03:00.000000 UTC 67 1 5 24
## 2404: 2347167796 3/29/2016 22:03:00.000000 UTC 70 1 5 24
## Calories
## <num>
## 1: 2.2584
## 2: 2.2584
## 3: 2.2584
## 4: 2.2584
## 5: 2.2584
## ---
## 2400: 2.2584
## 2401: 2.2584
## 2402: 2.2584
## 2403: 2.2584
## 2404: 2.2584
# Per-column summary statistics of the joined minute-level table.
print(summary(HMISC))
## Id Activity_date Activity_time Value
## Min. :2347167796 Length:2404 Length:2404 Min. : 59.00
## 1st Qu.:2347167796 Class :character Class :character 1st Qu.: 76.00
## Median :2347167796 Mode :character Mode :character Median : 84.00
## Mean :2347167796 Mean : 84.48
## 3rd Qu.:2347167796 3rd Qu.: 92.00
## Max. :2347167796 Max. :135.00
## Intensity Steps METs Calories
## Min. :0.000 Min. : 0.0 Min. :12.00 Min. :1.129
## 1st Qu.:1.000 1st Qu.: 8.0 1st Qu.:24.00 1st Qu.:2.258
## Median :1.000 Median : 23.0 Median :30.00 Median :2.823
## Mean :1.152 Mean : 38.2 Mean :34.36 Mean :3.233
## 3rd Qu.:1.000 3rd Qu.: 73.0 3rd Qu.:46.00 3rd Qu.:4.329
## Max. :3.000 Max. :128.0 Max. :68.00 Max. :6.399
# Distribution of METs at each intensity level. Intensity is an integer code
# with only a few distinct values, so a line through the raw rows just draws
# vertical zigzags; one box per level shows how METs differ between levels.
ggplot(HMISC, aes(x = factor(Intensity), y = METs)) +
  geom_boxplot(color = "Green") +
  labs(title = "Plot Intensity Vs. METs", x = "Intensity") +
  theme_minimal()