What does this notebook do?
import pandas as pd
import plotly.express as px
import datetime
# Read in CSV file
df = pd.read_csv('export.csv')
# Split the raw "occurred_at" text into a day-only column and a timestamp without
# the "time zone offset".
# NOTE(review): assumes values look like "YYYY-MM-DD HH:MM:SS +ZZZZ", i.e. the last
# 6 characters are the offset and the last 15 are time plus offset — confirm against
# the export format.
# The vectorized .str accessor replaces the per-row lambda so that rows with a missing
# timestamp stay NaN instead of raising TypeError on len() of a float.
# The day column is derived first, while "occurred_at" still holds the full text.
df['occurred_at_day'] = df['occurred_at'].str[:-15]
df['occurred_at'] = df['occurred_at'].str[:-6]
df.head()
 | class | value | time | length | photo_url | description | occurred_at | body | updated_at | started_at | ended_at | created_by | occurred_at_day |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | GlucoseMeasurement | 100.0 | NaN | NaN | NaN | NaN | 2021-08-15 23:48:06 | NaN | NaN | NaN | NaN | NaN | 2021-08-15 |
1 | GlucoseMeasurement | 99.0 | NaN | NaN | NaN | NaN | 2021-08-15 23:33:06 | NaN | NaN | NaN | NaN | NaN | 2021-08-15 |
2 | GlucoseMeasurement | 99.0 | NaN | NaN | NaN | NaN | 2021-08-15 23:18:06 | NaN | NaN | NaN | NaN | NaN | 2021-08-15 |
3 | GlucoseMeasurement | 98.0 | NaN | NaN | NaN | NaN | 2021-08-15 23:03:06 | NaN | NaN | NaN | NaN | NaN | 2021-08-15 |
4 | GlucoseMeasurement | 97.0 | NaN | NaN | NaN | NaN | 2021-08-15 22:48:06 | NaN | NaN | NaN | NaN | NaN | 2021-08-15 |
# Collect each distinct day that appears in the export (first-seen order) and show them
daysWithData = pd.unique(df['occurred_at_day'])
print(daysWithData)
['2021-08-15' '2021-08-14' '2021-08-13' '2021-08-12' '2021-08-11' '2021-08-10' '2021-08-09' '2021-08-08' '2021-08-07' '2021-08-06' '2021-08-05']
# Keep only the rows whose "class" is GlucoseMeasurement; other record types are dropped
gm = df.loc[df['class'].eq('GlucoseMeasurement')]
gm.head()
 | class | value | time | length | photo_url | description | occurred_at | body | updated_at | started_at | ended_at | created_by | occurred_at_day |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | GlucoseMeasurement | 100.0 | NaN | NaN | NaN | NaN | 2021-08-15 23:48:06 | NaN | NaN | NaN | NaN | NaN | 2021-08-15 |
1 | GlucoseMeasurement | 99.0 | NaN | NaN | NaN | NaN | 2021-08-15 23:33:06 | NaN | NaN | NaN | NaN | NaN | 2021-08-15 |
2 | GlucoseMeasurement | 99.0 | NaN | NaN | NaN | NaN | 2021-08-15 23:18:06 | NaN | NaN | NaN | NaN | NaN | 2021-08-15 |
3 | GlucoseMeasurement | 98.0 | NaN | NaN | NaN | NaN | 2021-08-15 23:03:06 | NaN | NaN | NaN | NaN | NaN | 2021-08-15 |
4 | GlucoseMeasurement | 97.0 | NaN | NaN | NaN | NaN | 2021-08-15 22:48:06 | NaN | NaN | NaN | NaN | NaN | 2021-08-15 |
# Calculate a few simple metrics for every day
# Rows are collected in a plain list and turned into a data frame once at the end:
# DataFrame.append was removed in pandas 2.0 and re-copied the whole frame per call.
summaryRows = []
# Loop through all days in the dataset and calculate metrics
for day in daysWithData:
    gmDataForOneDay = gm[gm['occurred_at_day'] == day]
    # daysWithData is built from the unfiltered export, so a day may have no glucose
    # readings at all; its mean would be NaN and int(round(NaN)) raises ValueError.
    if gmDataForOneDay['value'].count() == 0:
        continue
    averageGlucose = int(round(gmDataForOneDay['value'].mean()))
    medianGlucose = int(round(gmDataForOneDay['value'].median()))
    maxGlucose = int(round(gmDataForOneDay['value'].max()))
    minGlucose = int(round(gmDataForOneDay['value'].min()))
    # Add to the list of per-day records
    summaryRows.append({'day': day, 'averageGlucose': averageGlucose, 'medianGlucose': medianGlucose, "minGlucose": minGlucose, "maxGlucose": maxGlucose})
# Create the data frame that holds the metrics; `columns` fixes the column order and
# keeps the expected columns even when no day produced a row.
summaryData = pd.DataFrame(summaryRows, columns=["day", "averageGlucose", "medianGlucose", "minGlucose", "maxGlucose"])
summaryData.head(n=10)
 | day | averageGlucose | medianGlucose | minGlucose | maxGlucose |
---|---|---|---|---|---|
0 | 2021-08-15 | 91 | 89 | 70 | 112 |
1 | 2021-08-14 | 94 | 91 | 58 | 131 |
2 | 2021-08-13 | 94 | 92 | 75 | 144 |
3 | 2021-08-12 | 91 | 91 | 74 | 105 |
4 | 2021-08-11 | 93 | 92 | 79 | 108 |
5 | 2021-08-10 | 94 | 91 | 80 | 119 |
6 | 2021-08-09 | 91 | 91 | 72 | 126 |
7 | 2021-08-08 | 95 | 94 | 78 | 119 |
8 | 2021-08-07 | 99 | 95 | 77 | 137 |
9 | 2021-08-06 | 95 | 91 | 52 | 150 |
# Mean of every glucose reading in the export, rounded to a whole number
averageGlucoseAcrossAllDays = int(round(gm['value'].mean()))
# Report the figure together with the number of distinct days in the data
print(f"Average Glucose for {len(daysWithData)} days: {averageGlucoseAcrossAllDays}")
Average Glucose for 11 days: 94
# Line chart of every glucose reading against its timestamp
fig = px.line(gm, x="occurred_at", y="value")
# Overlay a horizontal black line at the overall average, drawn in data coordinates
# from the smallest to the largest "occurred_at" value
fig.add_shape(
    type="line",
    xref="x",
    yref="y",
    x0=gm["occurred_at"].min(),
    x1=gm["occurred_at"].max(),
    y0=averageGlucoseAcrossAllDays,
    y1=averageGlucoseAcrossAllDays,
    line={"color": "black"},
)
fig.show()