In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import datetime
import dateutil
import calendar
In [2]:
# Explicitly register pandas' datetime converters with matplotlib.
# NOTE(review): presumably needed so the date-axis plots further down render
# without converter warnings on the pandas version in use -- confirm.
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
In [3]:
# Weeks start on Monday (calendar.MONDAY == 0).
calendar.setfirstweekday(calendar.MONDAY)
# Apply seaborn's default theme to every matplotlib figure created below.
sns.set()
In [4]:
# Read the export by splitting every line on the double-quote character and
# keeping only fields 13 and 17, exposed as the 'datetime' and 'km' columns.
# NOTE(review): this treats the XML as one-record-per-line text and depends on
# a fixed attribute order in each record -- confirm against the export format.
df = pd.read_csv(
    'iPhone.xml',
    sep='"',
    header=None,
    usecols=[13, 17],
    names=('datetime', 'km'),
)
In [5]:
# Peek at the first two rows to check that the two extracted fields look right.
df.head(2)
Out[5]:
datetime km
0 2016-08-01 07:28:31 +0900 0.297073
1 2016-08-01 07:33:33 +0900 0.261040
In [6]:
# strptime pattern for timestamps such as "2016-08-01 07:28:31 +0900".
datetime_format = "%Y-%m-%d %H:%M:%S %z"

# Timestamps of the first and last rows of the file, as timezone-aware datetimes.
start, end = (
    datetime.datetime.strptime(stamp, datetime_format)
    for stamp in (df['datetime'].iloc[0], df['datetime'].iloc[-1])
)
print(start, end, sep='\n')
2016-08-01 07:28:31+09:00
2019-07-27 18:13:38+09:00
In [7]:
# Parse every timestamp string once, then read the calendar fields off the
# parsed values (these are the fields as written in the file's own UTC offset).
parsed_stamps = [
    datetime.datetime.strptime(stamp, datetime_format)
    for stamp in df['datetime']
]
year = [stamp.year for stamp in parsed_stamps]
month = [stamp.month for stamp in parsed_stamps]
day = [stamp.day for stamp in parsed_stamps]

# Distinct (year, month) pairs in order of first appearance; dict keys keep
# insertion order, so this matches an append-if-not-seen list.
year_month = list(dict.fromkeys((stamp.year, stamp.month) for stamp in parsed_stamps))
In [8]:
# Attach the per-row calendar fields to the raw frame, in year/month/day order.
for column_name, column_values in (('year', year), ('month', month), ('day', day)):
    df[column_name] = column_values
In [9]:
# Show the first two rows again, now including the year/month/day columns.
df.head(2)
Out[9]:
datetime km year month day
0 2016-08-01 07:28:31 +0900 0.297073 2016 8 1
1 2016-08-01 07:33:33 +0900 0.261040 2016 8 1
In [10]:
# Total distance per calendar day, indexed by (year, month, day).
# Select 'km' *before* aggregating: summing the whole frame would also try to
# "sum" the string 'datetime' column. Older pandas silently dropped it, newer
# pandas concatenates the strings instead, which is slow and adds a junk column.
df2 = df.groupby(['year', 'month', 'day'])[['km']].sum()
In [11]:
# Inspect the first two daily totals of the grouped frame.
df2.head(2)
Out[11]:
km
year month day
2016 8 1 5.770613
2 5.244446
In [12]:
# Continuous calendar: one row per day from the first to the last recorded
# date (both inclusive), so days with no recordings still get a row.
all_days = pd.date_range(start=start.date(), end=end.date())
df3 = pd.DataFrame({"Date": all_days})
In [13]:
# Break each date into calendar fields; weekday follows pandas' convention
# (Monday == 0). Columns are added in this exact order.
for date_part in ("year", "month", "day", "weekday"):
    df3[date_part] = getattr(df3["Date"].dt, date_part)

# Placeholder distance: days without any recording stay at 0.0.
df3["km"] = 0.0
In [14]:
# Copy each day's total from df2 into the matching row of the continuous
# calendar df3. df3 holds one row per day starting at start.date(), so the row
# position of a date is simply its offset in days from that first date.
km_col = df3.columns.get_loc('km')  # resolve by name instead of hard-coding position 5
first_day = start.date()
for (yy, mm, dd), row in df2.iterrows():
    day_offset = (datetime.date(yy, mm, dd) - first_day).days
    df3.iloc[day_offset, km_col] = row['km']
In [15]:
# km_rand = df3['km'].sample(frac=1.0).reset_index(drop=True)
# df3['km'] = km_rand
In [16]:
# Scramble the daily totals: shuffle the values, then reset the index so the
# shuffled values are re-aligned to the dates by position. The shuffle is
# seeded so that Restart & Run All reproduces the same tables and figures.
# NOTE(review): if the scramble is meant to anonymise the data, confirm that a
# fixed, published seed is acceptable.
SCRAMBLE_SEED = 0
df3['km'] = (
    df3['km']
    .sample(frac=1.0, random_state=SCRAMBLE_SEED)
    .reset_index(drop=True)
)
In [17]:
# Centered 7-day moving average of the daily distance. With the default
# min_periods a full window is required, so the edges of the series are NaN.
weekly_window = df3['km'].rolling(7, center=True)
df3['km_ave'] = weekly_window.mean()
In [18]:
# Show the first four calendar rows, including the new km_ave column.
df3.head(4)
Out[18]:
Date year month day weekday km km_ave
0 2016-08-01 2016 8 1 0 4.416292 NaN
1 2016-08-02 2016 8 2 1 5.910279 NaN
2 2016-08-03 2016 8 3 2 0.000000 NaN
3 2016-08-04 2016 8 4 3 3.751202 4.639385
In [19]:
# Line plot of the daily distance over the whole period.
fig, ax = plt.subplots()
sns.lineplot(data=df3, x='Date', y='km', ax=ax)

# Let matplotlib pick the date tick positions and label them as year-month.
date_axis = ax.xaxis
date_axis.set_major_locator(mdates.AutoDateLocator())
date_axis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
fig.autofmt_xdate()  # rotate/align the date labels

ax.set_title('Daily Walking Distance')
Out[19]:
Text(0.5, 1.0, 'Daily Walking Distance')
In [20]:
# Line plot of the centered 7-day moving average (km_ave) over the whole period.
fig, ax = plt.subplots()
sns.lineplot(x='Date', y='km_ave', data=df3, ax=ax, color='coral')
# Automatic date tick positions, labelled as year-month.
ax.xaxis.set_major_locator(mdates.AutoDateLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
fig.autofmt_xdate()  # rotate/align the date labels
# Title typo fixed: "averated" -> "averaged".
ax.set_title('Daily Walking Distance averaged over 7 days')
Out[20]:
Text(0.5, 1.0, 'Daily Walking Distance averated over 7 days')
In [21]:
# Distribution of daily distance using bin edges 0, 1, ..., 24
# (values outside that range are not counted).
km_bin_edges = np.arange(25)
ax = df3['km'].plot.hist(bins=km_bin_edges, color='olivedrab')
ax.set_title('Histogram of Daily Walking Distance')
Out[21]:
Text(0.5, 1.0, 'Histogram of Daily Walking Distance')
In [22]:
# Total distance per calendar year as a bar chart.
# 'km' is selected before summing: aggregating the whole frame would also try
# to sum the datetime 'Date' column, which raises TypeError on pandas >= 2.0.
(
    df3.groupby(['year'])['km']
    .sum()
    .plot(kind='bar', color=plt.cm.Set1(np.arange(7)))
)
Out[22]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a175ffe48>
In [23]:
# Total distance per (year, month) as an area chart.
# 'km' is selected before summing: aggregating the whole frame would also try
# to sum the datetime 'Date' column, which raises TypeError on pandas >= 2.0.
df3.groupby(['year', 'month'])['km'].sum().plot(kind='area', color='peru')
Out[23]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a176d71d0>
In [24]:
# Total distance per day of the week (0 == Monday) as a bar chart.
# 'km' is selected before summing: aggregating the whole frame would also try
# to sum the datetime 'Date' column, which raises TypeError on pandas >= 2.0.
ax = (
    df3.groupby(['weekday'])['km']
    .sum()
    .plot(kind='bar', color=plt.cm.Set2(np.arange(7)))
)
# Replace the numeric tick labels 0..6 with Mon..Sun; this assumes all seven
# weekdays occur in the data so the labels line up with the bars.
ax.set_xticklabels(list(calendar.day_abbr))
Out[24]:
[Text(0, 0, 'Mon'),
 Text(0, 0, 'Tue'),
 Text(0, 0, 'Wed'),
 Text(0, 0, 'Thu'),
 Text(0, 0, 'Fri'),
 Text(0, 0, 'Sat'),
 Text(0, 0, 'Sun')]
In [25]:
# Total distance per calendar month (all years pooled) as a bar chart.
# 'km' is selected before summing: aggregating the whole frame would also try
# to sum the datetime 'Date' column, which raises TypeError on pandas >= 2.0.
ax = (
    df3.groupby(['month'])['km']
    .sum()
    .plot(kind='bar', color=plt.cm.tab20c(np.arange(20)))
)
# Replace the numeric tick labels 1..12 with Jan..Dec (month_abbr[0] is the
# empty string); this assumes all twelve months occur in the data.
ax.set_xticklabels(list(calendar.month_abbr[1:]))
Out[25]:
[Text(0, 0, 'Jan'),
 Text(0, 0, 'Feb'),
 Text(0, 0, 'Mar'),
 Text(0, 0, 'Apr'),
 Text(0, 0, 'May'),
 Text(0, 0, 'Jun'),
 Text(0, 0, 'Jul'),
 Text(0, 0, 'Aug'),
 Text(0, 0, 'Sep'),
 Text(0, 0, 'Oct'),
 Text(0, 0, 'Nov'),
 Text(0, 0, 'Dec')]
In [26]:
# One bar chart per calendar year in the data range: total distance per month.
for y in range(start.year, end.year + 1):
    # 'km' is selected before summing: aggregating the whole frame would also
    # try to sum the datetime 'Date' column (TypeError on pandas >= 2.0).
    ax = (
        df3[df3.year == y]
        .groupby(['month'])['km']
        .sum()
        .plot(kind='bar', color='burlywood')
    )
    ax.set_title('{}'.format(y))
    plt.show()  # flush the figure so each year gets its own chart
In [27]:
# One bar chart per month for the last three (year, month) pairs seen in the
# raw data: total distance per day of the month.
for (y, m) in year_month[-3:]:
    in_month = (df3.year == y) & (df3.month == m)
    # 'km' is selected before summing: aggregating the whole frame would also
    # try to sum the datetime 'Date' column (TypeError on pandas >= 2.0).
    ax = (
        df3[in_month]
        .groupby(['day'])['km']
        .sum()
        .plot(kind='bar', color='skyblue')
    )
    ax.set_title('{}-{}'.format(y, m))
    plt.show()  # flush the figure so each month gets its own chart
In [ ]: