import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import datetime
import dateutil
import calendar
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
sns.set()
calendar.setfirstweekday(0) # Monday
# Load the export by splitting every line on double quotes; fields 13 and 17
# are kept and named 'datetime' and 'km'.
# NOTE(review): presumably 'km' is a distance in kilometres - confirm against
# the export.
df = pd.read_csv(
    'iPhone.xml',
    sep='"',
    header=None,
    skiprows=0,
    usecols=[13, 17],
    names=('datetime', 'km'),
)
df.head(2)  # notebook-style preview; no visible effect in a plain script

# Timestamps look like "2019-01-31 08:15:00 +0900" (date, time, UTC offset).
datetime_format = "%Y-%m-%d %H:%M:%S %z"

# First and last rows bound the covered period; this relies on the rows
# being in chronological order.
start, end = (
    datetime.datetime.strptime(df['datetime'].iloc[position], datetime_format)
    for position in (0, -1)
)
print(start, end, sep='\n')
# Parse every timestamp once, then split it into calendar components.
parsed_stamps = [
    datetime.datetime.strptime(stamp, datetime_format)
    for stamp in df['datetime']
]
year = [moment.year for moment in parsed_stamps]
month = [moment.month for moment in parsed_stamps]
day = [moment.day for moment in parsed_stamps]

# Distinct (year, month) pairs in order of first appearance.
year_month = []
seen_months = set()
for pair in zip(year, month):
    if pair in seen_months:
        continue
    seen_months.add(pair)
    year_month.append(pair)

df['year'] = year
df['month'] = month
df['day'] = day
df.head(2)  # notebook-style preview; no visible effect in a plain script
# Total distance per calendar day. Only 'km' is selected before summing so
# the raw 'datetime' strings are not dragged through the aggregation.
df2 = df.groupby(['year', 'month', 'day'])[['km']].sum()
df2.head(2)  # notebook-style preview; no visible effect in a plain script

# One row per calendar day from the first to the last record, so that days
# without any record are present as well.
df3 = pd.DataFrame({"Date": pd.date_range(start.date(), end.date())})
df3["year"] = df3.Date.dt.year
df3["month"] = df3.Date.dt.month
df3["day"] = df3.Date.dt.day
df3["weekday"] = df3.Date.dt.weekday  # 0 = Monday ... 6 = Sunday

# Re-key the daily totals by date so they can be aligned to the calendar by
# label instead of by a computed row offset and a hard-coded column position.
daily_km = pd.Series(
    df2['km'].values,
    index=pd.DatetimeIndex(pd.to_datetime(df2.index.to_frame(index=False))),
)
calendar_days = pd.DatetimeIndex(df3['Date'])

# A record dated outside [start, end] means the export is not chronological;
# fail loudly instead of writing its total into an unrelated row.
stray_days = daily_km.index.difference(calendar_days)
if len(stray_days) > 0:
    raise ValueError(
        '{} day(s) fall outside {}..{}; is the export sorted by time?'.format(
            len(stray_days), start.date(), end.date()))

# Days with no record get 0.0; the column is always float.
df3['km'] = daily_km.reindex(calendar_days).fillna(0.0).astype(float).values

# Deliberately shuffle the daily values across days (no random_state, so the
# order differs on every run).
# NOTE(review): presumably done to anonymise the data before sharing the
# plots - every time-based chart below then shows scrambled history. Drop
# this line to plot the real series.
df3['km'] = df3['km'].sample(frac=1.0).reset_index(drop=True)  # scrambled

# 7-day centred rolling mean; the first and last three days are NaN.
df3['km_ave'] = df3['km'].rolling(window=7, center=True).mean()
df3.head(4)  # notebook-style preview; no visible effect in a plain script
# Two time-series figures sharing the same axis formatting: the daily 'km'
# column and the 'km_ave' column. Each entry is (column, title, extra
# keyword arguments for sns.lineplot).
for column, title, line_style in (
    ('km', 'Daily Walking Distance', {}),
    ('km_ave', 'Daily Walking Distance averaged over 7 days',
     {'color': 'coral'}),
):
    fig, ax = plt.subplots()
    sns.lineplot(x='Date', y=column, data=df3, ax=ax, **line_style)
    # Let matplotlib choose the tick spacing, but label ticks as year-month.
    ax.xaxis.set_major_locator(mdates.AutoDateLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    fig.autofmt_xdate()
    ax.set_title(title)
# Summary charts of the daily 'km' column.
#
# Each chart is drawn on its own explicitly created axes: Series.plot()
# without ax= reuses the current axes when a figure is already open, which
# makes the charts overdraw one another when this runs as a plain script.
#
# 'km' is selected before summing so the datetime 'Date' column is never
# aggregated (recent pandas versions refuse to sum datetimes).

# Distribution of daily distance in 1-unit bins covering 0..24.
fig, ax = plt.subplots()
df3['km'].plot(kind='hist', bins=np.arange(25), color='olivedrab', ax=ax)
ax.set_title('Histogram of Daily Walking Distance')

# Total per year.
fig, ax = plt.subplots()
df3.groupby('year')['km'].sum().plot(
    kind='bar', color=plt.cm.Set1(np.arange(7)), ax=ax)

# Total per (year, month), drawn as an area chart.
fig, ax = plt.subplots()
df3.groupby(['year', 'month'])['km'].sum().plot(
    kind='area', color='peru', ax=ax)

# Total per weekday. Labels are derived from the weekdays actually present
# so the label count always matches the bar count.
km_by_weekday = df3.groupby('weekday')['km'].sum()
fig, ax = plt.subplots()
km_by_weekday.plot(kind='bar', color=plt.cm.Set2(np.arange(7)), ax=ax)
ax.set_xticklabels([calendar.day_abbr[int(d)] for d in km_by_weekday.index])

# Total per calendar month, pooled over all years; labels derived likewise.
km_by_month = df3.groupby('month')['km'].sum()
fig, ax = plt.subplots()
km_by_month.plot(kind='bar', color=plt.cm.tab20c(np.arange(20)), ax=ax)
ax.set_xticklabels([calendar.month_abbr[int(m)] for m in km_by_month.index])
# Monthly totals, one bar chart per calendar year from start.year to
# end.year. A fresh figure is created for every chart because Series.plot()
# without ax= would draw on whatever axes are current. 'km' is selected
# before summing so the datetime 'Date' column is never aggregated.
for y in np.arange(start.year, end.year + 1):
    km_by_month = df3.loc[df3.year == y].groupby('month')['km'].sum()
    fig, ax = plt.subplots()
    km_by_month.plot(kind='bar', color='burlywood', ax=ax)
    ax.set_title('{}'.format(y))
    plt.show()

# Daily totals for the last three (year, month) pairs found in the records.
for (y, m) in year_month[-3:]:
    in_month = (df3.year == y) & (df3.month == m)
    km_by_day = df3.loc[in_month].groupby('day')['km'].sum()
    fig, ax = plt.subplots()
    km_by_day.plot(kind='bar', color='skyblue', ax=ax)
    ax.set_title('{}-{}'.format(y, m))
    plt.show()