Please see my accompanying blog post.
This notebook and the data files referenced below are available for download:
import pandas as pd
# Load the speed log: the first CSV column is parsed as dates and becomes
# the DataFrame index.
stcli_df = pd.read_csv(
    "speed.csv",
    index_col=0,
    parse_dates=[0],
)
stcli_df.head()
Note: the Trimmed Average
columns drop the fastest and slowest speeds for the 5-run test and average the remaining three.
# Collapse the trimmed download speeds to one mean value per calendar day.
trimmed_download = stcli_df[['Trimmed Average Download']]
stcli_daily_mean = trimmed_download.resample('1D').mean()

# Smooth the daily means with a 7-day rolling mean. A value is only produced
# once a full 7-day window is available; the resulting NaN rows are dropped.
weekly_window = stcli_daily_mean.rolling(window=7, min_periods=7)
stcli_rolling_mean = weekly_window.mean().dropna()
stcli_rolling_mean.head()
import matplotlib.pyplot as plt
import matplotlib.dates
%matplotlib notebook
# One figure/axes pair shared by the annotations and the line plot below.
fig, ax = plt.subplots()
plt.xticks(rotation=-45, horizontalalignment='left')

# One major tick per month, labelled as YYYY-MM.
monthly_ticks = matplotlib.dates.MonthLocator(interval=1)
year_month_labels = matplotlib.dates.DateFormatter('%Y-%m')
ax.xaxis.set_major_locator(monthly_ticks)
ax.xaxis.set_major_formatter(year_month_labels)
ax.set_title("Average Internet Speed")

# Each entry: (label text, arrow tip, text position, extra annotate kwargs).
isp_annotations = [
    (
        "Centurylink (ABQ)",
        ("2016-06-15", 12),
        ("2016-09", 5),
        dict(ha="center", arrowprops=dict(facecolor='black')),
    ),
    (
        "Comcast (ABQ)",
        ("2016-10", 83),
        ("2016-12", 83),
        dict(va="center", arrowprops=dict(facecolor='black')),
    ),
    (
        "Frontier (Shiprock)",
        ("2017-03", 8),
        ("2017-03", 25),
        dict(ha="center", arrowprops=dict(facecolor='black', shrink=0.10)),
    ),
]
for label_text, arrow_tip, text_position, extra_kwargs in isp_annotations:
    ax.annotate(label_text, xy=arrow_tip, xytext=text_position, **extra_kwargs)

# NOTE(review): newer Matplotlib releases appear to have renamed this style
# to "seaborn-v0_8" -- confirm against the installed version.
with plt.style.context("seaborn"):
    plt.plot(stcli_rolling_mean)
plt.tight_layout()
hass_speeds.csv
comes from Home Assistant's Fast.com sensor. "Unknown" speeds usually mean that the internet was out (or possibly that I was working on the modem, which was usually because the internet was out), so I am replacing these values with 0
.
def _speed_or_zero(raw):
    """Convert a raw Speed cell to float, mapping 'unknown' to 0.0."""
    if raw == 'unknown':
        return 0.
    return float(raw)


# The file's own header row is replaced by the names below; the second
# column ("Date") is parsed as dates and used as the index.
hass_df = pd.read_csv(
    "hass_speeds.csv",
    header=0,
    names=["Speed", "Date"],
    index_col=1,
    parse_dates=[1],
    converters={'Speed': _speed_or_zero},
)
hass_df.head()
# Leave out readings equal to 0.0 before averaging, then take one mean per
# calendar day.
nonzero_reading = hass_df["Speed"] != 0.0
hass_daily_mean = hass_df[nonzero_reading].resample("1D").mean()

# 7-day rolling mean of the daily values; rows without a full 7-day window
# come out as NaN and are dropped.
hass_weekly_window = hass_daily_mean.rolling(window=7, min_periods=7)
hass_rolling_mean = hass_weekly_window.mean().dropna()
from bokeh.plotting import figure, output_notebook, show
# Render Bokeh output inline in the notebook.
output_notebook()

# Both axis ranges are clamped to the extent of the Fast.com rolling mean,
# so any part of the speedtest-cli line outside that window is not shown.
p = figure(
    title="Internet speed with Frontier (excluding dropouts)",
    y_axis_label='Download Speed (Mbps)',
    x_axis_type='datetime',
    x_range=(hass_rolling_mean.index.min(), hass_rolling_mean.index.max()),
    y_range=(hass_rolling_mean['Speed'].min(), hass_rolling_mean['Speed'].max()),
)
p.title.text_font_size = '16pt'

# `legend_label` replaces the deprecated `legend` glyph keyword, which
# current Bokeh releases no longer accept.
p.line(
    x=hass_rolling_mean.index,
    y=hass_rolling_mean['Speed'],
    legend_label="Fast.com",
    line_width=2,
    color="DodgerBlue",
)
p.line(
    x=stcli_rolling_mean.index,
    y=stcli_rolling_mean['Trimmed Average Download'],
    legend_label="speedtest-cli",
    line_width=2,
    color="LightCoral",
)
show(p)
# A reading of exactly 0.0 is treated as a dropout; count such readings
# per calendar day.
is_dropout = hass_df['Speed'] == 0.
dropouts = hass_df[is_dropout].resample('1D').count()
# Relabel the column, since it now holds a count rather than a speed.
dropouts.columns = ["Count"]

# Rolling mean over a 7-row (7-day) window of the daily dropout counts.
rolling_dropouts = dropouts.rolling(window=7).mean()
# Line plot of the 7-day rolling dropout average, drawn with the seaborn style.
with plt.style.context("seaborn"):
    fig, ax = plt.subplots()
    plt.xticks(rotation=-20, horizontalalignment='left')

    # One major tick per month, labelled as YYYY-MM.
    monthly_ticks = matplotlib.dates.MonthLocator(interval=1)
    year_month_labels = matplotlib.dates.DateFormatter('%Y-%m')
    ax.xaxis.set_major_locator(monthly_ticks)
    ax.xaxis.set_major_formatter(year_month_labels)

    ax.set_ylabel("Dropouts per day")
    ax.set_title("Frontier internet dropouts per day (7 day running average)")
    ax.plot(rolling_dropouts.index, rolling_dropouts)
    plt.tight_layout()
# Histogram of how many days saw a given number of dropouts.
with plt.style.context("ggplot"):
    fig, ax = plt.subplots()

    # Bin edges run 0, 1, ..., max + 1 so that every integer count, including
    # the maximum, gets its own unit-wide bin. With edges stopping at
    # max - 1, days with the highest count fell outside the histogram and
    # the two largest remaining counts shared a bin.
    max_dropouts = int(dropouts['Count'].max())
    plt.hist(dropouts['Count'], bins=range(0, max_dropouts + 2))

    # Date format used for the first/last day shown in the title.
    datefmt = "%m/%d/%Y"
    plt.ylabel("Number of days")
    plt.xlabel("Number of dropouts per day")
    plt.title(f"Frontier internet dropouts per day ({dropouts.index[0]:{datefmt}} - {dropouts.index[-1]:{datefmt}})")
    plt.show()
# Mean of all non-zero speed readings. Selecting the column by name avoids
# positional integer indexing into a label-indexed Series; Series.mean()
# skips NaN values by default, so no separate null filter is needed.
nonzero_speeds = hass_df.loc[hass_df['Speed'] != 0.0, 'Speed']
overall_average = nonzero_speeds.mean()
# ".2f" prints two decimal places; a bare ".2" means two significant digits
# and switches to exponent notation for values of 100 or more.
print(f"Average internet speed for Frontier (excluding dropouts): {overall_average:.2f} Mbps")