PGA Driving Distance Over the Years
A look at how average driving distance on the PGA Tour has changed over the years.
import pandas as pd
import altair as alt
# Preview one season (2020) before scraping every year.
# NOTE(review): this URL has no 'y' before the year, unlike the pattern used
# by the per-year loop further down -- confirm both forms resolve.
season_url = 'https://www.pgatour.com/stats/stat.101.2020.html'

# read_html returns one DataFrame per <table> it finds on the page; the
# rest of this script works with the second one (index 1).
tables = pd.read_html(season_url)
drives = tables[1]

# Bare expressions: first ten rows, then summary statistics for 'AVG.'.
drives.head(10)
drives.describe().loc[:, 'AVG.']
# Seasons to fetch: 1980 through the current calendar year (inclusive).
# An integer index yields the same years as the earlier year-end date_range
# without relying on the 'Y' frequency alias, which recent pandas releases
# deprecate.
years = pd.RangeIndex(start=1980, stop=pd.Timestamp.today().year + 1)

# Loop through the per-season URLs and collect one summary row per year.
d = []
for year in years:
    try:
        # The fetch lives inside the try so that one missing or malformed
        # season page skips that year instead of aborting the whole run.
        tables = pd.read_html(f'https://www.pgatour.com/stats/stat.101.y{year}.html')
        # NOTE(review): .loc slicing is label-inclusive, so with a default
        # integer index this keeps rows 0..100 (101 rows) -- confirm that is
        # the intended cut-off.
        drives = tables[1].loc[:100]
        season_mean = drives['AVG.'].mean()
    except (OSError, ValueError, IndexError, KeyError, TypeError) as err:
        # OSError covers urllib's URLError/HTTPError; ValueError is what
        # read_html raises when a page has no tables; the rest cover a
        # missing second table, a missing 'AVG.' column, or a non-numeric one.
        print(f'Skipping {year}: {err!r}')
        continue
    if pd.isna(season_mean):
        # An empty or all-NaN column has no usable mean and could not be
        # cast to int below.
        print(f'Skipping {year}: no numeric AVG. values')
        continue
    d.append({'Year': year, 'Average Drive': season_mean})

# Passing explicit columns keeps the frame well-formed even when every
# season was skipped and d is empty.
df = pd.DataFrame(d, columns=['Year', 'Average Drive'])
# Round to the nearest yard before casting; a bare astype(int) truncates,
# so e.g. 289.9 would be reported as 289.
df['Average Drive'] = df['Average Drive'].astype(float).round().astype(int)
# Hover-driven single selection keyed on the 'Year' field. nearest=True makes
# it pick the closest mark to the cursor, and empty='none' means an empty
# selection matches no rows.
nearest = alt.selection(
    type='single',
    on='mouseover',
    nearest=True,
    fields=['Year'],
    empty='none',
)

# Base layer: a basis-interpolated line of 'Average Drive' per 'Year'.
# zero=False lets the y scale omit zero.
line = (
    alt.Chart(df)
    .mark_line(interpolate='basis')
    .encode(
        x=alt.X('Year:N'),
        y=alt.Y('Average Drive:Q', scale=alt.Scale(zero=False)),
    )
)
# Fully transparent point marks, one per 'Year'. They carry the `nearest`
# selection, so hovering over the chart resolves to an x-value.
selectors = (
    alt.Chart(df)
    .mark_point()
    .encode(
        x=alt.X('Year:N'),
        opacity=alt.value(0),
    )
    .add_selection(nearest)
)

# Point marks reusing the line's encodings; opacity is 1 for the selected
# year and 0 for every other year.
points = (
    line
    .mark_point()
    .encode(opacity=alt.condition(nearest, alt.value(1), alt.value(0)))
)

# Text marks offset slightly from each point: the 'Average Drive' value for
# the selected year, a single space for the rest.
text = (
    line
    .mark_text(align='left', dx=5, dy=-5)
    .encode(text=alt.condition(nearest, 'Average Drive:Q', alt.value(' ')))
)

# Gray rule at the selected year; the filter keeps only rows matching the
# current selection.
rules = (
    alt.Chart(df)
    .mark_rule(color='gray')
    .encode(x=alt.X('Year:N'))
    .transform_filter(nearest)
)
# Stack the five layers in the order listed and size the result to
# 850x500. Left as a bare, unassigned expression.
layers = [line, selectors, points, rules, text]
alt.layer(*layers).properties(width=850, height=500)