import pandas as pd
import altair as alt

# Parse every HTML table on the 2020 stat page; the result is indexable,
# and the table at index 1 is the one used as the driving data below.
stats_url = 'https://www.pgatour.com/stats/stat.101.2020.html'
tables = pd.read_html(stats_url)
drives = tables[1]

# Preview the first ten rows.
drives.head(10)
RANK THIS WEEK RANK LAST WEEK PLAYER NAME ROUNDS AVG. TOTAL DISTANCE TOTAL DRIVES
0 1 1 Bryson DeChambeau 8 344.4 5511 16
1 2 5 Dustin Johnson 4 333.8 2670 8
2 3 11 Rory McIlroy 8 333.4 5334 16
3 4 4 Joaquin Niemann 12 333.2 7996 24
4 5 6 Taylor Pendrith 4 331.9 2655 8
5 6 2 Cameron Champ 8 331.4 5302 16
6 7 7 Jon Rahm 8 324.8 5196 16
7 8 16 Tony Finau 4 324.6 2597 8
8 9 17 Wyndham Clark 10 324.5 6489 20
9 10 9 Ryan Palmer 10 321.8 6436 20
# Summary statistics (count, mean, std, quartiles, min/max) for the 'AVG.' column only.
drives['AVG.'].describe()
count    264.000000
mean     301.050000
std       11.742452
min      267.400000
25%      294.100000
50%      301.100000
75%      307.900000
max      344.400000
Name: AVG., dtype: float64

Average Driving Distance Since 1980

# Create a list of years: 1980 through the current calendar year, inclusive.
# A plain integer range yields the same years as the previous
# date_range(..., freq='Y') construction without relying on the 'Y'
# frequency alias, which newer pandas releases deprecate.
years = list(range(1980, pd.Timestamp.today().year + 1))

# Loop through the per-season URLs and build one row per season.
d = []

for year in years:
    try:
        # Fetching and parsing live inside the try so that a season whose
        # page is missing or malformed is skipped instead of aborting the loop.
        tables = pd.read_html(f'https://www.pgatour.com/stats/stat.101.y{year}.html')

        # head(100) keeps exactly the first 100 rows; the label slice
        # .loc[:100] is end-inclusive and would keep 101.
        drives = tables[1].head(100)

        average = drives['AVG.'].mean()
    except (OSError, ValueError, IndexError, KeyError, TypeError) as err:
        # OSError: request failed (covers urllib's URLError/HTTPError)
        # ValueError: no tables found on the page
        # IndexError: fewer than two tables on the page
        # KeyError: the table has no 'AVG.' column
        # TypeError: the 'AVG.' column is not numeric
        print(f'Skipping {year}: {err!r}')
        continue

    if pd.isna(average):
        # An all-missing column yields NaN, which cannot be converted to int.
        print(f'Skipping {year}: no usable AVG. values')
        continue

    # Round to the nearest yard; a bare int cast would truncate (289.9 -> 289).
    d.append({'Year': year, 'Average Drive': int(round(average))})

# Naming the columns explicitly keeps them present even if every season was skipped.
df = pd.DataFrame(d, columns=['Year', 'Average Drive'])

# Hover selection: a single point, chosen as the one nearest the cursor and
# matched on its Year field; empty='none' is passed so an empty selection
# matches no rows.
nearest = alt.selection(
    type='single',
    on='mouseover',
    nearest=True,
    fields=['Year'],
    empty='none',
)

# All layers are built from the same DataFrame.
base = alt.Chart(df)

# Layer 1: the smoothed line of average drive per year. The y scale is not
# forced to include zero.
y_axis = alt.Y('Average Drive:Q', scale=alt.Scale(zero=False))
line = base.mark_line(interpolate='basis').encode(
    x=alt.X('Year:N'),
    y=y_axis,
)

# Layer 2: fully transparent points, one per year. They carry the selection,
# so they are what reports the cursor's x-value.
selectors = (
    base.mark_point()
    .encode(x='Year:N', opacity=alt.value(0))
    .add_selection(nearest)
)

# Layer 3: points on the line, opaque only for the selected year.
point_opacity = alt.condition(nearest, alt.value(1), alt.value(0))
points = line.mark_point().encode(opacity=point_opacity)

# Layer 4: a gray vertical rule, filtered down to the selected year.
rules = (
    base.mark_rule(color='gray')
    .encode(x='Year:N')
    .transform_filter(nearest)
)

# Layer 5: a text label next to the point, showing the value only for the
# selected year and a single space otherwise.
label_text = alt.condition(nearest, 'Average Drive:Q', alt.value(' '))
text = line.mark_text(align='left', dx=5, dy=-5).encode(text=label_text)

# Stack the five layers into one chart and set its size. The bare name on the
# last line keeps the chart as the final expression of the cell.
chart = alt.layer(line, selectors, points, rules, text).properties(
    width=850,
    height=500,
)
chart