diff --git a/docs/active_roster.md b/docs/active_roster.md
new file mode 100644
index 00000000..68bb6a8c
--- /dev/null
+++ b/docs/active_roster.md
@@ -0,0 +1,20 @@
+# Roster
+
+`active_roster(team)`
+
+Get the current 40-man roster for a given team. The result mirrors the 40-man roster table on the team's
+Baseball Reference page (for example, https://www.baseball-reference.com/teams/WSN/2025.shtml#all_the40man).
+Two columns are added: `player_ID`, the player's bref ID, and `Alt URL`, the alternate URL for players
+who have not yet reached the majors.
+
+## Arguments
+`team:` String. Must be a three-letter abbreviation that bref uses for an active MLB team.
+
+## Examples of valid queries
+
+```python
+from pybaseball import active_roster
+
+# get the Nationals' current 40-man roster
+data = active_roster('WSN')
+
+```
diff --git a/docs/appearances_bref.md b/docs/appearances_bref.md
new file mode 100644
index 00000000..30dccd5c
--- /dev/null
+++ b/docs/appearances_bref.md
@@ -0,0 +1,21 @@
+# Appearances Bref
+
+`appearances_bref(season)`
+
+Get defensive appearances for a given season.
+
+## Arguments
+`season:` Integer. Defaults to the most recent season if no value is provided. Must be 1871 or later.
+
+## Examples of valid queries
+
+```python
+from pybaseball import appearances_bref
+
+# get the current season's up-to-date appearances
+data = appearances_bref()
+
+# get the end-of-season appearances for the 1960 season
+data = appearances_bref(1960)
+
+```
diff --git a/pybaseball/__init__.py b/pybaseball/__init__.py
index cc223855..226c21d1 100644
--- a/pybaseball/__init__.py
+++ b/pybaseball/__init__.py
@@ -34,6 +34,8 @@
statcast_fielding_run_value
)
from .league_batting_stats import batting_stats_bref
+from .appearances_bref import appearances_bref
+from .active_roster import active_roster
from .league_batting_stats import batting_stats_range
from .league_batting_stats import bwar_bat
from .league_pitching_stats import pitching_stats_bref
diff --git a/pybaseball/active_roster.py b/pybaseball/active_roster.py
new file mode 100644
index 00000000..b27fe87c
--- /dev/null
+++ b/pybaseball/active_roster.py
@@ -0,0 +1,81 @@
+from typing import Optional
+
+import pandas as pd
+from bs4 import BeautifulSoup, Comment
+
+from . import cache
+from .utils import most_recent_season, get_bref_id_from_player_link, ACTIVE_TEAMS
+from .datasources.bref import BRefSession
+
+session = BRefSession()
+
def get_soup(team: str) -> BeautifulSoup:
    """
    Download and parse the Baseball Reference team page for the most recent season.

    ARGUMENTS
        team (str): team abbreviation used in the Baseball Reference URL
    """
    page_url = f'https://www.baseball-reference.com/teams/{team}/{most_recent_season()}.shtml'
    response = session.get(page_url)
    return BeautifulSoup(response.content, "lxml")
+
def get_tables(soup: BeautifulSoup) -> pd.DataFrame:
    """
    Parse the 40-man roster table out of a Baseball Reference team page.

    ARGUMENTS
        soup (BeautifulSoup): parsed HTML of the team page

    RETURNS
        A DataFrame with one row per table row that contains a player link.
        Columns are the table headings without the leading rank column, plus
        'player_ID' (bref ID, empty for players without a /players/ link) and
        'Alt URL' (the link itself for players without a /players/ link,
        empty otherwise).

    RAISES
        ValueError: if the roster table cannot be located in the page
    """
    table = None
    wrapper = soup.find(id='all_the40man')
    if wrapper is not None:
        # the roster table is normally delivered inside an HTML comment
        comment = wrapper.find(string=lambda text: isinstance(text, Comment))
        if comment is not None:
            table = BeautifulSoup(comment, 'lxml').find(id='the40man')
        if table is None:
            # fall back to a table that is present directly in the markup
            table = wrapper.find(id='the40man')

    header_row = table.find('tr') if table is not None else None
    table_body = table.find('tbody') if table is not None else None
    if header_row is None or table_body is None:
        raise ValueError(
            "Could not find the 40-man roster table on the team page."
        )

    # drop the leading Rk header, it's unnecessary
    headings = [th.get_text() for th in header_row.find_all('th')][1:]

    # add column names for the two derived columns
    headings.extend(['player_ID', 'Alt URL'])

    data = []
    for row in table_body.find_all('tr'):
        anchor = row.find('a')
        if anchor is None:
            # rows without a player link carry no player data
            continue

        cols = [cell.text.strip() for cell in row.find_all('td')]
        player_link = anchor.get('href', '')

        # determine whether the player has reached the majors and has a bref ID
        if player_link.startswith('/players/'):
            # player has played in the majors and has an ID
            cols.extend([get_bref_id_from_player_link(player_link), ''])
        else:
            # player has not reached the majors, give them an alternate url
            cols.extend(['', player_link])

        data.append(cols)

    # use headings for column names
    return pd.DataFrame(data, columns=headings)
+
+
@cache.df_cache()
def active_roster(team: str) -> pd.DataFrame:
    """
    Return the current 40-man roster of an MLB team as a pandas DataFrame.

    ARGUMENTS
        team (str): the three-letter bref abbreviation for an active MLB team

    RAISES
        ValueError: if team is not listed in ACTIVE_TEAMS
    """
    if team in ACTIVE_TEAMS:
        # fetch the team page from baseball reference and parse the roster table
        return get_tables(get_soup(team))

    # anything else is not an active team abbreviation
    raise ValueError(
        "Team must be the three-letter abbreviation of an active MLB team."
    )
diff --git a/pybaseball/appearances_bref.py b/pybaseball/appearances_bref.py
new file mode 100644
index 00000000..f55e946b
--- /dev/null
+++ b/pybaseball/appearances_bref.py
@@ -0,0 +1,70 @@
+from typing import Optional
+
+import pandas as pd
+from bs4 import BeautifulSoup
+
+from . import cache
+from .utils import most_recent_season, get_bref_id_from_player_link
+from .datasources.bref import BRefSession
+
+session = BRefSession()
+
def get_soup(year: int) -> BeautifulSoup:
    """
    Download and parse the Baseball Reference fielding appearances page for a season.

    ARGUMENTS
        year (int): the season whose appearances page is requested
    """
    page_url = f'https://www.baseball-reference.com/leagues/majors/{year}-appearances-fielding.shtml'
    response = session.get(page_url)
    return BeautifulSoup(response.content, "lxml")
+
def get_tables(soup: BeautifulSoup, season: int) -> pd.DataFrame:
    """
    Parse the player appearances table out of a Baseball Reference page.

    ARGUMENTS
        soup (BeautifulSoup): parsed HTML of the appearances page
        season (int): the season the page belongs to; only used in the error
            message when the table is missing

    RETURNS
        A DataFrame with one row per table row that contains a player link.
        Columns are the table headings without the leading rank column, plus
        'player_ID' holding the player's bref ID.

    RAISES
        ValueError: if the appearances table cannot be located in the page
    """
    # get player appearances table
    table = soup.find(id='appearances')
    header_row = table.find('tr') if table is not None else None
    table_body = table.find('tbody') if table is not None else None
    if header_row is None or table_body is None:
        raise ValueError(
            f"Could not find the appearances table for the {season} season."
        )

    # drop the leading Rk header, it's unnecessary
    headings = [th.get_text() for th in header_row.find_all('th')][1:]

    # add ID column name
    headings.append('player_ID')

    data = []
    for row in table_body.find_all('tr'):
        anchor = row.find('a')
        if anchor is None:
            # rows without a player link carry no player data
            continue

        cols = [cell.text.strip() for cell in row.find_all('td')]

        # find bref ID in player link and add to data
        cols.append(get_bref_id_from_player_link(anchor.get('href')))

        data.append(cols)

    # use headings for column names
    return pd.DataFrame(data, columns=headings)
+
+
@cache.df_cache()
def appearances_bref(season: Optional[int] = None) -> pd.DataFrame:
    """
    Return the defensive appearances for an MLB season as a pandas DataFrame.

    When no season is given, the value of most_recent_season() is used.

    ARGUMENTS
        season (int): the year of the season

    RAISES
        ValueError: if the season is earlier than 1871
    """
    # fall back to the most recent season if none was specified
    year = most_recent_season() if season is None else season

    if year < 1871:
        raise ValueError(
            "This query currently only returns appearances until the 1871 season. "
            "Try looking at years from 1871 to present."
        )

    # retrieve html from baseball reference and parse the appearances table
    return get_tables(get_soup(year), year)
diff --git a/pybaseball/utils.py b/pybaseball/utils.py
index df24ea9d..056cb627 100644
--- a/pybaseball/utils.py
+++ b/pybaseball/utils.py
@@ -1,3 +1,4 @@
+import re
from collections import namedtuple
from datetime import date, datetime, timedelta
import functools
@@ -7,6 +8,7 @@
import pandas as pd
import requests
+from bs4 import Tag
from . import cache
@@ -84,6 +86,39 @@
{'WAS', 'WST'}
]
# Three-letter team abbreviations accepted by active_roster(), which rejects
# any team value not listed here.
ACTIVE_TEAMS = [
    'ARI', 'ATH', 'ATL', 'BAL', 'BOS', 'CHC',
    'CHW', 'CIN', 'CLE', 'COL', 'DET', 'HOU',
    'KCR', 'LAA', 'LAD', 'MIA', 'MIL', 'MIN',
    'NYM', 'NYY', 'PHI', 'PIT', 'SDP', 'SEA',
    'SFG', 'STL', 'TBR', 'TEX', 'TOR', 'WSN',
]
+
def get_first_season(team: str, include_equivalents: bool = True) -> Optional[int]:
if not include_equivalents:
return first_season_map[team]
@@ -385,3 +420,8 @@ def norm_positions(pos: Union[int, str], to_word: bool = False, to_number: bool
# lower() ok due to positional numbers being cast as strings when created
return normed.lower()
# pull out bref ID from player page link using a regex
def get_bref_id_from_player_link(player_link: str) -> str:
    """
    Extract the bref player ID from a player page link.

    ARGUMENTS
        player_link (str): a link containing 'players/<letter>/<id>.shtml'

    RETURNS
        The '<id>' portion of the link.

    RAISES
        ValueError: if the link does not have the expected player page shape
    """
    # IDs are not strictly alphanumeric (e.g. "o'neipa01" contains an
    # apostrophe), so capture everything up to the ".shtml" extension
    match = re.search(r"players/[a-z]/([^/]+?)\.shtml", player_link)
    if match is None:
        raise ValueError(
            f"Could not find a bref ID in player link: {player_link!r}"
        )
    return match.group(1)
+
diff --git a/tests/pybaseball/data/active_roster.html b/tests/pybaseball/data/active_roster.html
new file mode 100644
index 00000000..2ce3dcf4
--- /dev/null
+++ b/tests/pybaseball/data/active_roster.html
@@ -0,0 +1,3369 @@
+
+
+
+
Top 12 Players
+- Please note that players may not be in the uniform of the correct team in these images.
+
+