Source code for gridstatus.spp

from typing import BinaryIO

import pandas as pd
import requests
import tqdm
from bs4 import BeautifulSoup

from gridstatus import utils
from gridstatus.base import InterconnectionQueueStatus, ISOBase, Markets, NotSupported
from gridstatus.decorators import FiveMinOffset, support_date_range
from gridstatus.gs_logging import log
from gridstatus.lmp_config import lmp_config

[docs]FS_RTBM_LMP_BY_LOCATION = "rtbm-lmp-by-location"
[docs]FS_DAM_LMP_BY_LOCATION = "da-lmp-by-location"
[docs]MARKETPLACE_BASE_URL = "https://portal.spp.org"
[docs]FILE_BROWSER_API_URL = "https://portal.spp.org/file-browser-api/"
[docs]FILE_BROWSER_DOWNLOAD_URL = "https://portal.spp.org/file-browser-api/download"
[docs]BASE_SOLAR_AND_WIND_SHORT_TERM_URL = ( f"{FILE_BROWSER_DOWNLOAD_URL}/shortterm-resource-forecast?path=" )
[docs]BASE_SOLAR_AND_WIND_MID_TERM_URL = ( f"{FILE_BROWSER_DOWNLOAD_URL}/midterm-resource-forecast?path=" )
[docs]BASE_LOAD_FORECAST_SHORT_TERM_URL = f"{FILE_BROWSER_DOWNLOAD_URL}/stlf-vs-actual?path="
[docs]BASE_LOAD_FORECAST_MID_TERM_URL = f"{FILE_BROWSER_DOWNLOAD_URL}/mtlf-vs-actual?path="
[docs]LOCATION_TYPE_ALL = "ALL"
[docs]LOCATION_TYPE_HUB = "Hub"
[docs]LOCATION_TYPE_INTERFACE = "Interface"
[docs]LOCATION_TYPE_SETTLEMENT_LOCATION = "Settlement Location"
[docs]QUERY_RTM5_HUBS_URL = "https://pricecontourmap.spp.org/arcgis/rest/services/MarketMaps/RTBM_FeatureData/MapServer/1/query" # noqa
[docs]QUERY_RTM5_INTERFACES_URL = "https://pricecontourmap.spp.org/arcgis/rest/services/MarketMaps/RTBM_FeatureData/MapServer/2/query" # noqa
[docs]RELIABILITY_LEVELS = [ "Normal Operations", "Weather Advisory", "Resource Advisory", "Conservative Operations Advisory", "Energy Emergency Alert Level 1", "Energy Emergency Alert Level 2", "Energy Emergency Alert Level 3", "Restoration Event", ]
[docs]LAST_UPDATED_KEYWORDS = [ "last updated", "as of", ]
[docs]RELIABILITY_LEVELS_ALIASES = { "Normal Operations": "Normal", }
[docs]STATUS_STOP_WORDS = [ "as", "at", "ct", # central time "eea", # energy emergency alert "of", "on", ]
[docs]class SPP(ISOBase): """Southwest Power Pool (SPP)""" name = "Southwest Power Pool" iso_id = "spp" default_timezone = "US/Central" status_homepage = "https://www.spp.org/markets-operations/current-grid-conditions/" interconnection_homepage = ( "https://www.spp.org/engineering/generator-interconnection/" ) markets = [ Markets.REAL_TIME_5_MIN, Markets.DAY_AHEAD_HOURLY, ] location_types = [ LOCATION_TYPE_ALL, LOCATION_TYPE_HUB, LOCATION_TYPE_INTERFACE, LOCATION_TYPE_SETTLEMENT_LOCATION, ] @staticmethod
[docs] def now(): return pd.Timestamp.now(tz=SPP.default_timezone)
[docs] def get_fuel_mix(self, date, detailed=False, verbose=False): """Get fuel mix Args: date: supports today and latest detailed: if True, breaks out self scheduled and market scheduled Note: if today, returns last 2 hours of data. maybe include previous day Returns: pd.DataFrame: fuel mix """ if date == "latest": return self.get_fuel_mix( "today", detailed=detailed, verbose=verbose, ).reset_index(drop=True) if not utils.is_today(date, self.default_timezone): # https://marketplace.spp.org/pages/generation-mix-historical # many years of historical 5 minute data raise NotSupported url = f"{FILE_BROWSER_DOWNLOAD_URL}/generation-mix-historical?path=/GenMix2Hour.csv" # noqa df_raw = pd.read_csv(url) historical_mix = process_gen_mix(df_raw, detailed=detailed) historical_mix = historical_mix.drop( columns=["Short Term Load Forecast", "Average Actual Load"], errors="ignore", ) return historical_mix
[docs] def get_load(self, date, verbose=False): """Returns load for last 24hrs in 5 minute intervals""" original_date = date if date == "latest": date = "today" date = utils._handle_date(date, self.default_timezone) df = self._get_load_and_forecast(verbose=verbose) df = df.dropna(subset=["Actual Load"]) df = df.rename(columns={"Actual Load": "Load"}) df = df[["Time", "Load"]] df = df.reset_index(drop=True) df = add_interval(df, interval_min=5) if original_date == "latest": return df elif utils.is_today(original_date, tz=self.default_timezone): # returns two days, so make sure to only return current day's load df = df[df["Time"].dt.date == date.date()].reset_index(drop=True) return df else: # hourly historical zonal loads # https://marketplace.spp.org/pages/hourly-load # five minute actual load available here: https://portal.spp.org/pages/stlf-vs-actual# raise NotSupported()
[docs] def get_load_forecast(self, date, forecast_type="MID_TERM", verbose=False): """Returns load forecast for next 7 days in hourly intervals Arguments: forecast_type (str): MID_TERM is hourly for next 7 days or SHORT_TERM is every five minutes for a few hours Returns: pd.DataFrame: forecast for current day """ df = self._get_load_and_forecast(verbose=verbose) # gives forecast from before current day # only include forecasts starting at current day last_actual = df.dropna(subset=["Actual Load"])["Time"].max() current_day = last_actual.replace(hour=0, minute=0) current_day_forecast = df[df["Time"] >= current_day].copy() # assume forecast is made at last actual current_day_forecast["Forecast Time"] = last_actual if forecast_type == "MID_TERM": forecast_col = "Mid-Term Forecast" elif forecast_type == "SHORT_TERM": forecast_col = "Short-Term Forecast" else: raise RuntimeError("Invalid forecast type") # there will be empty rows regardless of forecast type since they dont align current_day_forecast = current_day_forecast.dropna( subset=[forecast_col], ) current_day_forecast = current_day_forecast[ ["Forecast Time", "Time", forecast_col] ].rename({forecast_col: "Load Forecast"}, axis=1) current_day_forecast = add_interval( current_day_forecast, interval_min=60, ) return current_day_forecast
@support_date_range("5_MIN")
[docs] def get_load_forecast_short_term(self, date, end=None, verbose=False): """ 5-minute load forecast data for the SPP footprint (system-wide) for +/- 10 minutes. Also includes actual load. Data from https://portal.spp.org/pages/stlf-vs-actual Arguments: date (pd.Timestamp|str): date to get data for. Supports "latest" and "today" verbose (bool): print info Returns: pd.DataFrame: forecast as dataframe. """ # The short_term forecast is delayed up to 2 minutes. buffer_minutes = 2 if date == "latest": date = self.now() - pd.Timedelta(minutes=buffer_minutes) # Files do not exist in the future if date > self.now(): return url = self._short_term_load_forecast_url(date.floor("5T")) log(f"Downloading {url}", verbose=verbose) df = pd.read_csv(url) # According to the docs, the end time col should be GMTIntervalEnd, but it's # only GMTInterval in the data df = self._post_process_load_forecast( df, url, forecast_type="SHORT_TERM", forecast_col="STLF", end_time_col="GMTInterval", interval_duration=pd.Timedelta(minutes=5), ) return df
@support_date_range("HOUR_START")
[docs] def get_load_forecast_mid_term(self, date, end=None, verbose=False): """ Returns load forecast for +7 days in hourly intervals. Includes actual load for the past 24 hours. Data from https://portal.spp.org/pages/mtlf-vs-actual Arguments: date (pd.Timestamp|str): date to get data for. Supports "latest" and "today" verbose (bool): print info Returns: pd.DataFrame: forecast as dataframe. """ # The MID_TERM forecast is delayed up to 10 minutes. buffer_minutes = 10 if date == "latest": date = self.now() - pd.Timedelta(minutes=buffer_minutes) if date > self.now(): return url = self._mid_term_load_forecast_url(date.floor("H")) log(f"Downloading {url}", verbose=verbose) df = pd.read_csv(url) df = self._post_process_load_forecast( df, url, forecast_type="MID_TERM", forecast_col="MTLF", end_time_col="GMTIntervalEnd", interval_duration=pd.Timedelta(hours=1), ) return df
def _post_process_load_forecast( self, df, url, forecast_type, forecast_col, end_time_col, interval_duration, ): df = self._handle_market_end_to_interval(df, end_time_col, interval_duration) # Assume the publish time is in the name of the file. There are different # times on the webpage, but these could be the posting time. df["Publish Time"] = pd.Timestamp( url.split("-")[-1].split(".")[0], tz=self.default_timezone, ) df.columns = [col.strip() for col in df.columns] df["Forecast Type"] = forecast_type df = ( utils.move_cols_to_front( df, ["Interval Start", "Interval End", "Publish Time", "Forecast Type"], ) .drop(columns=["Time", "Interval"]) .sort_values(["Interval Start", "Publish Time"]) ) return df.dropna(subset=[forecast_col]).reset_index(drop=True) @support_date_range("5_MIN")
[docs] def get_solar_and_wind_forecast_short_term(self, date, end=None, verbose=False): """ Returns solar and wind generation forecast for +4 hours in 5 minute intervals. Include actuals for past day in 5 minute intervals. Data from https://portal.spp.org/pages/shortterm-resource-forecast Arguments: date (pd.Timestamp|str): date to get data for. Supports "latest" and "today" verbose (bool): print info Returns: pd.DataFrame: forecast as dataframe. """ # The short_term forecast is delayed up to 2 minutes. buffer_minutes = 2 if date == "latest": date = self.now() - pd.Timedelta(minutes=buffer_minutes) # Files do not exist in the future if date > self.now(): return url = self._short_term_solar_and_wind_url(date.floor("5T")) log(f"Downloading {url}", verbose=verbose) df = pd.read_csv(url) # According to the docs, the end time col should be GMTIntervalEnd, but it's # only GMTInterval in the data df = self._post_process_solar_and_wind_forecast( df, url, forecast_type="SHORT_TERM", end_time_col="GMTInterval", interval_duration=pd.Timedelta(minutes=5), ) return df
@support_date_range("HOUR_START")
[docs] def get_solar_and_wind_forecast_mid_term(self, date, end=None, verbose=False): """ Returns solar and wind generation forecast for +7 days in hourly intervals. Data from https://portal.spp.org/pages/midterm-resource-forecast. Arguments: date (pd.Timestamp|str): date to get data for. Supports "latest" and "today" verbose (bool): print info Returns: pd.DataFrame: forecast as dataframe. """ # The MID_TERM forecast is delayed up to 10 minutes. buffer_minutes = 10 if date == "latest": date = self.now() - pd.Timedelta(minutes=buffer_minutes) if date > self.now(): return url = self._mid_term_solar_and_wind_url(date.floor("H")) log(f"Downloading {url}", verbose=verbose) df = pd.read_csv(url) df = self._post_process_solar_and_wind_forecast( df, url, forecast_type="MID_TERM", end_time_col="GMTIntervalEnd", interval_duration=pd.Timedelta(hours=1), ) return df
def _post_process_solar_and_wind_forecast( self, df, url, forecast_type, end_time_col, interval_duration, ): df = self._handle_market_end_to_interval(df, end_time_col, interval_duration) # Assume the publish time is in the name of the file. There are different # times on the webpage, but these could be the posting time. df["Publish Time"] = pd.Timestamp( url.split("-")[-1].split(".")[0], tz=self.default_timezone, ) df.columns = [col.strip() for col in df.columns] df["Forecast Type"] = forecast_type df = ( utils.move_cols_to_front( df, ["Interval Start", "Interval End", "Publish Time", "Forecast Type"], ) .drop(columns=["Time", "Interval"]) .sort_values(["Interval Start", "Publish Time"]) ) return df.dropna(subset=["Wind Forecast MW", "Solar Forecast MW"]).reset_index( drop=True, ) def _short_term_solar_and_wind_url(self, date): hour = date.hour padded_hour = str(hour).zfill(2) padded_hour_plus_one = str((hour + 1) % 24).zfill(2) # The first hour in the URL is 1 after the hour in the filename. # Example 2024/01/01/02 has data for 01/01/2024 01:00:00 - 01/01/2024 01:55:00 return BASE_SOLAR_AND_WIND_SHORT_TERM_URL + date.strftime( f"/%Y/%m/%d/{padded_hour_plus_one}/OP-STRF-%Y%m%d{padded_hour}%M.csv", ) def _mid_term_solar_and_wind_url(self, date): # Explicitly set the minutes to 00. return BASE_SOLAR_AND_WIND_MID_TERM_URL + date.strftime( "/%Y/%m/%d/OP-MTRF-%Y%m%d%H00.csv", ) def _short_term_load_forecast_url(self, date): hour = date.hour padded_hour = str(hour).zfill(2) padded_hour_plus_one = str((hour + 1) % 24).zfill(2) # The first hour in the URL is 1 after the hour in the filename. # Example 2024/01/01/02 has data for 01/01/2024 01:00:00 - 01/01/2024 01:55:00 return BASE_LOAD_FORECAST_SHORT_TERM_URL + date.strftime( f"/%Y/%m/%d/{padded_hour_plus_one}/OP-STLF-%Y%m%d{padded_hour}%M.csv", ) def _mid_term_load_forecast_url(self, date): # Explicitly set the minutes to 00. return BASE_LOAD_FORECAST_MID_TERM_URL + date.strftime( "/%Y/%m/%d/OP-MTLF-%Y%m%d%H00.csv", ) def _handle_market_end_to_interval(self, df, column, interval_duration): """Converts market end time to interval end time""" df = df.rename( columns={ column: "Interval End", }, ) df["Interval End"] = pd.to_datetime(df["Interval End"], utc=True).dt.tz_convert( self.default_timezone, ) df["Interval Start"] = df["Interval End"] - interval_duration df["Time"] = df["Interval Start"] df = utils.move_cols_to_front(df, ["Time", "Interval Start", "Interval End"]) return df def _process_ver_curtailments(self, df): df = df.rename( columns={ "WindRedispatchCurtailments": "Wind Redispatch Curtailments", "WindManualCurtailments": "Wind Manual Curtailments", "WindCurtailedForEnergy": "Wind Curtailed For Energy", "SolarRedispatchCurtailments": "Solar Redispatch Curtailments", "SolarManualCurtailments": "Solar Manual Curtailments", "SolarCurtailedForEnergy": "Solar Curtailed For Energy", }, ) df = self._handle_market_end_to_interval( df, column="GMTIntervalEnding", interval_duration=pd.Timedelta(minutes=5), ) cols = [ "Time", "Interval Start", "Interval End", "Wind Redispatch Curtailments", "Wind Manual Curtailments", "Wind Curtailed For Energy", "Solar Redispatch Curtailments", "Solar Manual Curtailments", "Solar Curtailed For Energy", ] # historical data doesnt have all columns for c in cols: if c not in df.columns: df[c] = pd.NA df = df[cols] return df @support_date_range("DAY_START")
[docs] def get_capacity_of_generation_on_outage(self, date, end=None, verbose=False): """Get Capacity of Generation on Outage. Published daily at 8am CT for next 7 days Args: date: start date end: end date """ url = f"{FILE_BROWSER_DOWNLOAD_URL}/capacity-of-generation-on-outage?path=/{date.strftime('%Y')}/{date.strftime('%m')}/Capacity-Gen-Outage-{date.strftime('%Y%m%d')}.csv" # noqa msg = f"Downloading {url}" log(msg, verbose) df = pd.read_csv(url) return self._process_capacity_of_generation_on_outage(df, publish_time=date)
[docs] def get_capacity_of_generation_on_outage_annual(self, year, verbose=True): """Get VER Curtailments for a year. Starting 2014. Recent data use get_capacity_of_generation_on_outage Args: year: year to get data for verbose: print url Returns: pd.DataFrame: VER Curtailments """ url = f"{FILE_BROWSER_DOWNLOAD_URL}/capacity-of-generation-on-outage?path=/{year}/{year}.zip" # noqa def process_csv(df, file_name): # infe date from '2020/01/Capacity-Gen-Outage-20200101.csv' publish_time_str = file_name.split(".")[0].split("-")[-1] publish_time = pd.to_datetime(publish_time_str).tz_localize( self.default_timezone, ) df = self._process_capacity_of_generation_on_outage(df, publish_time) return df df = utils.download_csvs_from_zip_url( url, process_csv=process_csv, verbose=verbose, ) df = df.sort_values("Interval Start") return df
def _process_capacity_of_generation_on_outage(self, df, publish_time): # strip whitespace from column names df = df.rename(columns=lambda x: x.strip()) df = self._handle_market_end_to_interval( df, column="Market Hour", interval_duration=pd.Timedelta(minutes=60), ) df = df.rename( columns={ "Outaged MW": "Total Outaged MW", }, ) publish_time = pd.to_datetime(publish_time.normalize()) df.insert(0, "Publish Time", publish_time) # drop Time column df = df.drop(columns=["Time"]) return df @support_date_range("DAY_START")
[docs] def get_ver_curtailments(self, date, end=None, verbose=False): """Get VER Curtailments Supports recent data. For historical annual data use get_ver_curtailments_annual Args: date: start date end: end date """ url = f"{FILE_BROWSER_DOWNLOAD_URL}/ver-curtailments?path=/{date.strftime('%Y')}/{date.strftime('%m')}/VER-Curtailments-{date.strftime('%Y%m%d')}.csv" # noqa msg = f"Downloading {url}" log(msg, verbose) df = pd.read_csv(url) return self._process_ver_curtailments(df)
[docs] def get_ver_curtailments_annual(self, year, verbose=True): """Get VER Curtailments for a year. Starting 2014. Recent data use get_ver_curtailments Args: year: year to get data for verbose: print url Returns: pd.DataFrame: VER Curtailments """ url = f"{FILE_BROWSER_DOWNLOAD_URL}/ver-curtailments?path=/{year}/{year}.zip" # noqa df = utils.download_csvs_from_zip_url(url, verbose=verbose) df = self._process_ver_curtailments(df) df = df[~df["Interval Start"].isnull()] df = df.sort_values("Time") return df
def _get_load_and_forecast(self, verbose=False): url = f"{MARKETPLACE_BASE_URL}/chart-api/load-forecast/asChart" msg = f"Getting load and forecast from {url}" log(msg, verbose) r = self._get_json(url)["response"] data = {"Time": r["labels"]} for d in r["datasets"][:3]: if d["label"] == "Actual Load": data["Actual Load"] = d["data"] elif d["label"] == "Mid-Term Load Forecast": data["Mid-Term Forecast"] = d["data"] elif d["label"] == "Short-Term Load Forecast": data["Short-Term Forecast"] = d["data"] df = pd.DataFrame(data) df["Time"] = pd.to_datetime( df["Time"], ).dt.tz_convert(self.default_timezone) return df # todo where does date got in argument order # def get_historical_lmp(self, date, market: str, nodes: list): # 5 minute interal data # {FILE_BROWSER_API_URL}/rtbm-lmp-by-location?path=/2022/08/By_Interval/08 # /RTBM-LMP-SL-202208082125.csv # historical generation mix # https://marketplace.spp.org/pages/generation-mix-rolling-365 # https://marketplace.spp.org/chart-api/gen-mix-365/asFile # 15mb file with five minute resolution
[docs] def get_raw_interconnection_queue(self, verbose=False) -> BinaryIO: url = "https://opsportal.spp.org/Studies/GenerateActiveCSV" msg = f"Getting interconnection queue from {url}" log(msg, verbose) response = requests.get(url) return utils.get_response_blob(response)
[docs] def get_interconnection_queue(self, verbose=False): """Get interconnection queue Returns: pandas.DataFrame: Interconnection queue """ raw_data = self.get_raw_interconnection_queue(verbose) queue = pd.read_csv(raw_data, skiprows=1) queue["Status (Original)"] = queue["Status"] completed_val = InterconnectionQueueStatus.COMPLETED.value active_val = InterconnectionQueueStatus.ACTIVE.value queue["Status"] = queue["Status"].map( { "IA FULLY EXECUTED/COMMERCIAL OPERATION": completed_val, "IA FULLY EXECUTED/ON SCHEDULE": completed_val, "IA FULLY EXECUTED/ON SUSPENSION": completed_val, "IA PENDING": active_val, "DISIS STAGE": active_val, "None": active_val, }, ) queue["Generation Type"] = queue[["Generation Type", "Fuel Type"]].apply( lambda x: " - ".join(x.dropna()), axis=1, ) queue["Proposed Completion Date"] = queue["Commercial Operation Date"] rename = { "Generation Interconnection Number": "Queue ID", " Nearest Town or County": "County", "State": "State", "TO at POI": "Transmission Owner", "Capacity": "Capacity (MW)", "MAX Summer MW": "Summer Capacity (MW)", "MAX Winter MW": "Winter Capacity (MW)", "Generation Type": "Generation Type", "Request Received": "Queue Date", "Substation or Line": "Interconnection Location", } # todo: there are a few columns being parsed # as "unamed" that aren't being included but should extra_columns = [ "In-Service Date", "Commercial Operation Date", "Cessation Date", "Current Cluster", "Cluster Group", "Replacement Generator Commercial Op Date", "Service Type", "Status (Original)", ] missing = [ "Project Name", "Interconnecting Entity", "Withdrawn Date", "Withdrawal Comment", "Actual Completion Date", ] queue = utils.format_interconnection_df( queue=queue, rename=rename, extra=extra_columns, missing=missing, ) return queue
@lmp_config( supports={ Markets.REAL_TIME_5_MIN: ["latest", "today", "historical"], Markets.DAY_AHEAD_HOURLY: ["latest", "today", "historical"], }, ) @support_date_range(frequency="DAY_START")
[docs] def get_lmp( self, date, end=None, market: str = None, location_type: str = LOCATION_TYPE_ALL, verbose=False, ): """Get LMP data Supported Markets: - ``REAL_TIME_5_MIN`` - ``DAY_AHEAD_HOURLY`` Supported Location Types: - ``Hub`` - ``Interface`` - ``ALL`` """ if market not in self.markets: raise NotSupported(f"Market {market} not supported") if location_type not in self.location_types: raise NotSupported(f"Location type {location_type} not supported") if market == Markets.REAL_TIME_5_MIN: df = self._get_rtm5_lmp( date, end, verbose, ) elif market == Markets.DAY_AHEAD_HOURLY: if date == "latest": raise ValueError("Latest not supported for Day Ahead Hourly") df = self._get_dam_lmp( date, verbose, ) return self._finalize_spp_df( df, market=market, location_type=location_type, verbose=verbose, )
def _get_feature_data(self, base_url, verbose=False): """Fetches data from ArcGIS Map Service with Feature Data Returns: pd.DataFrame of features """ args = { "f": "json", "where": "OBJECTID IS NOT NULL", "returnGeometry": "false", "outFields": "*", } doc = self._get_json(base_url, params=args, verbose=verbose) df = pd.DataFrame([feature["attributes"] for feature in doc["features"]]) return df def _get_rtm5_lmp( self, date, end=None, verbose=False, ): if date == "latest": urls = [ FILE_BROWSER_DOWNLOAD_URL + "/" + FS_RTBM_LMP_BY_LOCATION + "?path=%2FRTBM-LMP-SL-latestInterval.csv", ] else: urls = self._file_browser_list( fs_name=FS_RTBM_LMP_BY_LOCATION, type="folder", path=date.strftime("/%Y/%m/By_Interval/%d"), )["url"].tolist() msg = f"Found {len(urls)} files for {date}" log(msg, verbose) df = self._fetch_and_concat_csvs(urls, verbose=verbose) return df def _get_dam_lmp( self, date, verbose=False, ): url = f"{FILE_BROWSER_DOWNLOAD_URL}/{FS_DAM_LMP_BY_LOCATION}?path=/{date.strftime('%Y')}/{date.strftime('%m')}/By_Day/DA-LMP-SL-{date.strftime('%Y%m%d')}0100.csv" # noqa log(f"Downloading {url}", verbose=verbose) df = pd.read_csv(url) return df def _finalize_spp_df(self, df, market, location_type, verbose=False): """ Finalizes DataFrame: - Sets Market - Filters by location type if needed - Sets location type - Renames and ordering columns - Filters by Location - Resets the index Arguments: pandas.DataFrame: DataFrame with SPP data market (str): Market location_type (str): Location type verbose (bool, optional): Verbose output """ if market == Markets.REAL_TIME_5_MIN: interval_duration = pd.Timedelta(minutes=5) elif market == Markets.DAY_AHEAD_HOURLY: interval_duration = pd.Timedelta(hours=1) df = self._handle_market_end_to_interval( df, column="GMTIntervalEnd", interval_duration=interval_duration, ) df["Location"] = df["Settlement Location"] df["PNode"] = df["Pnode"] df["Market"] = market.value df["Location Type"] = LOCATION_TYPE_SETTLEMENT_LOCATION # Create boolean masks for each location type hubs = self._get_location_list(LOCATION_TYPE_HUB, verbose=verbose) interfaces = self._get_location_list(LOCATION_TYPE_INTERFACE, verbose=verbose) is_hub = df["Location"].isin(hubs) is_interface = df["Location"].isin(interfaces) df.loc[is_hub, "Location Type"] = LOCATION_TYPE_HUB df.loc[is_interface, "Location Type"] = LOCATION_TYPE_INTERFACE df = df.rename( columns={ "LMP": "LMP", # for posterity "MLC": "Loss", "MCC": "Congestion", "MEC": "Energy", }, ) df = utils.filter_lmp_locations(df, location_type=location_type) df = df[ [ "Time", "Interval Start", "Interval End", "Market", "Location", "Location Type", "PNode", "LMP", "Energy", "Congestion", "Loss", ] ] df = df.reset_index(drop=True) return df @support_date_range("5_MIN")
[docs] def get_operating_reserves(self, date, end=None, verbose=False): if date == "latest": url = f"{FILE_BROWSER_DOWNLOAD_URL}/operating-reserves?path=/RTBM-OR-latestInterval.csv" # noqa else: if end is None: end = date + FiveMinOffset() url = f"{FILE_BROWSER_DOWNLOAD_URL}/operating-reserves?path=/{date.strftime('%Y')}/{date.strftime('%m')}/{date.strftime('%d')}/RTBM-OR-{end.strftime('%Y%m%d%H%M')}.csv" # noqa msg = f"Downloading {url}" log(msg, verbose) df = pd.read_csv(url) return self._process_operating_reserves(df)
def _process_operating_reserves(self, df): df = self._handle_market_end_to_interval( df, column="GMTIntervalEnd", interval_duration=pd.Timedelta(minutes=5), ) # don't need this column df = df.drop(columns=["Interval"]) df = df.rename( columns={ "RegUP_Clr": "Reg_Up_Cleared", "RegDN_Clr": "Reg_Dn_Cleared", "RampUP_Clr": "Ramp_Up_Cleared", "RampDN_Clr": "Ramp_Dn_Cleared", "UncUP_Clr": "Unc_Up_Cleared", "STSUncUP_Clr": "STS_Unc_Up_Cleared", "Spin_Clr": "Spin_Cleared", "Supp_Clr": "Supp_Cleared", }, ) return df @support_date_range("DAY_START")
[docs] def get_day_ahead_operating_reserve_prices(self, date, end=None, verbose=False): """Provides Marginal Clearing Price information by Reserve Zone for each Day-Ahead Market solution for each Operating Day. Posting is updated each day after the DA Market results are posted. Available at https://portal.spp.org/pages/da-mcp# Args: date: date to get data for end: end date verbose: print url Returns: pd.DataFrame: Day Ahead Marginal Clearing Prices """ if date == "latest": raise ValueError( "Latest not supported for Day Ahead Marginal Clearing Prices", ) url = f"{FILE_BROWSER_DOWNLOAD_URL}/da-mcp?path=/{date.strftime('%Y')}/{date.strftime('%m')}/DA-MCP-{date.strftime('%Y%m%d')}0100.csv" # noqa msg = f"Downloading {url}" log(msg, verbose) df = pd.read_csv(url) return self._process_day_ahead_operating_reserve_prices(df)
def _process_day_ahead_operating_reserve_prices(self, df): df = self._handle_market_end_to_interval( df, column="GMTIntervalEnd", interval_duration=pd.Timedelta(hours=1), ).assign(Market="DAM") column_mapping = { "RegUP": "Reg_Up", "RegDN": "Reg_Dn", "RampUP": "Ramp_Up", "RampDN": "Ramp_Dn", "Spin": "Spin", "Supp": "Supp", "UncUP": "Unc_Up", } df = df.rename(columns=column_mapping) cols_to_keep = [ "Interval Start", "Interval End", "Market", "Reserve Zone", ] + list( column_mapping.values(), ) # Older datasets might not have all the reserve types return df[[c for c in cols_to_keep if c in df]] @support_date_range("5_MIN")
[docs] def get_lmp_real_time_weis(self, date, end=None, verbose=False): """Get LMP data for real time WEIS Args: date: date to get data for. if end is not provided, will get data for 5 minute interval that date is in. end: end date verbose: print url """ # if no end, find nearest 5 minute interval end # to use if date == "latest": url = f"{FILE_BROWSER_DOWNLOAD_URL}/lmp-by-settlement-location-weis?path=/WEIS-RTBM-LMP-SL-latestInterval.csv" # noqa else: if end is None: end = date + FiveMinOffset() # always round up to nearest 5 minutes # if already on 5 minute interval, this will do nothing end = end.ceil("5min") # todo before 2022 only annual files are available # folder path is based on start date # file name is based on end date url = f"{FILE_BROWSER_DOWNLOAD_URL}/lmp-by-settlement-location-weis?path=/{date.strftime('%Y')}/{date.strftime('%m')}/By_Interval/{date.strftime('%d')}/WEIS-RTBM-LMP-SL-{end.strftime('%Y%m%d%H%M')}.csv" # noqa # TODO: sometimes there are missing interval files (example: https://portal.spp.org/pages/lmp-by-settlement-location-weis#%2F2024%2F01%2FBy_Interval%2F21) # noqa # We can't do anything in these cases but log a message msg = f"Downloading {url}" log(msg, verbose) try: df = pd.read_csv(url) except ConnectionResetError as e: log(f"Error downloading {url}: {e}", verbose) return pd.DataFrame() return self._process_lmp_real_time_weis(df)
def _process_lmp_real_time_weis(self, df): # strip whitespace from column names df = df.rename(columns=lambda x: x.strip()) df = self._handle_market_end_to_interval( df, column="GMTIntervalEnd", interval_duration=pd.Timedelta(minutes=5), ) df["Location Type"] = LOCATION_TYPE_SETTLEMENT_LOCATION df["Market"] = "REAL_TIME_WEIS" df = df.rename( columns={ "Settlement Location": "Location", "Pnode": "PNode", "LMP": "LMP", # for posterity "MLC": "Loss", "MCC": "Congestion", "MEC": "Energy", }, ) df = df[ [ "Interval Start", "Interval End", "Market", "Location", "Location Type", "PNode", "LMP", "Energy", "Congestion", "Loss", ] ] return df def _get_location_list(self, location_type, verbose=False): if location_type == LOCATION_TYPE_HUB: df = self._get_feature_data(QUERY_RTM5_HUBS_URL, verbose=verbose) elif location_type == LOCATION_TYPE_INTERFACE: df = self._get_feature_data( QUERY_RTM5_INTERFACES_URL, verbose=verbose, ) else: raise ValueError(f"Invalid location_type: {location_type}") return df["SETTLEMENT_LOCATION"].unique().tolist() def _fetch_and_concat_csvs(self, urls: list, verbose: bool = False): all_dfs = [] for url in tqdm.tqdm(urls): msg = f"Fetching {url}" log(msg, verbose) df = pd.read_csv(url) all_dfs.append(df) return pd.concat(all_dfs) def _get_marketplace_session(self) -> dict: """ Returns a session object for the Marketplace API """ html = requests.get(MARKETPLACE_BASE_URL) jsessionid = html.cookies.get("JSESSIONID") soup = BeautifulSoup(html.content, "html.parser") csrf_token = soup.find("meta", {"id": "_csrf"}).attrs["content"] csrf_token_header = soup.find( "meta", {"id": "_csrf_header"}, ).attrs["content"] return { "cookies": {"JSESSIONID": jsessionid}, "headers": { csrf_token_header: csrf_token, }, } def _file_browser_list(self, fs_name: str, type: str, path: str): """Lists folders in a browser Returns: pd.DataFrame of files, or empty pd.DataFrame on error""" session = self._get_marketplace_session() json_payload = { "name": fs_name, "fsName": fs_name, "type": type, "path": path, } list_results = requests.post( FILE_BROWSER_API_URL, json=json_payload, headers=session["headers"], cookies=session["cookies"], ) if list_results.status_code == 200: df = pd.DataFrame(list_results.json()) df["url"] = ( FILE_BROWSER_DOWNLOAD_URL + "/" + fs_name + "?path=" + df["path"] ) return df else: return pd.DataFrame() @staticmethod def _match( needles, haystacks, needle_norm_fn=lambda x: x.lower(), haystack_norm_fn=lambda x: x.lower(), ): """Returns items from haystacks if any needles are in them""" return [ haystack for haystack in haystacks if any( needle_norm_fn(needle) in haystack_norm_fn(haystack) for needle in needles ) ]
[docs]def process_gen_mix(df, detailed=False): """Parse SPP generation mix data from https://marketplace.spp.org/pages/generation-mix-historical Args: df (pd.DataFrame): raw data detailed (bool): whether to combine market and self columns Returns: pd.DataFrame: processed data """ new_df = df.copy() # remove whitespace from column names new_df.columns = new_df.columns.str.strip() # rename columns to standardize new_df = new_df.rename( columns={ "GMTTime": "Time", "GMT MKT Interval": "Time", "Gas Self": "Natural Gas Self", # rename below is based on documenation "Load": "Short Term Load Forecast", }, ) # parse time new_df["Time"] = pd.to_datetime(new_df["Time"], utc=True).dt.tz_convert( SPP.default_timezone, ) # combine market and self columns columns_to_combine = [ "Coal", "Diesel Fuel Oil", "Hydro", "Natural Gas", "Nuclear", "Solar", "Waste Disposal Services", "Wind", "Waste Heat", "Other", ] if not detailed: for col in columns_to_combine: market_col = f"{col} Market" self_col = f"{col} Self" if market_col not in new_df.columns or self_col not in new_df.columns: continue new_df[col] = new_df[market_col] + new_df[self_col] new_df = new_df.drop([market_col, self_col], axis=1) new_df = add_interval(new_df, 5) return new_df
[docs]def add_interval(df, interval_min): """Adds Interval Start and Interval End columns to df""" df["Interval Start"] = df["Time"] df["Interval End"] = df["Interval Start"] + pd.Timedelta(minutes=interval_min) df = utils.move_cols_to_front( df, ["Time", "Interval Start", "Interval End"], ) return df