# Source code for gridstatus.tests.test_eia

import numpy as np
import pandas as pd
import pytest

import gridstatus
from gridstatus.eia import EIA, HENRY_HUB_TIMEZONE


def _check_interchange(df):
    columns = [
        "Interval Start",
        "Interval End",
        "From BA",
        "From BA Name",
        "To BA",
        "To BA Name",
        "MW",
    ]
    # assert interval start and interval end are datetimes in utc
    assert df["Interval Start"].dtype == "datetime64[ns, UTC]"
    assert df["Interval End"].dtype == "datetime64[ns, UTC]"
    assert df.shape[0] > 0
    assert df.columns.tolist() == columns


def _check_region_data(df):
    columns = [
        "Interval Start",
        "Interval End",
        "Respondent",
        "Respondent Name",
        "Load",
        "Load Forecast",
        "Net Generation",
        "Total Interchange",
    ]

    assert df["Interval Start"].dtype == "datetime64[ns, UTC]"
    assert df["Interval End"].dtype == "datetime64[ns, UTC]"
    assert df.shape[0] > 0
    assert df.columns.tolist() == columns


def _check_region_subba_data(df):
    columns = [
        "Interval Start",
        "Interval End",
        "BA",
        "BA Name",
        "Subregion",
        "Subregion Name",
        "MW",
    ]

    assert df["Interval Start"].dtype == "datetime64[ns, UTC]"
    assert df["Interval End"].dtype == "datetime64[ns, UTC]"
    assert df.shape[0] > 0
    assert df.columns.tolist() == columns


def _check_fuel_type(df, subset=False):
    columns = [
        "Interval Start",
        "Interval End",
        "Respondent",
        "Respondent Name",
        "Coal",
        "Hydro",
        "Natural Gas",
        "Nuclear",
        "Other",
        "Petroleum",
        "Solar",
        "Wind",
    ]

    assert df["Interval Start"].dtype == "datetime64[ns, UTC]"
    assert df["Interval End"].dtype == "datetime64[ns, UTC]"
    assert df.shape[0] > 0
    if subset is False:
        assert df.columns.tolist() == columns

    # for balancing areas that don't have all forms of generation
    else:
        for column in df.columns.tolist():
            assert column in columns


def test_list_routes():
    """The ``electricity/rto/`` route listing includes ``interchange-data``."""
    client = gridstatus.EIA()
    listing = client.list_routes("electricity/rto/")
    route_ids = [route["id"] for route in listing["routes"]]
    assert "interchange-data" in route_ids
def test_list_facets():
    """The facets of ``electricity/rto/region-data`` include ``type``."""
    client = gridstatus.EIA()
    facet_names = client.list_facets("electricity/rto/region-data").keys()
    assert "type" in facet_names
def test_rto_interchange():
    """Fetch interchange data for a fixed window and validate the result."""
    first_day, last_day = "2020-01-01", "2020-01-04"

    frame = gridstatus.EIA().get_dataset(
        dataset="electricity/rto/interchange-data",
        start=first_day,
        end=last_day,
        verbose=True,
    )

    # The returned interval ends must span exactly the requested dates.
    interval_end = frame["Interval End"]
    assert interval_end.min().date() == pd.Timestamp(first_day).date()
    assert interval_end.max().date() == pd.Timestamp(last_day).date()

    assert frame.isnull().sum().sum() == 0
    _check_interchange(frame)
def test_rto_region_data():
    """Fetch region data for a fixed window and validate the result."""
    first_day, last_day = "2020-01-01", "2020-01-04"

    frame = gridstatus.EIA().get_dataset(
        dataset="electricity/rto/region-data",
        start=first_day,
        end=last_day,
        verbose=True,
    )

    interval_end = frame["Interval End"]
    assert interval_end.min().date() == pd.Timestamp(first_day).date()
    assert interval_end.max().date() == pd.Timestamp(last_day).date()

    # BPAT is a respondent known to have no nulls, so a clean slice here
    # checks that pagination is working.
    bpat_rows = frame[frame["Respondent"] == "BPAT"]
    assert bpat_rows.isnull().sum().sum() == 0

    _check_region_data(frame)
def test_rto_region_subba_data():
    """Fetch region sub-BA data for a fixed window and validate the result."""
    first_day, last_day = "2020-01-01", "2020-01-04"

    frame = gridstatus.EIA().get_dataset(
        dataset="electricity/rto/region-sub-ba-data",
        start=first_day,
        end=last_day,
        verbose=True,
    )

    interval_end = frame["Interval End"]
    assert interval_end.min().date() == pd.Timestamp(first_day).date()
    assert interval_end.max().date() == pd.Timestamp(last_day).date()

    # PGAE is a subregion known to have no nulls, so a clean slice here
    # checks that pagination is working.
    pgae_rows = frame[frame["Subregion"] == "PGAE"]
    assert pgae_rows.isnull().sum().sum() == 0

    _check_region_subba_data(frame)
def test_fuel_type():
    """Fetch three days of recent fuel-type data and validate the result."""
    # Window starts one week before "now" and covers three days.
    window_start = pd.Timestamp.now() - pd.Timedelta(days=7)
    window_end = window_start + pd.Timedelta(days=3)

    # Original note: dataset that doesn't have a handler yet.
    frame = gridstatus.EIA().get_dataset(
        dataset="electricity/rto/fuel-type-data",
        start=window_start,
        end=window_end,
        verbose=True,
    )

    assert frame.isnull().sum().sum() == 0
    _check_fuel_type(frame)
def test_facets():
    """Restricting the ``respondent`` facet to PACE returns only PACE rows."""
    first_day, last_day = "2020-01-01", "2020-01-04"

    # Original note: dataset that doesn't have a handler yet.
    frame = gridstatus.EIA().get_dataset(
        dataset="electricity/rto/fuel-type-data",
        start=first_day,
        end=last_day,
        verbose=True,
        facets={"respondent": ["PACE"]},
    )

    assert frame.isnull().sum().sum() == 0

    # Every returned row must belong to the single requested respondent.
    for respondent_name in frame["Respondent Name"]:
        assert respondent_name == "PacifiCorp East"

    _check_fuel_type(frame, subset=True)
def test_daily_spots_and_futures():
    """Daily spots/futures returns non-empty petroleum and gas frames."""
    # A placeholder key is enough: no need for API key to scrape.
    client = gridstatus.EIA(api_key="abcd")
    result = client.get_daily_spots_and_futures()

    petroleum = result["petroleum"]
    assert petroleum.columns.tolist() == [
        "date",
        "product",
        "area",
        "price",
        "percent_change",
    ]
    assert petroleum.shape[0] > 0

    natural_gas = result["natural_gas"]
    assert natural_gas.columns.tolist() == [
        "date",
        "region",
        "natural_gas_price",
        "natural_gas_percent_change",
        "electricity_price",
        "electricity_percent_change",
        "spark_spread",
    ]
    assert natural_gas.shape[0] > 0
def test_get_coal_spots():
    """Coal spots returns non-empty weekly-spot, coal and coke export frames."""
    # A placeholder key is enough: no need for API key to scrape.
    client = gridstatus.EIA(api_key="abcd")
    result = client.get_coal_spots()

    # Expected column layout for each frame in the returned mapping.
    expected_columns_by_key = {
        "weekly_spots": [
            "week_ending_date",
            "central_appalachia_price_short_ton",
            "northern_appalachia_price_short_ton",
            "illinois_basin_price_short_ton",
            "powder_river_basin_price_short_ton",
            "uinta_basin_price_short_ton",
            "central_appalachia_price_mmbtu",
            "northern_appalachia_price_mmbtu",
            "illinois_basin_price_mmbtu",
            "powder_river_basin_price_mmbtu",
            "uinta_basin_price_mmbtu",
        ],
        "coal_exports": [
            "delivery_month",
            "coal_min",
            "coal_max",
            "coal_exports",
        ],
        "coke_exports": [
            "delivery_month",
            "coke_min",
            "coke_max",
            "coke_exports",
        ],
    }

    for key, expected_columns in expected_columns_by_key.items():
        assert result[key].columns.tolist() == expected_columns
        assert result[key].shape[0] > 0
@pytest.mark.slow
def test_eia_grid_monitor():
    """The CISO grid monitor frame exposes the expected columns in order."""
    expected_columns = [
        "Interval Start",
        "Interval End",
        "Area Id",
        "Area Name",
        "Area Type",
        "Demand",
        "Demand Forecast",
        "Net Generation",
        "Total Interchange",
        "NG: COL",
        "NG: NG",
        "NG: NUC",
        "NG: OIL",
        "NG: WAT",
        "NG: SUN",
        "NG: WND",
        "NG: UNK",
        "NG: OTH",
        "Positive Generation",
        "Consumed Electricity",
        "CO2 Factor: COL",
        "CO2 Factor: NG",
        "CO2 Factor: OIL",
        "CO2 Emissions: COL",
        "CO2 Emissions: NG",
        "CO2 Emissions: OIL",
        "CO2 Emissions: Other",
        "CO2 Emissions Generated",
        "CO2 Emissions Imported",
        "CO2 Emissions Exported",
        "CO2 Emissions Consumed",
        "CO2 Emissions Intensity for Generated Electricity",
        "CO2 Emissions Intensity for Consumed Electricity",
    ]

    client = gridstatus.EIA()
    frame = client.get_grid_monitor(area_id="CISO")

    assert frame.columns.tolist() == expected_columns
def _check_henry_hub_natural_gas_spot_prices(df): assert df.columns.tolist() == [ "Interval Start", "Interval End", "period", "duoarea", "area_name", "product", "fuel_type", "process", "price_type", "series", "series_description", "price", "units", ] assert (df["Interval End"] - df["Interval Start"]).unique() == pd.Timedelta(days=1) # Only RNGWHHD is present after 2024-04-05 assert set(df["series"].unique()) == set( [ "RNGWHHD", "RNGC1", "RNGC2", "RNGC3", "RNGC4", ], ) assert df["area_name"].isna().any() assert not df["price"].isna().any() assert not df["series"].isna().any() assert np.issubdtype(df["price"], np.float64)
def test_get_henry_hub_natural_gas_spot_prices_historical_date():
    """A single-day request yields intervals covering exactly that day."""
    requested_day = "2024-01-02"
    frame = EIA().get_henry_hub_natural_gas_spot_prices(
        requested_day,
        requested_day,
    )

    _check_henry_hub_natural_gas_spot_prices(frame)

    earliest_start = frame["Interval Start"].min()
    latest_end = frame["Interval End"].max()
    assert earliest_start == pd.Timestamp("2024-01-02", tz=HENRY_HUB_TIMEZONE)
    assert latest_end == pd.Timestamp("2024-01-03", tz=HENRY_HUB_TIMEZONE)
def test_get_henry_hub_natural_gas_spot_prices_historical_date_range():
    """A multi-week request yields intervals spanning the requested range."""
    range_start, range_end = "2023-12-04", "2024-01-02"
    frame = EIA().get_henry_hub_natural_gas_spot_prices(
        range_start,
        range_end,
    )

    _check_henry_hub_natural_gas_spot_prices(frame)

    earliest_start = frame["Interval Start"].min()
    latest_end = frame["Interval End"].max()
    assert earliest_start == pd.Timestamp("2023-12-04", tz=HENRY_HUB_TIMEZONE)
    # The last interval ends one day after the final requested date.
    assert latest_end == pd.Timestamp("2024-01-03", tz=HENRY_HUB_TIMEZONE)