# Source code for gridstatus.tests.test_eia

import numpy as np
import pandas as pd
import pytest

import gridstatus
from gridstatus.eia import EIA, HENRY_HUB_TIMEZONE


def _check_interchange(df):
    columns = [
        "Interval Start",
        "Interval End",
        "From BA",
        "From BA Name",
        "To BA",
        "To BA Name",
        "MW",
    ]
    # assert interval start and interval end are datetimes in utc
    assert df["Interval Start"].dtype == "datetime64[ns, UTC]"
    assert df["Interval End"].dtype == "datetime64[ns, UTC]"
    assert df.shape[0] > 0
    assert df.columns.tolist() == columns


def _check_region_data(df):
    columns = [
        "Interval Start",
        "Interval End",
        "Respondent",
        "Respondent Name",
        "Load",
        "Load Forecast",
        "Net Generation",
        "Total Interchange",
    ]

    assert df["Interval Start"].dtype == "datetime64[ns, UTC]"
    assert df["Interval End"].dtype == "datetime64[ns, UTC]"
    assert df.shape[0] > 0
    assert df.columns.tolist() == columns


def _check_region_subba_data(df):
    columns = [
        "Interval Start",
        "Interval End",
        "BA",
        "BA Name",
        "Subregion",
        "Subregion Name",
        "MW",
    ]

    assert df["Interval Start"].dtype == "datetime64[ns, UTC]"
    assert df["Interval End"].dtype == "datetime64[ns, UTC]"
    assert df.shape[0] > 0
    assert df.columns.tolist() == columns


def _check_fuel_type(df, subset=False):
    columns = [
        "Interval Start",
        "Interval End",
        "Respondent",
        "Respondent Name",
        "Coal",
        "Hydro",
        "Natural Gas",
        "Nuclear",
        "Other",
        "Petroleum",
        "Solar",
        "Wind",
    ]

    assert df["Interval Start"].dtype == "datetime64[ns, UTC]"
    assert df["Interval End"].dtype == "datetime64[ns, UTC]"
    assert df.shape[0] > 0
    if subset is False:
        assert df.columns.tolist() == columns

    # for balancing areas that don't have all forms of generation
    else:
        for column in df.columns.tolist():
            assert column in columns


def test_list_routes():
    """``list_routes("electricity/rto/")`` lists the ``interchange-data`` route."""
    eia = gridstatus.EIA()

    routes = eia.list_routes("electricity/rto/")

    # short-circuits on the first match instead of building a throwaway list
    assert any(route["id"] == "interchange-data" for route in routes["routes"])


def test_list_facets():
    """``list_facets`` for ``electricity/rto/region-data`` exposes a ``type`` key."""
    eia = gridstatus.EIA()

    facets = eia.list_facets("electricity/rto/region-data")

    assert "type" in facets


def test_rto_interchange():
    """Fetch interchange data for a fixed 2020 window and validate it.

    Checks that the earliest and latest ``Interval End`` dates equal the
    requested start and end dates, that the frame contains no nulls, and
    that it passes ``_check_interchange``.
    """
    eia = gridstatus.EIA()

    start = "2020-01-01"
    end = "2020-01-04"

    df = eia.get_dataset(
        dataset="electricity/rto/interchange-data",
        start=start,
        end=end,
        verbose=True,
    )

    assert df["Interval End"].min().date() == pd.Timestamp(start).date()
    assert df["Interval End"].max().date() == pd.Timestamp(end).date()
    assert df.isnull().sum().sum() == 0

    _check_interchange(df)


def test_rto_region_data():
    """Fetch region data for a fixed 2020 window and validate it.

    Checks that the earliest and latest ``Interval End`` dates equal the
    requested start and end dates, that the ``BPAT`` rows contain no nulls,
    and that the frame passes ``_check_region_data``.
    """
    eia = gridstatus.EIA()
    start = "2020-01-01"
    end = "2020-01-04"

    df = eia.get_dataset(
        dataset="electricity/rto/region-data",
        start=start,
        end=end,
        verbose=True,
    )

    assert df["Interval End"].min().date() == pd.Timestamp(start).date()
    assert df["Interval End"].max().date() == pd.Timestamp(end).date()
    # pick a respondent that we know has no nulls;
    # this checks that pagination is working
    assert df[df["Respondent"] == "BPAT"].isnull().sum().sum() == 0
    _check_region_data(df)


def test_rto_region_subba_data():
    """Fetch region sub-BA data for a fixed 2020 window and validate it.

    Checks that the earliest and latest ``Interval End`` dates equal the
    requested start and end dates, that the ``PGAE`` subregion rows contain
    no nulls, and that the frame passes ``_check_region_subba_data``.
    """
    eia = gridstatus.EIA()
    start = "2020-01-01"
    end = "2020-01-04"

    df = eia.get_dataset(
        dataset="electricity/rto/region-sub-ba-data",
        start=start,
        end=end,
        verbose=True,
    )

    assert df["Interval End"].min().date() == pd.Timestamp(start).date()
    assert df["Interval End"].max().date() == pd.Timestamp(end).date()
    # pick a subregion that we know has no nulls;
    # this checks that pagination is working
    assert df[df["Subregion"] == "PGAE"].isnull().sum().sum() == 0
    _check_region_subba_data(df)


def test_fuel_type():
    """Fetch three days of fuel-type data starting a week ago and validate it.

    Checks that the frame contains no nulls and passes ``_check_fuel_type``
    with the full column list.
    """
    eia = gridstatus.EIA()

    start = pd.Timestamp.now() - pd.Timedelta(days=7)
    end = start + pd.Timedelta(days=3)

    # dataset that doesn't have a handler yet
    df = eia.get_dataset(
        dataset="electricity/rto/fuel-type-data",
        start=start,
        end=end,
        verbose=True,
    )
    assert df.isnull().sum().sum() == 0

    _check_fuel_type(df)


def test_facets():
    """Fetch fuel-type data restricted to the ``PACE`` respondent facet.

    Checks that the frame contains no nulls, that every row's
    ``Respondent Name`` is ``PacifiCorp East``, and that the columns are a
    subset of the fuel-type columns (via ``_check_fuel_type(subset=True)``).
    """
    eia = gridstatus.EIA()

    start = "2020-01-01"
    end = "2020-01-04"

    # dataset that doesn't have a handler yet
    df = eia.get_dataset(
        dataset="electricity/rto/fuel-type-data",
        start=start,
        end=end,
        verbose=True,
        facets={"respondent": ["PACE"]},
    )
    assert df.isnull().sum().sum() == 0
    # vectorised comparison instead of a Python-level loop over the column
    assert (df["Respondent Name"] == "PacifiCorp East").all()

    _check_fuel_type(df, subset=True)


def test_daily_spots_and_futures():
    """``get_daily_spots_and_futures`` returns non-empty frames with known columns.

    Checks the exact column order and a non-zero row count for both the
    ``petroleum`` and ``natural_gas`` entries of the result.
    """
    eia = gridstatus.EIA(api_key="abcd")  # no need for API key to scrape.

    d = eia.get_daily_spots_and_futures()

    cols_petrol = [
        "date",
        "product",
        "area",
        "price",
        "percent_change",
    ]

    assert d["petroleum"].columns.tolist() == cols_petrol
    assert d["petroleum"].shape[0] > 0

    cols_ng = [
        "date",
        "region",
        "natural_gas_price",
        "natural_gas_percent_change",
        "electricity_price",
        "electricity_percent_change",
        "spark_spread",
    ]

    assert d["natural_gas"].columns.tolist() == cols_ng
    assert d["natural_gas"].shape[0] > 0


def test_get_coal_spots():
    """``get_coal_spots`` returns non-empty frames with known columns.

    Checks the exact column order and a non-zero row count for the
    ``weekly_spots``, ``coal_exports`` and ``coke_exports`` entries of the
    result.
    """
    eia = gridstatus.EIA(api_key="abcd")  # no need for API key to scrape.

    d = eia.get_coal_spots()

    cols_spot_price = [
        "week_ending_date",
        "central_appalachia_price_short_ton",
        "northern_appalachia_price_short_ton",
        "illinois_basin_price_short_ton",
        "powder_river_basin_price_short_ton",
        "uinta_basin_price_short_ton",
        "central_appalachia_price_mmbtu",
        "northern_appalachia_price_mmbtu",
        "illinois_basin_price_mmbtu",
        "powder_river_basin_price_mmbtu",
        "uinta_basin_price_mmbtu",
    ]

    cols_coal = [
        "delivery_month",
        "coal_min",
        "coal_max",
        "coal_exports",
    ]

    cols_coke = [
        "delivery_month",
        "coke_min",
        "coke_max",
        "coke_exports",
    ]

    assert d["weekly_spots"].columns.tolist() == cols_spot_price
    assert d["weekly_spots"].shape[0] > 0

    assert d["coal_exports"].columns.tolist() == cols_coal
    assert d["coal_exports"].shape[0] > 0

    assert d["coke_exports"].columns.tolist() == cols_coke
    assert d["coke_exports"].shape[0] > 0


@pytest.mark.slow
def test_eia_grid_monitor():
    """``get_grid_monitor(area_id="CISO")`` returns the expected columns in order."""
    eia = gridstatus.EIA()
    cols = [
        "Interval Start",
        "Interval End",
        "Area Id",
        "Area Name",
        "Area Type",
        "Demand",
        "Demand Forecast",
        "Net Generation",
        "Total Interchange",
        "NG: COL",
        "NG: NG",
        "NG: NUC",
        "NG: OIL",
        "NG: WAT",
        "NG: SUN",
        "NG: WND",
        "NG: UNK",
        "NG: OTH",
        "Positive Generation",
        "Consumed Electricity",
        "CO2 Factor: COL",
        "CO2 Factor: NG",
        "CO2 Factor: OIL",
        "CO2 Emissions: COL",
        "CO2 Emissions: NG",
        "CO2 Emissions: OIL",
        "CO2 Emissions: Other",
        "CO2 Emissions Generated",
        "CO2 Emissions Imported",
        "CO2 Emissions Exported",
        "CO2 Emissions Consumed",
        "CO2 Emissions Intensity for Generated Electricity",
        "CO2 Emissions Intensity for Consumed Electricity",
    ]
    df = eia.get_grid_monitor(area_id="CISO")

    assert df.columns.tolist() == cols


def _check_henry_hub_natural_gas_spot_prices(df):
    assert df.columns.tolist() == [
        "Interval Start",
        "Interval End",
        "period",
        "duoarea",
        "area_name",
        "product",
        "fuel_type",
        "process",
        "price_type",
        "series",
        "series_description",
        "price",
        "units",
    ]

    assert (df["Interval End"] - df["Interval Start"]).unique() == pd.Timedelta(days=1)

    # Only RNGWHHD is present after 2024-04-05
    assert set(df["series"].unique()) == set(
        [
            "RNGWHHD",
            "RNGC1",
            "RNGC2",
            "RNGC3",
            "RNGC4",
        ],
    )

    assert df["area_name"].isna().any()
    assert not df["price"].isna().any()
    assert not df["series"].isna().any()

    assert np.issubdtype(df["price"], np.float64)


def test_get_henry_hub_natural_gas_spot_prices_historical_date():
    """Fetch Henry Hub prices for the single day 2024-01-02 and validate them.

    Checks the frame with ``_check_henry_hub_natural_gas_spot_prices`` and
    that the intervals run from 2024-01-02 to 2024-01-03 in
    ``HENRY_HUB_TIMEZONE``.
    """
    df = EIA().get_henry_hub_natural_gas_spot_prices(
        "2024-01-02",
        "2024-01-02",
    )

    _check_henry_hub_natural_gas_spot_prices(df)

    assert df["Interval Start"].min() == pd.Timestamp(
        "2024-01-02",
        tz=HENRY_HUB_TIMEZONE,
    )
    assert df["Interval End"].max() == pd.Timestamp("2024-01-03", tz=HENRY_HUB_TIMEZONE)


def test_get_henry_hub_natural_gas_spot_prices_historical_date_range():
    """Fetch Henry Hub prices for 2023-12-04 through 2024-01-02 and validate them.

    Checks the frame with ``_check_henry_hub_natural_gas_spot_prices`` and
    that the intervals run from 2023-12-04 to 2024-01-03 in
    ``HENRY_HUB_TIMEZONE``.
    """
    df = EIA().get_henry_hub_natural_gas_spot_prices(
        "2023-12-04",
        "2024-01-02",
    )

    _check_henry_hub_natural_gas_spot_prices(df)

    assert df["Interval Start"].min() == pd.Timestamp(
        "2023-12-04",
        tz=HENRY_HUB_TIMEZONE,
    )
    assert df["Interval End"].max() == pd.Timestamp("2024-01-03", tz=HENRY_HUB_TIMEZONE)