# Source code for gridstatus.eia

import concurrent.futures
import json
import os

import pandas as pd
import requests
from tqdm import tqdm

import gridstatus
from gridstatus.gs_logging import log


class EIA:
    """Thin client for version 2 of the EIA open-data HTTP API.

    Requests are issued through a single ``requests.Session`` stored on the
    instance, and tabular results are returned as pandas DataFrames.
    """

    BASE_URL = "https://api.eia.gov/v2/"

    # Number of rows requested per page (sent as the ``length`` parameter and
    # used to compute page offsets).
    _PAGE_SIZE = 5000

    def __init__(self, api_key=None):
        """Initialize EIA API object

        Args:
            api_key (str, optional): EIA API key. If not provided, will look
                for EIA_API_KEY environment variable.

        Raises:
            ValueError: If no key is passed and EIA_API_KEY is not set.
        """
        if api_key is None:
            api_key = os.environ.get("EIA_API_KEY")

        if api_key is None:
            # Adjacent string literals (rather than a backslash continuation
            # inside one literal) keep source indentation out of the message.
            raise ValueError(
                "API key not provided and EIA_API_KEY "
                "not found in environment variables.",
            )

        self.api_key = api_key
        self.session = requests.Session()

    def list_routes(self, route="/"):
        """List all available routes

        Args:
            route (str, optional): Route path relative to ``BASE_URL``.
                Leading slashes are ignored. Defaults to "/".

        Returns:
            The ``"response"`` member of the JSON body returned by the API.
        """
        # BASE_URL already ends with "/"; drop leading slashes from the route
        # so the request URL never contains "//".
        relative_route = route.lstrip("/")
        url = f"{self.BASE_URL}{relative_route}"
        params = {"api_key": self.api_key}
        data = self.session.get(url, params=params)
        return data.json()["response"]

    def _fetch_page(self, url, headers):
        """Fetch one page of results.

        Args:
            url (str): Fully qualified data endpoint.
            headers (dict): Request headers carrying the API key and the
                JSON-encoded query parameters.

        Returns:
            tuple: ``(DataFrame of the page's records, total record count)``.
        """
        data = self.session.get(url, headers=headers)
        response = data.json()["response"]
        df = pd.DataFrame(response["data"])
        # Coerce so the page arithmetic in get_dataset works even if the API
        # serializes the count as a string.
        return df, int(response["total"])

    def get_dataset(self, dataset, start, end, n_workers=1, verbose=False):
        """Get data from a dataset

        Any dataset path is fetched at hourly frequency, but only
        "electricity/rto/interchange-data" is post-processed for now; other
        datasets are returned as the raw records.

        Args:
            dataset (str): Dataset path
            start (str or pd.Timestamp): Start date
            end (str or pd.Timestamp): End date. A falsy value sends no end
                bound.
            n_workers (int, optional): Number of workers to use
                for fetching data. Defaults to 1.
            verbose (bool, optional): Whether to print progress.
                Defaults to False.

        Returns:
            pd.DataFrame: Dataframe with data from the dataset
        """
        start = gridstatus.utils._handle_date(start, "UTC")
        start_str = start.strftime("%Y-%m-%dT%H")

        end_str = None
        if end:
            end = gridstatus.utils._handle_date(end, "UTC")
            end_str = end.strftime("%Y-%m-%dT%H")

        url = f"{self.BASE_URL}{dataset}/data/"
        page_size = self._PAGE_SIZE

        params = {
            "start": start_str,
            "end": end_str,
            "frequency": "hourly",
            "data": [
                "value",
            ],
            "facets": {},
            "offset": 0,
            "length": page_size,
        }

        headers = {
            "X-Api-Key": self.api_key,
            "X-Params": json.dumps(params),
        }

        log(f"Fetching data from {url}", verbose=verbose)
        log(f"Params: {params}", verbose=verbose)
        log(
            f"Concurrent workers: {n_workers}",
            verbose=verbose,
        )

        # The first page also reports how many records exist in total.
        raw_df, total_records = self._fetch_page(url, headers)

        # Ceiling division: number of pages needed to cover every record.
        total_pages = (total_records + page_size - 1) // page_size

        if verbose:
            print(f"Total records: {total_records}")
            print(f"Total pages: {total_pages}")
            print("Fetching data:")

        def fetch_page_wrapper(page):
            # Build fresh params/headers per page so concurrent workers never
            # share or mutate a common dict.
            page_params = {**params, "offset": page * page_size}
            page_headers = {**headers, "X-Params": json.dumps(page_params)}
            page_df, _ = self._fetch_page(url, page_headers)
            return page_df

        # Fetch the remaining pages if necessary
        if total_pages > 1:
            pages = range(1, total_pages)

            with concurrent.futures.ThreadPoolExecutor(
                max_workers=n_workers,
            ) as executor:
                futures = [executor.submit(fetch_page_wrapper, page) for page in pages]

                if verbose:
                    with tqdm(total=total_pages, ncols=80) as progress_bar:
                        # for first page done at beginning
                        progress_bar.update(1)

                        for _ in concurrent.futures.as_completed(futures):
                            progress_bar.update(1)

                # Collect in submission order so rows stay in page order.
                page_dfs = [future.result() for future in futures]

            raw_df = pd.concat([raw_df, *page_dfs], ignore_index=True)

        df = raw_df

        if dataset in DATASET_HANDLERS:
            df = DATASET_HANDLERS[dataset](df)

        return df
def _handle_rto_interchange(df): """electricity/rto/interchange-data""" df["Interval End"] = pd.to_datetime(df["period"], utc=True) df["Interval Start"] = df["Interval End"] - pd.Timedelta("1h") df = df.rename( { "value": "MW", "fromba": "From BA", "toba": "To BA", "fromba-name": "From BA Name", "toba-name": "To BA Name", }, axis=1, ) df = df[ [ "Interval Start", "Interval End", "From BA", "From BA Name", "To BA", "To BA Name", "MW", ] ] df = df.sort_values(["Interval Start", "From BA"]) return df
# Maps an EIA dataset path to the function that post-processes the raw
# DataFrame fetched for that dataset.
DATASET_HANDLERS = {
    "electricity/rto/interchange-data": _handle_rto_interchange,
}
# docs: https://www.eia.gov/opendata/documentation.php