Core Utilities
Imports
#exports
import numpy as np
import pandas as pd
import re
import os
import xmltodict
from collections import OrderedDict
from warnings import warn
from IPython.display import JSON
from IPython.core.magic import register_cell_magic
@register_cell_magic('warn_exceptions')
def warn_exceptions(line, cell):
try:
exec(cell)
except Exception as e:
warn(str(e))
import os
from dotenv import load_dotenv
assert load_dotenv('../.env'), 'Environment variables could not be loaded'
api_key = os.environ['BMRS_API_KEY']
import requests
r = requests.get(f'https://api.bmreports.com/BMRS/B1610/v2?ServiceType=XML&Period=1&APIKey={api_key}&SettlementDate=2020-01-01')
r
<Response [200]>
#exports
class RequestError(Exception):
def __init__(self, http_code, error_type, description):
self.message = f'{http_code} - {error_type}\n{description}'
def __str__(self):
return self.message
%%warn_exceptions
raise RequestError('400', 'Bad Request', 'You did something wrong')
<ipython-input-3-72f956aef7ec>:9: UserWarning: 400 - Bad Request
You did something wrong
warn(str(e))
#exports
def check_status(r):
r_metadata = xmltodict.parse(r.text)['response']['responseMetadata']
if r_metadata['httpCode'] == '204':
warn(f'Data request was succesful but no content was returned')
return pd.DataFrame()
elif r_metadata['httpCode'] != '200':
raise RequestError(r_metadata['httpCode'], r_metadata['errorType'], r_metadata['description'])
return None
def check_capping(r):
r_metadata = xmltodict.parse(r.text)['response']['responseMetadata']
if 'cappingApplied' in r_metadata.keys():
if r_metadata['cappingApplied'] == 'Yes':
capping_applied = True
else:
capping_applied = False
else:
capping_applied = 'Could not be determined'
return capping_applied
check_status(r)
check_capping(r)
False
#exports
def expand_cols(df, cols_2_expand=[]):
if df.size == 0:
return df
for col in cols_2_expand:
new_df_cols = df[col].apply(pd.Series)
df[new_df_cols.columns] = new_df_cols
df = df.drop(columns=col)
s_cols_2_expand = df.iloc[0].apply(type).isin([OrderedDict, dict, list, tuple])
if s_cols_2_expand.sum() > 0:
cols_2_expand = s_cols_2_expand[s_cols_2_expand].index
df = expand_cols(df, cols_2_expand)
return df
def parse_xml_response(r):
r_dict = xmltodict.parse(r.text)
status_check_response = check_status(r)
if status_check_response is not None:
return status_check_response
capping_applied = check_capping(r)
data_content = r_dict['response']['responseBody']['responseList']['item']
if isinstance(data_content, list):
df = expand_cols(pd.DataFrame(data_content))
elif isinstance(data_content, OrderedDict):
df = pd.DataFrame(pd.Series(data_content)).T
else:
raise ValueError('The returned `data_content` must be one of: `list` or `OrderedDict`')
return df
df = parse_xml_response(r)
df.head()
documentType |
businessType |
processType |
timeSeriesID |
curveType |
settlementDate |
powerSystemResourceType |
registeredResourceEICCode |
marketGenerationUnitEICCode |
marketGenerationBMUId |
... |
bMUnitID |
nGCBMUnitID |
activeFlag |
documentID |
documentRevNum |
resolution |
start |
end |
settlementPeriod |
quantity |
Actual generation |
Production |
Realised |
ELX-EMFIP-AGOG-TS-261 |
Sequential fixed size block |
2020-01-01 |
Generation |
48W00000EWHLW-1U |
48W00000EWHLW-1U |
T_EWHLW-1 |
... |
T_EWHLW-1 |
EWHLW-1 |
Y |
ELX-EMFIP-AGOG-22495386 |
1 |
PT30M |
2020-01-01 |
2020-01-01 |
1 |
1.87 |
Actual generation |
Production |
Realised |
ELX-EMFIP-AGOG-TS-202 |
Sequential fixed size block |
2020-01-01 |
Generation |
48W00000TULWW-1U |
48W00000TULWW-1U |
E_TULWW-1 |
... |
E_TULWW-1 |
TULWW-1 |
Y |
ELX-EMFIP-AGOG-22495386 |
1 |
PT30M |
2020-01-01 |
2020-01-01 |
1 |
15.618 |
Actual generation |
Production |
Realised |
ELX-EMFIP-AGOG-TS-352 |
Sequential fixed size block |
2020-01-01 |
Generation |
48W000000STAY-4S |
48W000000STAY-4S |
T_STAY-4 |
... |
T_STAY-4 |
STAY-4 |
Y |
ELX-EMFIP-AGOG-22495386 |
1 |
PT30M |
2020-01-01 |
2020-01-01 |
1 |
195.8 |
Actual generation |
Production |
Realised |
ELX-EMFIP-AGOG-TS-359 |
Sequential fixed size block |
2020-01-01 |
Generation |
48W000000TORN-1G |
48W000000TORN-1G |
T_TORN-1 |
... |
T_TORN-1 |
TORN-1 |
Y |
ELX-EMFIP-AGOG-22495386 |
1 |
PT30M |
2020-01-01 |
2020-01-01 |
1 |
631.456 |
Actual generation |
Production |
Realised |
ELX-EMFIP-AGOG-TS-193 |
Sequential fixed size block |
2020-01-01 |
Generation |
48W00000LNMTH-1R |
48W00000LNMTH-1R |
E_LYNE1 |
... |
E_LYNE1 |
LNMTH-1 |
Y |
ELX-EMFIP-AGOG-22495386 |
1 |
PT30M |
2020-01-01 |
2020-01-01 |
1 |
130.298 |
#exports
def dt_rng_to_SPs(
start_date: pd.Timestamp,
end_date: pd.Timestamp,
freq: str='30T',
tz: str='Europe/London'
):
dt_rng = pd.date_range(start_date, end_date, freq=freq, tz=tz)
SPs = list((2*(dt_rng.hour + dt_rng.minute/60) + 1).astype(int))
dt_strs = list(dt_rng.strftime('%Y-%m-%d'))
df_dates_SPs = pd.DataFrame({'date':dt_strs, 'SP':SPs}, index=dt_rng).astype(str)
# Accounting for clock changes
clock_change_dt_idxs_dir = pd.Series(dt_rng).apply(lambda dt: dt.utcoffset().total_seconds()).diff().replace(0, np.nan).dropna()
for dt_idx, dir_ in clock_change_dt_idxs_dir.items():
dt = dt_rng[dt_idx].date()
SPs = (1 + 2*(dt_rng[dt_rng.date==dt] - pd.to_datetime(dt).tz_localize('Europe/London')).total_seconds()/(60*60)).astype(int)
df_dates_SPs.loc[df_dates_SPs.index.date==dt, 'SP'] = SPs
return df_dates_SPs
def parse_local_datetime(
df: pd.DataFrame,
dt_col: str='settlementDate',
SP_col: str='settlementPeriod',
freq: str='30T',
tz: str='Europe/London'
) -> pd.DataFrame:
# preparing start/end dates
start_date = pd.to_datetime(df[dt_col].min()) - pd.Timedelta(days=2)
end_date = pd.to_datetime(df[dt_col].max()) + pd.Timedelta(days=2)
# mapping from date and SP to local datetime
df_dates_SPs = dt_rng_to_SPs(start_date, end_date, freq=freq, tz=tz)
date_SP_to_ts = {(v[0], str(v[1])): k for k, v in df_dates_SPs.apply(tuple, axis=1).to_dict().items()}
df['local_datetime'] = df[[dt_col, SP_col]].apply(tuple, axis=1).map(date_SP_to_ts)
# reordering the `local_datetime` column to be first
cols = list(df.columns)
cols.remove('local_datetime')
df = df[['local_datetime'] + cols]
return df
df = parse_local_datetime(df)
df.head()
local_datetime |
documentType |
businessType |
processType |
timeSeriesID |
curveType |
settlementDate |
powerSystemResourceType |
registeredResourceEICCode |
marketGenerationUnitEICCode |
... |
bMUnitID |
nGCBMUnitID |
activeFlag |
documentID |
documentRevNum |
resolution |
start |
end |
settlementPeriod |
quantity |
2020-01-01 00:00:00+00:00 |
Actual generation |
Production |
Realised |
ELX-EMFIP-AGOG-TS-261 |
Sequential fixed size block |
2020-01-01 |
Generation |
48W00000EWHLW-1U |
48W00000EWHLW-1U |
... |
T_EWHLW-1 |
EWHLW-1 |
Y |
ELX-EMFIP-AGOG-22495386 |
1 |
PT30M |
2020-01-01 |
2020-01-01 |
1 |
1.87 |
2020-01-01 00:00:00+00:00 |
Actual generation |
Production |
Realised |
ELX-EMFIP-AGOG-TS-202 |
Sequential fixed size block |
2020-01-01 |
Generation |
48W00000TULWW-1U |
48W00000TULWW-1U |
... |
E_TULWW-1 |
TULWW-1 |
Y |
ELX-EMFIP-AGOG-22495386 |
1 |
PT30M |
2020-01-01 |
2020-01-01 |
1 |
15.618 |
2020-01-01 00:00:00+00:00 |
Actual generation |
Production |
Realised |
ELX-EMFIP-AGOG-TS-352 |
Sequential fixed size block |
2020-01-01 |
Generation |
48W000000STAY-4S |
48W000000STAY-4S |
... |
T_STAY-4 |
STAY-4 |
Y |
ELX-EMFIP-AGOG-22495386 |
1 |
PT30M |
2020-01-01 |
2020-01-01 |
1 |
195.8 |
2020-01-01 00:00:00+00:00 |
Actual generation |
Production |
Realised |
ELX-EMFIP-AGOG-TS-359 |
Sequential fixed size block |
2020-01-01 |
Generation |
48W000000TORN-1G |
48W000000TORN-1G |
... |
T_TORN-1 |
TORN-1 |
Y |
ELX-EMFIP-AGOG-22495386 |
1 |
PT30M |
2020-01-01 |
2020-01-01 |
1 |
631.456 |
2020-01-01 00:00:00+00:00 |
Actual generation |
Production |
Realised |
ELX-EMFIP-AGOG-TS-193 |
Sequential fixed size block |
2020-01-01 |
Generation |
48W00000LNMTH-1R |
48W00000LNMTH-1R |
... |
E_LYNE1 |
LNMTH-1 |
Y |
ELX-EMFIP-AGOG-22495386 |
1 |
PT30M |
2020-01-01 |
2020-01-01 |
1 |
130.298 |