
Map Generation

This notebook shows the development of the code required to generate the live map of energy generation by power plant.


Imports

#exports
import json
import numpy as np
import pandas as pd
import geopandas as gpd

import os
import typer
from tqdm import tqdm
from warnings import warn
from jinja2 import Template

from ElexonDataPortal.dev import utils, raw
from IPython.display import JSON
import matplotlib.pyplot as plt


Network Route Map

The raw network route map data can be retrieved from here. We want to extract the relevant data, group the different cables/lines according to their operating voltage, then save the information as a GeoJSON.

#exports
def clean_route_gdf(gdf):
    # Group all routes operating at 132 kV or below into a single '<=132' category
    s_LV_routes = gdf['OPERATING_'].astype(float).fillna(0)<=132
    gdf.loc[s_LV_routes, 'OPERATING_'] = '<=132'
    gdf.loc[~s_LV_routes, 'OPERATING_'] = gdf.loc[~s_LV_routes, 'OPERATING_'].astype(int).astype(str)

    return gdf

def load_route_gdf(data_dir='data'):
    # Load the overhead-line and underground-cable shapefiles, reprojecting both to WGS84
    gdf_OHL = gpd.read_file(f'{data_dir}/transmission-system/OHL.shp').to_crs('EPSG:4326')
    gdf_cables = gpd.read_file(f'{data_dir}/transmission-system/Cable.shp').to_crs('EPSG:4326')

    gdf_OHL = clean_route_gdf(gdf_OHL)
    gdf_cables = clean_route_gdf(gdf_cables)

    # Combine the two layers, keep only the voltage and geometry columns, then save as GeoJSON
    gdf_route = gdf_OHL.append(gdf_cables)[['OPERATING_', 'geometry']].rename(columns={'OPERATING_': 'kV'})
    gdf_route.to_file(f'{data_dir}/network_routes.json', driver='GeoJSON')

    return gdf_route
gdf_route = load_route_gdf('../data')

gdf_route.head(3)
kV geometry
400 LINESTRING Z (-2.02839 51.85078 0.00000, -2.02...
400 LINESTRING Z (-2.02754 51.84931 0.00000, -2.02...
400 LINESTRING Z (-3.12672 51.20292 0.00000, -3.12...


We'll quickly visualise the network routes and count how many we have at each voltage level.

gdf_route.plot(column='kV', legend=True, legend_kwds={'frameon': False})

gdf_route['kV'].value_counts()
275      1858
400      1232
<=132    1087
Name: kV, dtype: int64

[Figure: GB transmission network routes coloured by voltage level (kV)]
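
As a quick check that the GeoJSON round-trips correctly, the saved file can be read back with GeoPandas (an illustrative sketch, assuming the file was written to `../data/network_routes.json` as above).

gdf_check = gpd.read_file('../data/network_routes.json')

assert len(gdf_check) == len(gdf_route), 'Saved GeoJSON should contain the same number of routes'
gdf_check['kV'].value_counts()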


Power Plant Output Data Retrieval

We'll begin by creating a function to retrieve and clean Physical Notification (PN) data for transmission-level plants in Great Britain.

#exports
def construct_df_PN_pivot_dt_rng(df_PN):
    # If every timeFrom/timeTo value falls exactly on a whole minute we can use a
    # minutely index, otherwise we fall back to a second-level index
    no_seconds = (((df_PN['timeFrom'].str.split(':').str[-1]=='00').mean()==1) & 
                  ((df_PN['timeTo'].str.split(':').str[-1]=='00').mean()==1))

    if no_seconds:
        dt_rng = pd.date_range(df_PN['timeFrom'].min(), df_PN['timeTo'].max(), freq='min', tz='Europe/London')
    else:
        dt_rng = pd.date_range(df_PN['timeFrom'].min(), df_PN['timeTo'].max(), freq='s', tz='Europe/London')

    return dt_rng

def construct_PN_pivot_df(df_PN, resample=None):
    bmu_ids = sorted(list(df_PN['bmUnitID'].unique()))
    df_PN_pivot = pd.DataFrame(index=construct_df_PN_pivot_dt_rng(df_PN), columns=bmu_ids, dtype=float)

    for bmu_id in tqdm(bmu_ids):
        # Place the notified levels at the start and end of each notification period,
        # then linearly interpolate between them
        for idx, row in df_PN.query('bmUnitID==@bmu_id').iterrows():
            df_PN_pivot.loc[pd.to_datetime(row['timeFrom']).tz_localize('Europe/London'), bmu_id] = float(row['pnLevelFrom'])
            df_PN_pivot.loc[pd.to_datetime(row['timeTo']).tz_localize('Europe/London'), bmu_id] = float(row['pnLevelTo'])

        df_PN_pivot[bmu_id] = df_PN_pivot[bmu_id].interpolate()

    if resample is not None:
        df_PN_pivot = df_PN_pivot.resample(resample).mean()

    return df_PN_pivot

def get_PHYBM_df(api_key, start_date=None, end_date=None, record_type='PN', resample='30T'):
    # By default retrieve a window from one hour ago to three hours ahead of now
    if start_date is None and end_date is None:
        start_date = pd.Timestamp.now().round('30min') - pd.Timedelta(minutes=60)
        end_date = pd.Timestamp.now().round('30min') + pd.Timedelta(minutes=180)
    elif start_date is not None and end_date is not None:
        pass
    else:
        raise ValueError('Only one of `start_date` and `end_date` was specified')

    df = pd.DataFrame()
    df_local_datetime_to_date_SP = utils.dt_rng_to_SPs(start_date, end_date)

    for idx, (date, SP) in tqdm(df_local_datetime_to_date_SP.iterrows(), total=df_local_datetime_to_date_SP.shape[0]):
        df_SP = utils.parse_xml_response(raw.get_PHYBMDATA(api_key, SettlementDate=date, SettlementPeriod=SP, ServiceType='xml'))
        df = df.append(df_SP)

    df = (df
          .query(f'recordType=="{record_type}"')
          .dropna(how='all', axis=1)
         )

    if resample is not None:
        df = df.pipe(construct_PN_pivot_df, resample=resample)

    df.index.name = 'local_datetime'

    return df
df_PN = get_PHYBM_df(api_key)

df_PN.index.min(), df_PN.index.max()
 44%▎                                              | 4/9 [00:13<00:15,  3.13s/it]c:\users\ayrto\desktop\phd\data\bmrs\elexon-bmrs-api-wrapper\ElexonDataPortal\dev\utils.py:29: UserWarning: Data request was succesful but no content was returned
  warn(f'Data request was succesful but no content was returned')
100% 9/9 [00:14<00:00,  1.58s/it]
100% 1370/1370 [00:10<00:00, 133.33it/s]





(Timestamp('2021-06-25 15:00:00+0100', tz='Europe/London', freq='30T'),
 Timestamp('2021-06-25 17:00:00+0100', tz='Europe/London', freq='30T'))


We'll now inspect the data for the most recent available half-hour settlement period.

nearest_half_hour = (pd.Timestamp.now(tz='Europe/London')+pd.Timedelta(minutes=15)).round('30min')
most_recent_available_dt = max(df_PN.index[df_PN.index<=nearest_half_hour])

s_PN = df_PN.loc[most_recent_available_dt]

s_PN.replace(0, np.nan).dropna().head()
2__ABGAS000   -378.0
2__AEELC000   -174.0
2__AEMEB000   -200.0
2__AENRD000   -145.0
2__AEOND000   -177.0
Name: 2021-06-25 17:00:00+01:00, dtype: float64
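
The alphabetical head of the series is dominated by units with negative notifications; to see the largest generating units in this settlement period we can sort the same series (an illustrative one-liner, not part of the exported module).

s_PN.sort_values(ascending=False).head()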


We want to be able to show not just current but also historical data; however, downloading the PN data takes quite a long time. To get around this we'll save the data in monthly batches and only request data that is not already contained in the existing CSV files.

#exports
get_files = lambda data_dir: [f for f in os.listdir(data_dir) if '.csv' in f]

def download_latest_PHYBM_data(api_key=None, data_dir='data/PN', record_type='PN'):
    if api_key is None:
        if 'BMRS_API_KEY' in os.environ.keys():
            api_key = os.environ['BMRS_API_KEY']
        else:
            raise ValueError('`api_key` must be passed or set as the environment variable `BMRS_API_KEY`')

    files = get_files(data_dir)
    years_months_downloaded = [f.split('.')[0] for f in files]

    current_ts = pd.Timestamp.now(tz='Europe/London')
    current_year_month = current_ts.strftime('%Y-%m')

    if current_year_month not in years_months_downloaded:
        start_date, end_date = f'{current_year_month}-01 00:00', current_ts.strftime('%Y-%m-%d %H:%M')
        df = get_PHYBM_df(api_key, start_date, end_date, record_type=record_type)
        df.to_csv(f'{data_dir}/{current_year_month}.csv')

    else:
        df = pd.read_csv(f'{data_dir}/{current_year_month}.csv')

        df = df.set_index('local_datetime')
        df.index = pd.to_datetime(df.index, utc=True).tz_convert('Europe/London')
        dt_rng = pd.date_range(df.index.max(), current_ts, freq='30T', tz='Europe/London')

        if dt_rng.size > 1:
            start_date = dt_rng[0] - pd.Timedelta(minutes=30)
            end_date = dt_rng[-1] + pd.Timedelta(minutes=60)

            try:
                df_latest = get_PHYBM_df(api_key, start_date, end_date, record_type=record_type)
                # Drop rows that will be superseded by the newly retrieved data (i.e. timestamps present in both)
                df_trimmed = df.drop(list(set(df_latest.index) - (set(df_latest.index) - set(df.index))))
                df_combined = df_trimmed.append(df_latest).sort_index()
                df_combined.to_csv(f'{data_dir}/{current_year_month}.csv')
            except Exception:
                warn(f'Could not retrieve any new data between {start_date} and {end_date}')
download_latest_PHYBM_data(api_key, data_dir='../data/PN')
100% 5/5 [00:18<00:00,  3.78s/it]
100% 1370/1370 [00:11<00:00, 118.20it/s]
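
The monthly CSV files that have been cached so far can be listed with the `get_files` helper defined above (illustrative only).

get_files('../data/PN')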


We'll also create a helper function for loading in the latest data; when the latest month contains less than a week's worth of data we'll append the previous month's data as well.

#exports
def load_most_recent_PN_data(data_dir='data/PN', latest_year_month_file=None, df_PN_old=None):
    if latest_year_month_file is None:
        PN_files = sorted(get_files(data_dir))
        latest_year_month_file = PN_files[-1]

    latest_fp = f'{data_dir}/{latest_year_month_file}'

    df_PN = pd.read_csv(latest_fp)

    df_PN['local_datetime'] = pd.to_datetime(df_PN['local_datetime'], utc=True)
    df_PN = df_PN.set_index('local_datetime')
    df_PN.index = df_PN.index.tz_convert('Europe/London')

    if df_PN_old is not None:
        assert latest_year_month_file is not None, 'Should not be appending to the main dataframe if `latest_year_month_file` was not specified'
        df_PN = df_PN_old.append(df_PN)

    if df_PN.shape[0] < (48*7): # want to have at least a week's worth of data
        assert 'PN_files' in locals(), 'The two most recent files combined have less than one week\'s worth of data'
        df_PN = load_most_recent_PN_data(data_dir, latest_year_month_file=PN_files[-2], df_PN_old=df_PN)

    return df_PN
df_PN = load_most_recent_PN_data(data_dir='../data/PN')

df_PN.tail()
local_datetime 2__AANGE001 2__AANGE002 2__ABGAS000 2__ACNDL001 2__AEDFE000 2__AEDIR000 2__AEELC000 2__AEENG000 2__AEMEB000 2__AENRD000 ... V__KGAZP002 V__LCEND001 V__LFLEX001 V__MADEL001 V__MGBLO001 V__NFLEX001 V__PFLEX001 I_I2D-MQBN1 I_I2G-MQBN1 I_IFG-MQBN1
2021-06-25 15:00:00+01:00 0 0 -299.917 0 0 0 -154.833 0 -177.8 -171.617 ... 0 0 0 0 0 0 0 0 0 0
2021-06-25 15:30:00+01:00 0 0 -335.4 0 0 0 -161.933 0 -188.35 -162.2 ... 0 0 0 0 0 0 0 0 0 0
2021-06-25 16:00:00+01:00 0 0 -365.6 0 0 0 -168.833 0 -196.383 -150.683 ... 0 0 0 0 0 0 0 0 0 0
2021-06-25 16:30:00+01:00 0 0 -378 0 0 0 -174 0 -200 -145 ... 0 0 0 0 0 0 0 0 0 0
2021-06-25 17:00:00+01:00 0 0 -378 0 0 0 -174 0 -200 -145 ... 0 0 0 0 0 0 0 0 0 0
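
As a quick visual sanity check on the loaded data, we could plot the total Physical Notification level summed across all units (an illustrative sketch, not part of the exported module).

ax = df_PN.sum(axis=1).plot()
ax.set_ylabel('Total PN (MW)')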


Location & Fuel Type Data Collation

Now we're ready to start building the map; for this we'll need information on both the location and fuel type of each plant (the fuel type will be represented by the marker colour). We can get this data from the Power Station Dictionary project.

df_powerdict = pd.read_csv('https://raw.githubusercontent.com/OSUKED/Power-Station-Dictionary/main/data/output/power_stations.csv')

df_powerdict.head(3)
osuked_id esail_id gppd_idnr name sett_bmu_id longitude latitude fuel_type capacity_mw
10000 MARK nan Rothes Bio-Plant CHP E_MARK-1, E_MARK-2 -3.60352 57.4804 biomass nan
10001 DIDC nan Didcot A (G) T_DIDC1, T_DIDC2, T_DIDC4, T_DIDC3 -1.26757 51.6236 coal nan
10002 ABTH GBR1000374 Aberthaw B T_ABTH7, T_ABTH8, T_ABTH9 -3.40487 51.3873 coal 1586


We'll create some helper dictionaries for mapping from the `osuked_id` to the data we're interested in.

#exports
def construct_osuked_id_mappings(df_powerdict):
    osuked_id_mappings = dict()

    osuked_id_mappings['bmu_ids'] = (df_powerdict
                                     .set_index('osuked_id')
                                     ['sett_bmu_id']
                                     .str.split(', ')
                                     .dropna()
                                     .to_dict()
                                    )

    osuked_id_mappings['capacity_mw'] = (df_powerdict
                                         .set_index('osuked_id')
                                         ['capacity_mw']
                                         .astype(str)
                                         .str.replace('.0', '', regex=False)
                                         .to_dict()
                                        )

    osuked_id_mappings['fuel_type'] = (df_powerdict
                                       .set_index('osuked_id')
                                       ['fuel_type']
                                       .dropna()
                                       .to_dict()
                                      )

    osuked_id_mappings['name'] = (df_powerdict
                                  .set_index('osuked_id')
                                  ['name']
                                  .dropna()
                                  .to_dict()
                                 )

    osuked_id_mappings['lat_lon'] = (df_powerdict
                                     .set_index('osuked_id')
                                     [['latitude', 'longitude']]
                                     .dropna()
                                     .apply(dict, axis=1)
                                     .to_dict()
                                    )

    return osuked_id_mappings
osuked_id_mappings = construct_osuked_id_mappings(df_powerdict)
osuked_id_to_bmu_ids, osuked_id_to_capacity_mw, osuked_id_to_fuel_type, osuked_id_to_name, osuked_id_to_lat_lon = osuked_id_mappings.values()

pd.Series(osuked_id_to_bmu_ids).head().to_dict()
{10000: ['E_MARK-1', 'E_MARK-2'],
 10001: ['T_DIDC1', 'T_DIDC2', 'T_DIDC4', 'T_DIDC3'],
 10002: ['T_ABTH7', 'T_ABTH8', 'T_ABTH9'],
 10003: ['T_COTPS-1', 'T_COTPS-2', 'T_COTPS-3', 'T_COTPS-4'],
 10004: ['T_DRAXX-1',
  'T_DRAXX-2',
  'T_DRAXX-3',
  'T_DRAXX-4',
  'T_DRAXX-5',
  'T_DRAXX-6']}
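
A reverse lookup from BMU id to `osuked_id` can also be handy when inspecting individual units; a minimal sketch built from the mapping above (note this assumes each BMU id belongs to a single site).

bmu_id_to_osuked_id = {bmu_id: osuked_id
                       for osuked_id, bmu_ids in osuked_id_to_bmu_ids.items()
                       for bmu_id in bmu_ids}

bmu_id_to_osuked_id['T_DRAXX-1']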


We'll quickly inspect how much of the current generator capacity our contextual plant data covers.

flatten_list = lambda list_: [item for sublist in list_ for item in sublist]

bmu_ids_with_metadata = sorted(list(set(flatten_list(osuked_id_to_bmu_ids.values()))))
bmu_ids_with_metadata_and_output = df_PN.columns.intersection(bmu_ids_with_metadata)
bmu_ids_without_metadata = sorted(list(set(df_PN.columns) - set(bmu_ids_with_metadata)))

pct_site_coverage = len(bmu_ids_with_metadata_and_output)/df_PN.columns.size
pct_output_coverage = df_PN.sum()[bmu_ids_with_metadata_and_output].sum()/df_PN.sum().sum()

# print(f"{pct_site_coverage:.0%} of the sites have coverage, making up {pct_output_coverage:.0%} of the total power output. The following are missing:\n{', '.join(bmu_ids_without_metadata)}")


We'll now create a new dataframe that contains the contextual plant data as well as the generation time-series.

#exports
def extract_PN_ts(df_PN, bmu_ids, n_SPs=48*7):
    matching_output_bmu_ids = df_PN.columns.intersection(bmu_ids)
    output_match = matching_output_bmu_ids.size > 0

    if not output_match:
        return None

    # Sum the output of all BMUs belonging to the site, then convert the index to
    # unix epoch milliseconds (integer timestamps used in the map's GeoJSON)
    s_PN = df_PN[matching_output_bmu_ids].sum(axis=1)
    s_PN.index = (s_PN.index.tz_convert('UTC') - pd.to_datetime(0, unit='s').tz_localize('UTC')).total_seconds().astype(int) * 1000

    # Missing values are treated as zero and negative values (e.g. pumping/demand) are clipped
    s_PN = s_PN.fillna(0)
    s_PN[s_PN<0] = 0
    PN_ts = s_PN.tail(n_SPs).to_dict()

    return PN_ts

def construct_map_df(
    df_PN, 
    osuked_id_to_bmu_ids, 
    osuked_id_to_capacity_mw,
    osuked_id_to_lat_lon, 
    osuked_id_to_fuel_type, 
    osuked_id_to_name,
    n_SPs=48*7
):
    sites_data = list()

    for osuked_id, bmu_ids in osuked_id_to_bmu_ids.items():
        lat_lon_match = osuked_id in osuked_id_to_lat_lon.keys()

        PN_ts = extract_PN_ts(df_PN, bmu_ids, n_SPs=n_SPs)

        if lat_lon_match and PN_ts is not None:
            if sum(PN_ts.values()) > 0:
                site_data = osuked_id_to_lat_lon[osuked_id]
                site_data.update({'id': osuked_id})
                site_data.update({'name': osuked_id_to_name[osuked_id]})
                site_data.update({'capacity': osuked_id_to_capacity_mw[osuked_id]})
                site_data.update({'fuel_type': osuked_id_to_fuel_type[osuked_id]})
                site_data.update({'output': PN_ts})

                sites_data += [site_data]

    df_map = pd.DataFrame(sites_data).set_index('id')

    return df_map
df_map = construct_map_df(df_PN, osuked_id_to_bmu_ids, osuked_id_to_capacity_mw, osuked_id_to_lat_lon, osuked_id_to_fuel_type, osuked_id_to_name)

df_map.head(3)
id latitude longitude name capacity fuel_type output
10000 57.4804 -3.60352 Rothes Bio-Plant CHP nan biomass {1624033800000: 55.0, 1624035600000: 55.0, 162...
10004 53.7487 -0.626221 Drax 1980 coal, biomass {1624033800000: 1950.0, 1624035600000: 1950.0,...
10010 55.2042 -1.52083 Lynemouth Generator nan coal {1624033800000: 405.0, 1624035600000: 405.0, 1...
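
For reference, the unix-millisecond keys in the `output` column can be converted back into a regular time-series for a single site (an illustrative sketch using the first row of `df_map`).

s_site_output = pd.Series(df_map.iloc[0]['output'])
s_site_output.index = pd.to_datetime(s_site_output.index, unit='ms', utc=True).tz_convert('Europe/London')

s_site_output.head()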


We now want to convert this into a GeoDataFrame so that we can visualise it spatially.

#exports
def df_to_gdf(df, lat_col='latitude', lon_col='longitude'):
    geometry = gpd.points_from_xy(df[lon_col], df[lat_col])
    gdf = gpd.GeoDataFrame(df, geometry=geometry)

    return gdf
gdf_map = df_to_gdf(df_map)
gdf_map_latest = gdf_map.assign(output=gdf_map['output'].apply(lambda d: list(d.values())[-1]))

gdf_map_latest.plot(markersize='output', column='fuel_type', alpha=0.5, legend=True)
<AxesSubplot:>

[Figure: power plants plotted by location, sized by latest output and coloured by fuel type]

#exports
def construct_map_geojson(
    df_PN, 
    df_powerdict,
    n_SPs=48*7
):
    osuked_id_mappings = construct_osuked_id_mappings(df_powerdict)
    osuked_id_to_bmu_ids, osuked_id_to_capacity_mw, osuked_id_to_fuel_type, osuked_id_to_name, osuked_id_to_lat_lon = osuked_id_mappings.values()

    df_map = construct_map_df(df_PN, osuked_id_to_bmu_ids, osuked_id_to_capacity_mw, osuked_id_to_lat_lon, osuked_id_to_fuel_type, osuked_id_to_name, n_SPs=n_SPs)
    gdf_map = df_to_gdf(df_map)

    # Replace string "nan" values with proper JSON nulls, then expose the shared time
    # index (unix epoch milliseconds) as a top-level `timeseries` key for the map JS
    geojson = json.loads(gdf_map.to_json().replace('"nan"', 'null'))
    geojson['timeseries'] = [int(unix_datetime) for unix_datetime in list(geojson['features'][0]['properties']['output'].keys())]

    return geojson
geojson = construct_map_geojson(df_PN, df_powerdict)

JSON(geojson)
<IPython.core.display.JSON object>
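
Before saving, it can be useful to confirm the structure of the GeoJSON, e.g. how many plant features it contains, the length of the shared time index, and which properties each feature carries (an illustrative check, not part of the exported module).

len(geojson['features']), len(geojson['timeseries']), list(geojson['features'][0]['properties'].keys())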
#exports
def save_map_geojson(geojson, fp='data/power_plants.json'):
    with open(fp, 'w') as f:
        json.dump(geojson, f)
save_map_geojson(geojson, fp='../data/power_plants.json')
#exports
def get_nearest_dt_idx(geojson, nearest_half_hour):
    # The timeseries values are unix epoch milliseconds; multiply by 1e6 to get the
    # nanoseconds that pd.to_datetime expects
    ts = pd.to_datetime([x*1000000 for x in geojson['timeseries']]).tz_localize('UTC').tz_convert('Europe/London')
    nearest_hh_match = [i for i, dt in enumerate(ts) if dt==nearest_half_hour]

    if len(nearest_hh_match) == 1:
        return nearest_hh_match[0]
    else:
        # Fall back to the most recent timestamp if there is no exact match
        return len(ts)-1

def generate_map_js(
    df_PN: pd.DataFrame, 
    df_powerdict: pd.DataFrame, 
    zoom: int=5,
    center: list=[54.8, -4.61],
    js_template_fp: str='templates/map.js', 
    js_docs_fp: str='docs/js/map.js',
    plants_geojson_fp: str='data/power_plants.json',
    plants_geojson_url: str='https://raw.githubusercontent.com/OSUKED/ElexonDataPortal/master/data/power_plants.json',
    routes_geojson_url: str='https://raw.githubusercontent.com/OSUKED/ElexonDataPortal/master/data/network_routes.json'
):
    geojson = construct_map_geojson(df_PN, df_powerdict)
    save_map_geojson(geojson, fp=plants_geojson_fp)

    nearest_half_hour = (pd.Timestamp.now().tz_localize('Europe/London')+pd.Timedelta(minutes=15)).round('30min')
    nearest_dt_idx = get_nearest_dt_idx(geojson, nearest_half_hour)

    rendered_map_js = Template(open(js_template_fp).read()).render(
        zoom=zoom,
        start_idx=nearest_dt_idx,
        center=center,
        plants_geojson_url=plants_geojson_url,
        routes_geojson_url=routes_geojson_url,
    )

    with open(js_docs_fp, 'w', encoding='utf8') as fp:
        fp.write(rendered_map_js)

    return
generate_map_js(df_PN, df_powerdict, plants_geojson_fp='../data/power_plants.json', js_template_fp='../templates/map.js', js_docs_fp='../docs/js/map.js')
# clean up the pop-up
#exports
def generate_map_md(md_template_fp='templates/map.md', md_docs_fp='docs/map.md'):
    update_date = pd.Timestamp.now().round('5min').tz_localize('Europe/London').strftime('%Y-%m-%d %H:%M')
    rendered_map_md = Template(open(md_template_fp).read()).render({'update_date': update_date})

    with open(md_docs_fp, 'w', encoding='utf8') as fp:
        fp.write(rendered_map_md)

    return
generate_map_md(md_template_fp='../templates/map.md', md_docs_fp='../docs/map.md')
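
For context on the Jinja2 rendering step used in both `generate_map_js` and `generate_map_md`, the following toy example shows how `Template.render` substitutes variables; the template string here is hypothetical and not the actual `map.md` template.

example_template = Template('Last updated: {{ update_date }}')

example_template.render({'update_date': '2021-06-25 17:00'})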
#exports
app = typer.Typer()


The map generation pipeline can then be run from the command line with `python -m ElexonDataPortal.dev.mapgen`.

#exports
@app.command()
def generate_map(
    api_key: str=None,
    powerdict_url: str='https://raw.githubusercontent.com/OSUKED/Power-Station-Dictionary/main/data/output/power_stations.csv',
    js_template_fp: str='templates/map.js', 
    js_docs_fp: str='docs/js/map.js', 
    md_template_fp: str='templates/map.md', 
    md_docs_fp: str='docs/map.md',
    data_dir: str='data/PN',
    plants_geojson_fp: str='data/power_plants.json',
    plants_geojson_url: str='https://raw.githubusercontent.com/OSUKED/ElexonDataPortal/master/data/power_plants.json',
    routes_geojson_url: str='https://raw.githubusercontent.com/OSUKED/ElexonDataPortal/master/data/network_routes.json'
):
    if api_key is None:
        assert 'BMRS_API_KEY' in os.environ.keys(), 'If `api_key` is not specified then it must be set as the environment variable `BMRS_API_KEY`'
        api_key = os.environ['BMRS_API_KEY']

    download_latest_PHYBM_data(api_key, data_dir)
    df_PN = load_most_recent_PN_data(data_dir)

    df_powerdict = pd.read_csv(powerdict_url)

    generate_map_js(df_PN, df_powerdict, js_template_fp=js_template_fp, js_docs_fp=js_docs_fp, plants_geojson_fp=plants_geojson_fp, plants_geojson_url=plants_geojson_url, routes_geojson_url=routes_geojson_url)
    generate_map_md(md_template_fp=md_template_fp, md_docs_fp=md_docs_fp)

    return 
generate_map(
    api_key=api_key,
    js_template_fp='../templates/map.js', 
    js_docs_fp='../docs/js/map.js', 
    md_template_fp='../templates/map.md', 
    md_docs_fp='../docs/map.md',
    data_dir='../data/PN',
    plants_geojson_fp='../data/power_plants.json'
)
#exports
if __name__ == '__main__' and '__file__' in globals():
    app()