Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 39 additions & 13 deletions census/nomis_census.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
Author: William Jay, November 2025
"""

import folium

import geopandas as gpd
import pandas as pd

Expand Down Expand Up @@ -114,30 +116,34 @@ def map_data_to_polygons(
gpkg_path: str,
geometry_field_nomis: str = "2021 output area",
geometry_field_gpkg: str = "OA21CD",
output_areas_to_keep: list[str] = None,
):
"""
Returns a geopandas DataFrame that contains geospatial polygons for each
geographic entry such as output areas or super output areas. A GeoPackage file
must be input containing these output areas at the resolution that matches the
census data.

If output_areas_to_keep is None (the default value), then all output areas are
kept. Otherwise all others are removed. Use this if you are only interested in a
specific region.
"""

# Open output areas as GeoDataFrame
gdf = gpd.read_file(gpkg_path)

# Remove output areas that are not required
if output_areas_to_keep is not None:
self.remove_polygons_from_gpkg(polygons_to_keep)

# Add geometry fields as attributes
self.geometry_field_gpkg = geometry_field_gpkg

# Check that the datatypes of the join columns match
nomis_dtype = self.data[geometry_field_nomis].dtype
gpkg_dtype = gdf[self.geometry_field_gpkg].dtype
if nomis_dtype != gpkg_dtype:
raise ValueError(
"Datatype mismatch for join columns. NOMIS field "
f"'{geometry_field_nomis}' is '{nomis_dtype}' and GeoPackage field "
f"'{self.geometry_field_gpkg}' is '{gpkg_dtype}'"
)

# Join the census data to the output area polygons
gdf = gdf.merge(
self.data,
left_on=geometry_field_gpkg,
left_on=self.geometry_field_gpkg,
right_on=geometry_field_nomis,
how="left",
)
Expand All @@ -148,9 +154,29 @@ def map_data_to_polygons(
# Assign to attribute
self.mapped_data = mapped_gdf

def remove_polygons_from_gpkg(self, polygons_to_keep: list[str]):
def create_choropleth_map(
self,
value_field: str,
output_map: str,
start_coords: list[float],
zoom_level: int,
):
"""
Remove polygons from self.output_areas GeoDataFrame
Create a Leaflet choropleth map using the folium.Choropleth method
"""

pass
# Create map centered on the supplied start coordinates
folium_map = folium.Map(start_coords, zoom_start=zoom_level)

# Create choropleth features
choropleth = folium.Choropleth(
geo_data=self.mapped_data,
data=self.mapped_data,
columns=[self.geometry_field_gpkg, value_field],
key_on=f"feature.properties.{self.geometry_field_gpkg}",
legend_name=value_field,
highlight=True,
).add_to(folium_map)

# Write as HTML
folium_map.save(output_map)
43 changes: 5 additions & 38 deletions scripts/make_leaflet_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
"""

import argparse
import folium

import geopandas as gpd

Expand All @@ -26,6 +25,8 @@ def main(
percent_of_variable: str = None,
csv_geometry_field: str = CSV_GEOMETRY,
gpkg_geometry_field: str = GPKG_GEOMETRY,
start_coords: list[float] = PLYMOUTH_COORDS,
zoom_level: int = ZOOM_LEVEL,
):
"""
Create a Leaflet HTML choropleth map using Folium for a chosen variable in a
Expand All @@ -47,45 +48,11 @@ def main(

# Calculate percent of variable
if percent_of_variable is not None:

variable = census.calc_percent_of_variable(variable, percent_of_variable)

# Create choropleth Leaflet map
create_choropleth_map(
census.mapped_data,
variable,
gpkg_geometry_field,
output_map,
)
variable = census.calc_percent_of_variable(variable, percent_of_variable)


def create_choropleth_map(
gdf: gpd.GeoDataFrame,
value_field: str,
geometry_field: str,
output_map: str,
start_coords: list[float] = PLYMOUTH_COORDS,
zoom_level: int = ZOOM_LEVEL,
):
"""
Create a Leaflet choropleth map using the folium.Choropleth method
"""

# Create map centered over Plymouth
folium_map = folium.Map(start_coords, zoom_start=zoom_level)

# Create choropleth features
choropleth = folium.Choropleth(
geo_data=gdf,
data=gdf,
columns=[geometry_field, value_field],
key_on=f"feature.properties.{geometry_field}",
legend_name=value_field,
highlight=True,
).add_to(folium_map)

# Write as HTML
folium_map.save(output_map)
# Create choropleth Leaflet map
census.create_choropleth_map(variable, output_map, start_coords, zoom_level)


if __name__ == "__main__":
Expand Down
103 changes: 103 additions & 0 deletions scripts/plot_cornish_identity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#!/usr/bin/env python3
"""

Author: William Jay, November 2025
"""

import argparse

from census.nomis_census import Census

AUTHORITY_OF_INTEREST = "Cornwall"
AUTHORITY_FIELD = "MSOA21NM"
CSV_GEOMETRY = "Area"
GPKG_GEOMETRY = "MSOA21NM"


def main(
    csv_path: str,
    gpkg_path: str,
    output_plot: str,
    authorities_to_keep: list[str] = AUTHORITY_OF_INTEREST,
    authority_field: str = AUTHORITY_FIELD,
    csv_geometry_field: str = CSV_GEOMETRY,
    gpkg_geometry_field: str = GPKG_GEOMETRY,
):
    """
    Create a Leaflet choropleth map of Cornish-only identity as a percentage of
    all usual residents, for the MSOAs belonging to the chosen authorities.

    Middle Super Output Area polygons accessed from
    https://www.data.gov.uk/dataset/677a5164-3a9e-4752-b8e6-5744d2b280ec/middle-layer-super-output-areas-december-2021-boundaries-ew-bgc-v3

    Args:
        csv_path: Path passed to Census to load the NOMIS census data.
        gpkg_path: Path to the GeoPackage holding the MSOA polygons.
        output_plot: Path the HTML map is written to.
        authorities_to_keep: Name prefix, or list of name prefixes, of the
            areas to keep. A polygon is kept when its authority_field value
            starts with one of them.
        authority_field: Column of the mapped data that holds the area names
            used for filtering.
        csv_geometry_field: Join column in the census data.
        gpkg_geometry_field: Join column in the GeoPackage.
    """

    # Get Census object
    census = Census(csv_path)

    # This dataset has MSOAs named with the prefix of 'msoa2021:' which needs removing
    # in order to match the MSOA names in the GeoPackage. The following line removes
    # characters to the left of and including ':'
    census.data[csv_geometry_field] = (
        census.data[csv_geometry_field].str.split(":").str.get(1)
    )

    # Map data onto output area polygons
    census.map_data_to_polygons(gpkg_path, csv_geometry_field, gpkg_geometry_field)

    # Remove MSOAs outside the requested authorities. A single name is passed
    # through as a plain string; several names become a tuple of prefixes.
    if isinstance(authorities_to_keep, str):
        name_prefixes = authorities_to_keep
    else:
        name_prefixes = tuple(authorities_to_keep)
    # na=False treats a missing name as "no match" rather than producing a NaN
    # that cannot be used as a boolean mask.
    in_authority = census.mapped_data[authority_field].str.startswith(
        name_prefixes, na=False
    )
    census.mapped_data = census.mapped_data[in_authority]

    fields_of_interest = [
        "UK identity: British only identity",
        "UK identity: English only identity",
        "UK identity: English and British only identity",
        "UK identity: Cornish only identity",
        "UK identity: Cornish and British only identity",
    ]
    total_field = "Total: All usual residents"
    field_to_map = "UK identity: Cornish only identity"

    # Calculate each field as a percentage of the total per output area and
    # remember the name of the new field that holds the result
    percent_fields = {
        field: census.calc_percent_of_variable(field, total_field)
        for field in fields_of_interest
    }

    # Create simple choropleth leaflet map, using the field name returned by the
    # percentage calculation rather than a hand-typed copy of it
    cornwall_coords = [50.406, -4.848]
    zoom_level = 9
    census.create_choropleth_map(
        percent_fields[field_to_map],
        output_plot,
        cornwall_coords,
        zoom_level,
    )


if __name__ == "__main__":
    # Shown by --help; previously empty, which left the script undescribed
    helpstring = (
        "Create a Leaflet choropleth map showing the percentage of usual\n"
        "residents in each Cornwall MSOA who reported a Cornish-only identity."
    )
    parser = argparse.ArgumentParser(
        description=helpstring,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--input_csv",
        type=str,
        required=True,
        help="Unix-style glob to CSV files downloaded from NOMIS",
    )
    parser.add_argument(
        "--input_gpkg",
        type=str,
        required=True,
        help=(
            "Path to GeoPackage file containing census Middle layer Super "
            "Output Area (MSOA) features"
        ),
    )
    parser.add_argument(
        "--output_plot",
        type=str,
        required=True,
        help="Path to write output plot to",
    )

    cmdline = parser.parse_args()

    main(cmdline.input_csv, cmdline.input_gpkg, cmdline.output_plot)