willjayeo · willjayeo · Nov 18, 2025 · Nov 18, 2025 · Nov 19, 2025 · Nov 19, 2025
diff --git a/census/nomis_census.py b/census/nomis_census.py
@@ -4,6 +4,8 @@
 Author: William Jay, November 2025
 """
 
+import folium
+
 import geopandas as gpd
 import pandas as pd
 
@@ -114,30 +116,34 @@ def map_data_to_polygons(
         gpkg_path: str,
         geometry_field_nomis: str = "2021 output area",
         geometry_field_gpkg: str = "OA21CD",
-        output_areas_to_keep: list[str] = None,
     ):
         """
         Returns a geopandas DataFrame that contains geospatial polygons for each
         geographic entry such as output areas or super output areas. A GeoPackage file
         must be input containing these output areas at the resolution that matches the
         census data.
-
-        If output_areas_to_keep is None (the default value), then all output areas are
-        kept. Otherwise all others are removed. Use this if you are only interested in a
-        specific region.
         """
 
         # Open output areas as GeoDataFrame
         gdf = gpd.read_file(gpkg_path)
 
-        # Remove output areas that are not required
-        if output_areas_to_keep is not None:
-            self.remove_polygons_from_gpkg(polygons_to_keep)
-
+        # Store the GeoPackage geometry field as an attribute
+        self.geometry_field_gpkg = geometry_field_gpkg
+
+        # Check that the datatypes of the join columns match
+        nomis_dtype = self.data[geometry_field_nomis].dtype
+        gpkg_dtype = gdf[self.geometry_field_gpkg].dtype
+        if nomis_dtype != gpkg_dtype:
+            raise ValueError(
+                "Datatype mismatch for join columns. NOMIS field "
+                f"'{geometry_field_nomis}' is '{nomis_dtype}' and GeoPackage field "
+                f"'{self.geometry_field_gpkg}' is '{gpkg_dtype}'"
+            )
+
         # Join the census data to the output area polygons
         gdf = gdf.merge(
             self.data,
-            left_on=geometry_field_gpkg,
+            left_on=self.geometry_field_gpkg,
             right_on=geometry_field_nomis,
             how="left",
         )
@@ -148,9 +154,38 @@ def map_data_to_polygons(
         # Assign to attribute
         self.mapped_data = mapped_gdf
 
-    def remove_polygons_from_gpkg(self, polygons_to_keep: list[str]):
+    def create_choropleth_map(
+        self,
+        value_field: str,
+        output_map: str,
+        start_coords: list[float],
+        zoom_level: int,
+    ):
         """
-        Remove polygons from self.output_areas GeoDataFrame
+        Create a Leaflet choropleth map using the folium.Choropleth method and write it
+        to output_map as HTML
+
+        value_field is the column of self.mapped_data that colours the polygons and is
+        also used as the legend name. start_coords and zoom_level are passed to
+        folium.Map as the initial centre and zoom of the map.
+
+        self.mapped_data and self.geometry_field_gpkg are read here, both of which are
+        set by map_data_to_polygons, so that method must be called first.
         """
 
-        pass
+        # Create map centred on the requested start coordinates
+        folium_map = folium.Map(start_coords, zoom_start=zoom_level)
+
+        # Create choropleth features and add them to the map. The polygons and the
+        # values come from the same GeoDataFrame, so the join key is the same field
+        folium.Choropleth(
+            geo_data=self.mapped_data,
+            data=self.mapped_data,
+            columns=[self.geometry_field_gpkg, value_field],
+            key_on=f"feature.properties.{self.geometry_field_gpkg}",
+            legend_name=value_field,
+            highlight=True,
+        ).add_to(folium_map)
+
+        # Write as HTML
+        folium_map.save(output_map)
diff --git a/scripts/make_leaflet_map.py b/scripts/make_leaflet_map.py
@@ -6,7 +6,6 @@
 """
 
 import argparse
-import folium
 
 import geopandas as gpd
 
@@ -26,6 +25,8 @@ def main(
     percent_of_variable: str = None,
     csv_geometry_field: str = CSV_GEOMETRY,
     gpkg_geometry_field: str = GPKG_GEOMETRY,
+    start_coords: list[float] = PLYMOUTH_COORDS,
+    zoom_level: int = ZOOM_LEVEL,
 ):
     """
     Create a Leaflet HTML choropleth map using Folium for a chosen variable in a
@@ -47,45 +48,11 @@ def main(
 
     # Calculate percent of variable
     if percent_of_variable is not None:
-
-       variable = census.calc_percent_of_variable(variable, percent_of_variable)
 
-    # Create choropleth Leaflet map
-    create_choropleth_map(
-        census.mapped_data,
-        variable,
-        gpkg_geometry_field,
-        output_map,
-    )
+        variable = census.calc_percent_of_variable(variable, percent_of_variable)
 
-
-def create_choropleth_map(
-    gdf: gpd.GeoDataFrame,
-    value_field: str,
-    geometry_field: str,
-    output_map: str,
-    start_coords: list[float] = PLYMOUTH_COORDS,
-    zoom_level: int = ZOOM_LEVEL,
-):
-    """
-    Create a Leaflet choropleth map using the folium.Choropleth method
-    """
-
-    # Create map centered over Plymouth
-    folium_map = folium.Map(start_coords, zoom_start=zoom_level)
-
-    # Create choropleth features
-    choropleth = folium.Choropleth(
-        geo_data=gdf,
-        data=gdf,
-        columns=[geometry_field, value_field],
-        key_on=f"feature.properties.{geometry_field}",
-        legend_name=value_field,
-        highlight=True,
-    ).add_to(folium_map)
-
-    # Write as HTML
-    folium_map.save(output_map)
+    # Create choropleth Leaflet map
+    census.create_choropleth_map(variable, output_map, start_coords, zoom_level)
 
 
 if __name__ == "__main__":

diff --git a/scripts/plot_cornish_identity.py b/scripts/plot_cornish_identity.py
@@ -0,0 +1,139 @@
+#!/usr/bin/env python3
+"""
+Create a Leaflet choropleth map of Cornish identity from NOMIS census data
+
+Census data for Middle layer Super Output Areas (MSOAs) is joined onto MSOA
+polygons, the polygons are filtered to the authority of interest and each national
+identity field is calculated as a percentage of all usual residents. The percentage
+of residents with a Cornish only identity is then written as an HTML map.
+
+Author: William Jay, November 2025
+"""
+
+from __future__ import annotations
+
+import argparse
+
+from census.nomis_census import Census
+
+AUTHORITY_OF_INTEREST = "Cornwall"
+AUTHORITY_FIELD = "MSOA21NM"
+CSV_GEOMETRY = "Area"
+GPKG_GEOMETRY = "MSOA21NM"
+# Census field that is shown on the map as a percentage of the total
+MAP_FIELD = "UK identity: Cornish only identity"
+# Initial centre (latitude, longitude) and zoom level of the map
+CORNWALL_COORDS = (50.406, -4.848)
+ZOOM_LEVEL = 9
+
+
+def main(
+    csv_path: str,
+    gpkg_path: str,
+    output_plot: str,
+    authorities_to_keep: str | list[str] = AUTHORITY_OF_INTEREST,
+    authority_field: str = AUTHORITY_FIELD,
+    csv_geometry_field: str = CSV_GEOMETRY,
+    gpkg_geometry_field: str = GPKG_GEOMETRY,
+    start_coords: tuple[float, float] = CORNWALL_COORDS,
+    zoom_level: int = ZOOM_LEVEL,
+):
+    """
+    Create a Leaflet HTML choropleth map of the percentage of residents in each MSOA
+    that have a Cornish only identity.
+
+    csv_path is the census CSV file, gpkg_path is the GeoPackage of MSOA polygons and
+    output_plot is the path that the HTML map is written to.
+
+    Only MSOAs whose authority_field value starts with authorities_to_keep are kept.
+    authorities_to_keep can be a single name or a list of names.
+
+    csv_geometry_field and gpkg_geometry_field are the fields that are used to join
+    the census data to the polygons. start_coords and zoom_level set the initial view
+    of the map.
+
+    Middle Super Output Area polygons accessed from
+    https://www.data.gov.uk/dataset/677a5164-3a9e-4752-b8e6-5744d2b280ec/middle-layer-super-output-areas-december-2021-boundaries-ew-bgc-v3
+    """
+
+    # Get Census object
+    census = Census(csv_path)
+
+    # This dataset has MSOAs named with the prefix of 'msoa2021:' which needs removing
+    # in order to match the MSOA names in the GeoPackage. The following line removes
+    # characters to the left of and including ':'
+    census.data[csv_geometry_field] = (
+        census.data[csv_geometry_field].str.split(":").str.get(1)
+    )
+
+    # Map data onto output area polygons
+    census.map_data_to_polygons(gpkg_path, csv_geometry_field, gpkg_geometry_field)
+
+    # A single authority name is wrapped in a tuple so that one or many prefixes can
+    # be tested in the same way
+    if isinstance(authorities_to_keep, str):
+        prefixes = (authorities_to_keep,)
+    else:
+        prefixes = tuple(authorities_to_keep)
+
+    # Remove MSOAs that are not in the authorities of interest
+    census.mapped_data = census.mapped_data[
+        census.mapped_data[authority_field].str.startswith(prefixes, na=False)
+    ]
+
+    fields_of_interest = [
+        "UK identity: British only identity",
+        "UK identity: English only identity",
+        "UK identity: English and British only identity",
+        "UK identity: Cornish only identity",
+        "UK identity: Cornish and British only identity",
+    ]
+    total_field = "Total: All usual residents"
+
+    # Calculate each field as a percentage of the total per output area and keep the
+    # name of the new field that is returned for it
+    percent_fields = {
+        field: census.calc_percent_of_variable(field, total_field)
+        for field in fields_of_interest
+    }
+
+    # Create simple choropleth leaflet map
+    census.create_choropleth_map(
+        percent_fields[MAP_FIELD],
+        output_plot,
+        list(start_coords),
+        zoom_level,
+    )
+
+
+if __name__ == "__main__":
+    helpstring = (
+        "Create a Leaflet choropleth map of the percentage of residents with a "
+        "Cornish only identity in each Cornwall MSOA"
+    )
+    parser = argparse.ArgumentParser(
+        description=helpstring,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    parser.add_argument(
+        "--input_csv",
+        type=str,
+        required=True,
+        help="Unix-style glob to CSV files downloaded from NOMIS",
+    )
+    parser.add_argument(
+        "--input_gpkg",
+        type=str,
+        required=True,
+        help="Path to GeoPackage file containing MSOA features",
+    )
+    parser.add_argument(
+        "--output_plot",
+        type=str,
+        required=True,
+        help="Path to write the output HTML map to",
+    )
+
+    cmdline = parser.parse_args()
+
+    main(cmdline.input_csv, cmdline.input_gpkg, cmdline.output_plot)