diff --git a/census/nomis_census.py b/census/nomis_census.py index 7dcac40..888bee6 100644 --- a/census/nomis_census.py +++ b/census/nomis_census.py @@ -4,6 +4,8 @@ Author: William Jay, November 2025 """ +import folium + import geopandas as gpd import pandas as pd @@ -114,30 +116,34 @@ def map_data_to_polygons( gpkg_path: str, geometry_field_nomis: str = "2021 output area", geometry_field_gpkg: str = "OA21CD", - output_areas_to_keep: list[str] = None, ): """ Returns a geopandas DataFrame that contains geospatial polygons for each geographic entry such as output areas or super output areas. A GeoPackage file must be input containing these output areas at the resolution that matches the census data. - - If output_areas_to_keep is None (the default value), then all output areas are - kept. Otherwise all others are removed. Use this if you are only interested in a - specific region. """ # Open output areas as GeoDataFrame gdf = gpd.read_file(gpkg_path) - # Remove output areas that are not required - if output_areas_to_keep is not None: - self.remove_polygons_from_gpkg(polygons_to_keep) - + # Add the GeoPackage geometry field as an attribute + self.geometry_field_gpkg = geometry_field_gpkg + + # Check that the datatypes of the join columns match + nomis_dtype = self.data[geometry_field_nomis].dtype + gpkg_dtype = gdf[self.geometry_field_gpkg].dtype + if nomis_dtype != gpkg_dtype: + raise ValueError( + "Datatype mismatch for join columns. 
NOMIS field " + f"'{geometry_field_nomis}' is '{nomis_dtype}' and GeoPackage field " + f"'{self.geometry_field_gpkg}' is '{gpkg_dtype}'" + ) + # Join the census data to the output area polygons gdf = gdf.merge( self.data, - left_on=geometry_field_gpkg, + left_on=self.geometry_field_gpkg, right_on=geometry_field_nomis, how="left", ) @@ -148,9 +154,29 @@ def map_data_to_polygons( # Assign to attribute self.mapped_data = mapped_gdf - def remove_polygons_from_gpkg(self, polygons_to_keep: list[str]): + def create_choropleth_map( + self, + value_field: str, + output_map: str, + start_coords: list[float], + zoom_level: int, + ): """ - Remove polygons from self.output_areas GeoDataFrame + Create a Leaflet choropleth map using the folium.Choropleth method """ - pass + # Create map centered on start_coords + folium_map = folium.Map(start_coords, zoom_start=zoom_level) + + # Create choropleth features + choropleth = folium.Choropleth( + geo_data=self.mapped_data, + data=self.mapped_data, + columns=[self.geometry_field_gpkg, value_field], + key_on=f"feature.properties.{self.geometry_field_gpkg}", + legend_name=value_field, + highlight=True, + ).add_to(folium_map) + + # Write as HTML + folium_map.save(output_map) diff --git a/scripts/make_leaflet_map.py b/scripts/make_leaflet_map.py index 8c0b55d..1e13274 100755 --- a/scripts/make_leaflet_map.py +++ b/scripts/make_leaflet_map.py @@ -6,7 +6,6 @@ """ import argparse -import folium import geopandas as gpd @@ -26,6 +25,8 @@ def main( percent_of_variable: str = None, csv_geometry_field: str = CSV_GEOMETRY, gpkg_geometry_field: str = GPKG_GEOMETRY, + start_coords: list[float] = PLYMOUTH_COORDS, + zoom_level: int = ZOOM_LEVEL, ): """ Create a Leaflet HTML choropleth map using Folium for a chosen variable in a @@ -47,45 +48,11 @@ def main( # Calculate percent of variable if percent_of_variable is not None: - - variable = census.calc_percent_of_variable(variable, percent_of_variable) - # Create choropleth Leaflet map - 
create_choropleth_map( - census.mapped_data, - variable, - gpkg_geometry_field, - output_map, - ) + variable = census.calc_percent_of_variable(variable, percent_of_variable) - -def create_choropleth_map( - gdf: gpd.GeoDataFrame, - value_field: str, - geometry_field: str, - output_map: str, - start_coords: list[float] = PLYMOUTH_COORDS, - zoom_level: int = ZOOM_LEVEL, -): - """ - Create a Leaflet choropleth map using the folium.Choropleth method - """ - - # Create map centered over Plymouth - folium_map = folium.Map(start_coords, zoom_start=zoom_level) - - # Create choropleth features - choropleth = folium.Choropleth( - geo_data=gdf, - data=gdf, - columns=[geometry_field, value_field], - key_on=f"feature.properties.{geometry_field}", - legend_name=value_field, - highlight=True, - ).add_to(folium_map) - - # Write as HTML - folium_map.save(output_map) + # Create choropleth Leaflet map + census.create_choropleth_map(variable, output_map, start_coords, zoom_level) if __name__ == "__main__": diff --git a/scripts/plot_cornish_identity.py b/scripts/plot_cornish_identity.py new file mode 100755 index 0000000..dd68921 --- /dev/null +++ b/scripts/plot_cornish_identity.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 +""" + +Author: William Jay, November 2025 +""" + +import argparse + +from census.nomis_census import Census + +AUTHORITY_OF_INTEREST = "Cornwall" +AUTHORITY_FIELD = "MSOA21NM" +CSV_GEOMETRY = "Area" +GPKG_GEOMETRY = "MSOA21NM" + + +def main( + csv_path: str, + gpkg_path: str, + output_plot: str, + authorities_to_keep: list[str] = AUTHORITY_OF_INTEREST, + authority_field: str = AUTHORITY_FIELD, + csv_geometry_field: str = CSV_GEOMETRY, + gpkg_geometry_field: str = GPKG_GEOMETRY, +): + """ + Middle Super Output Area polygons accessed from + https://www.data.gov.uk/dataset/677a5164-3a9e-4752-b8e6-5744d2b280ec/middle-layer-super-output-areas-december-2021-boundaries-ew-bgc-v3 + """ + + # Get Census object + census = Census(csv_path) + + # This dataset has MSOAs named 
with the prefix of 'msoa2021:' which needs removing + # in order to match the MSOA names in the GeoPackage. The following line removes + # characters to the left of and including ':' + census.data[csv_geometry_field] = ( + census.data[csv_geometry_field].str.split(":").str.get(1) + ) + + # Map data onto output area polygons + census.map_data_to_polygons(gpkg_path, csv_geometry_field, gpkg_geometry_field) + + # Remove MSOAs that are not Cornwall + census.mapped_data = census.mapped_data[ + census.mapped_data["MSOA21NM"].str.startswith("Cornwall") + ] + + fields_of_interest = [ + "UK identity: British only identity", + "UK identity: English only identity", + "UK identity: English and British only identity", + "UK identity: Cornish only identity", + "UK identity: Cornish and British only identity", + ] + total_field = "Total: All usual residents" + # List to populate new field names with + percent_fields = [] + + for field in fields_of_interest: + + # Calculate field as a percentage of the total per output area + new_field_name = census.calc_percent_of_variable(field, total_field) + # Add new field name to list + percent_fields.append(new_field_name) + + # Create simple choropleth leaflet map + census.create_choropleth_map( + "UK identity: Cornish only identity_percent", + output_plot, + [50.406, -4.848], + 9, + ) + + +if __name__ == "__main__": + helpstring = "" + parser = argparse.ArgumentParser( + description=helpstring, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--input_csv", + type=str, + required=True, + help="Unix-style glob to CSV files downloaded from NOMIS", + ) + parser.add_argument( + "--input_gpkg", + type=str, + required=True, + help="Path to GeoPackage file containing census Output Area features", + ) + parser.add_argument( + "--output_plot", + type=str, + required=True, + help="Path to write output plot to", + ) + + cmdline = parser.parse_args() + + main(cmdline.input_csv, cmdline.input_gpkg, 
cmdline.output_plot)