Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ python-dateutil = "*"
retry = "*"
black = "*"
rich = "*"
arcgis2geojson = "*"

[dev-packages]
pytest-mypy-plugins = "*"
Expand Down
102 changes: 38 additions & 64 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions lgsf/conf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def __init__(self):
"councillors",
"templates",
"metadata",
"polling_stations"
# 'parties',
# "scrapers",
# 'reconcilers',
Expand Down
Empty file.
97 changes: 97 additions & 0 deletions lgsf/polling_stations/commands.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
from rich.progress import Progress

from lgsf.commands.base import PerCouncilCommandBase
from lgsf.path_utils import load_scraper, load_council_info


class Command(PerCouncilCommandBase):

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Feels like a large chunk of this file could be refactored into a common file - it's largely the same as the councillors one, and I think it would be good to keep the outputs and interfaces as similar as we can.

This can be tracked as a separate issue, of course; it's not needed for this PR.

# Identifier for this command; handle() passes it to load_scraper() to locate
# each council's scraper.
command_name = "polling_stations"

def add_arguments(self, parser):
    """Register this command's boolean switches on ``parser``.

    Each switch is a ``store_true`` flag, so it defaults to ``False`` and
    becomes ``True`` when given on the command line.
    """
    switches = (
        ("--check-only", "Just check for updated pages, don't scrape anything"),
        ("--list-missing", "Print missing councils"),
        ("--list-disabled", "Print disabled councils"),
    )
    for flag, description in switches:
        parser.add_argument(flag, action="store_true", help=description)

def _run_single(self, scraper, progress, summary):
    """Run one scraper and record the outcome in ``summary``.

    Args:
        scraper: Object exposing ``options["council"]``, ``run()`` and
            ``_error_file_name()``.
        progress: Object whose ``console.print`` is used for output.
        summary: Mutable mapping; ``"completed"`` or ``"failed"`` is
            incremented in place.

    Raises:
        Exception: Whatever the scraper raised, but only when the
            ``verbose`` option is truthy; otherwise the failure is counted
            and reported on the console.
    """
    try:
        progress.console.print(scraper.options["council"])
        scraper.run()
    except Exception:
        # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt
        # and SystemExit propagate instead of being counted as failures.
        if self.options.get("verbose"):
            raise
        summary["failed"] += 1
        progress.console.print(
            "Error running {}, see {} for more".format(
                self.options["council"], scraper._error_file_name()
            ),
            style="red",
        )
    else:
        summary["completed"] += 1

def handle(self, options):
    """Run the polling-station scraper for each selected council.

    Optionally prints the missing/disabled council lists, then walks the
    councils returned by ``self.councils_to_run()`` exactly once. For each
    council the scraper class is loaded; a council with no scraper is
    counted as ``"missing scraper"``. A loaded scraper is skipped when it
    is disabled, when ``options["refresh"]`` is set and
    ``scraper.run_since()`` is truthy, or when ``options["tags"]`` lists a
    tag the scraper does not have. A summary dict of the counts is printed
    at the end.

    Args:
        options: Mapping of parsed command options. Reads ``list_missing``,
            ``list_disabled``, ``refresh`` and ``tags``; ``council`` and
            ``council_info`` are written into it for each council.
    """
    self.options = options
    if options["list_missing"]:
        self.output_missing()

    if options["list_disabled"]:
        self.output_disabled()

    self.output_status()

    self.normalise_codes()
    to_run = self.councils_to_run()
    summary = {
        "completed": 0,
        "missing scraper": 0,
        "failed": 0,
        "skipped": 0,
    }
    with Progress() as progress:
        total_task = progress.add_task(description="Total", total=len(to_run))

        # Iterate the councils once. The previous ``while not
        # progress.finished`` wrapper re-ran the whole list forever whenever
        # a council had no scraper, because that path never advanced the
        # progress task.
        for council in to_run:
            self.options["council"] = council
            self.options["council_info"] = load_council_info(council)
            scraper_cls = load_scraper(council, self.command_name)
            if scraper_cls:
                with scraper_cls(self.options, progress.console) as scraper:
                    should_run = not scraper.disabled

                    if should_run and options["refresh"] and scraper.run_since():
                        should_run = False

                    if should_run and options["tags"]:
                        required_tags = set(options["tags"].split(","))
                        scraper_tags = set(scraper.get_tags)
                        if not required_tags.issubset(scraper_tags):
                            should_run = False

                    if should_run:
                        self._run_single(scraper, progress, summary)
                    else:
                        summary["skipped"] += 1
            else:
                summary["missing scraper"] += 1

            # Advance for every council, including ones without a scraper,
            # so the bar reaches its total.
            progress.update(total_task, advance=1)
    self.console.print(summary)
23 changes: 23 additions & 0 deletions lgsf/polling_stations/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import json


class PollingStationsList:
    """Hold a collection of polling stations and serialise it to JSON."""

    # Fixed name returned by as_file_name().
    _FILE_NAME = "stations"

    def __init__(self, stations):
        # Stored as given; must be JSON-serialisable for as_json() to work.
        self.stations = stations

    def as_file_name(self):
        """Return the fixed name ``"stations"``."""
        return self._FILE_NAME

    def as_json(self):
        """Return the stations as a JSON string indented by four spaces."""
        serialised = json.dumps(self.stations, indent=4)
        return serialised


class PollingDistrictsList:
    """Hold a collection of polling districts and serialise it to JSON."""

    # Fixed name returned by as_file_name().
    _FILE_NAME = "districts"

    def __init__(self, districts):
        # Stored as given; must be JSON-serialisable for as_json() to work.
        self.districts = districts

    def as_file_name(self):
        """Return the fixed name ``"districts"``."""
        return self._FILE_NAME

    def as_json(self):
        """Return the districts as a JSON string indented by four spaces."""
        serialised = json.dumps(self.districts, indent=4)
        return serialised
Empty file.
49 changes: 49 additions & 0 deletions lgsf/polling_stations/scrapers/arcgis_scraper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import json
from arcgis2geojson import arcgis2geojson


from lgsf.polling_stations.scrapers.common import PollingStationScraperBase


class ArcGisScraper(PollingStationScraperBase):
    """Scraper for JSON responses containing ``fields`` and ``features``.

    Relies on the base class for ``get``, ``council_id`` and
    ``process_features``, none of which are defined here.
    """

    # Encoding used to decode the raw response body before JSON parsing.
    encoding = "utf-8"
    # NOTE(review): presumably the attribute that identifies a feature; it is
    # not read anywhere in this class - confirm against the base class.
    key = "OBJECTID"

    def make_geometry(self, feature):
        """Return ``feature`` converted by ``arcgis2geojson`` as a JSON string.

        Keys are sorted so the same feature always serialises identically.
        """
        return json.dumps(arcgis2geojson(feature), sort_keys=True)

    def get_data(self, url):  # pragma: no cover
        """Fetch ``url`` and return ``(raw_bytes, parsed_json)``."""
        response = self.get(url)
        data_str = response.content
        data = json.loads(data_str.decode(self.encoding))
        return (data_str, data)

    def process_feature(self, feature, fields=None):
        """Build a flat record dict for a single feature.

        Args:
            feature: Mapping with an ``"attributes"`` mapping; it is also
                passed whole to ``make_geometry``.
            fields: Iterable of mappings each having a ``"name"`` key that
                selects an attribute to copy. ``None`` (the default) copies
                no attributes.

        Returns:
            Dict with ``council_id``, ``geometry`` and one entry per field
            name; string values are stripped of surrounding whitespace.

        Raises:
            KeyError: If a named field is absent from the attributes.
        """
        record = {
            "council_id": self.council_id,
            "geometry": self.make_geometry(feature),
        }
        # ``fields`` defaults to None; iterating it directly raised TypeError.
        for field in fields or ():
            name = field["name"]
            value = feature["attributes"][name]
            record[name] = value.strip() if isinstance(value, str) else value
        return record

    def scrape(self, url, type="features"):
        """Download ``url`` and hand its features to ``process_features``.

        Args:
            url: Address passed to ``get_data``.
            type: Label used only in the printed "found N ..." message.

        Returns:
            Whatever ``self.process_features(features, fields)`` returns.
        """
        _, data = self.get_data(url)
        features = data["features"]
        print(f"found {len(features)} {type}")

        # TODO: printing a summary and storing the raw response history were
        # sketched here as commented-out code but never implemented.
        return self.process_features(features, data["fields"])
Loading