Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
8d8dc5b
Add email_verified field to user model
marius-mather Sep 15, 2025
f40bbcd
Use Auth0 email_verified when creating from Auth0 data
marius-mather Sep 15, 2025
fcdfedb
Add migration for email_verified column
marius-mather Sep 15, 2025
944d444
Add apscheduler dependency
marius-mather Sep 15, 2025
bc101b8
Raise for status when fetching users
marius-mather Sep 15, 2025
0f9bef9
Add email_verified to DB admin
marius-mather Sep 16, 2025
44ab5fd
Methods to update user from Auth0 data
marius-mather Sep 16, 2025
954eeb4
Use auto_approve when creating group membership
marius-mather Sep 16, 2025
16ff141
Set default value in email_verified migration
marius-mather Sep 16, 2025
d3667b2
Increase database pool size
marius-mather Sep 16, 2025
f8472cc
Job scheduler
marius-mather Sep 16, 2025
3dac1c7
Tasks to sync user info with Auth0
marius-mather Sep 16, 2025
177bfde
Launch script for the job scheduler
marius-mather Sep 16, 2025
f876cd2
Add loguru for easy logging
marius-mather Sep 16, 2025
39394e9
Use loguru for all task logging
marius-mather Sep 16, 2025
21032b1
Add pytest-asyncio so we can test async functions
marius-mather Sep 16, 2025
c50656c
datagen function for UsersWithTotals
marius-mather Sep 16, 2025
d8a3e03
Test scheduled tasks
marius-mather Sep 16, 2025
759aa77
Ensure the default get_engine is not used in tests
marius-mather Sep 16, 2025
4805300
Patch engine for test of DB admin
marius-mather Sep 16, 2025
77e1ad7
Make job run hourly
marius-mather Sep 16, 2025
2fa4d88
Fix calculation of number of users fetched - start is number of items…
marius-mather Sep 16, 2025
6d41db9
Fix error logging in scheduler
marius-mather Sep 16, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion auth0/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class UsersWithTotals(BaseModel):
"""
Response from Auth0 users API when include_totals is True.

:var start: 0-based page number
:var start: index of the first item
:var limit: number of items per page
:var total: total number of items
"""
Expand Down Expand Up @@ -107,6 +107,7 @@ def get_users(self, page: Optional[int] = None, per_page: Optional[int] = None,
params["search_engine"] = "v3"
url = f"https://{self.domain}/api/v2/users"
resp = self._client.get(url, params=params)
resp.raise_for_status()
if include_totals:
return UsersWithTotals(**resp.json())
return self._convert_users(resp)
Expand Down
2 changes: 1 addition & 1 deletion db/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ class BiocommonsUserAdmin(ModelView, model=BiocommonsUser):
can_edit = False
can_create = False
can_delete = True
column_list = ["id", "username", "email", "created_at"]
column_list = ["id", "username", "email", "email_verified", "created_at"]
column_default_sort = ("created_at", True)


Expand Down
21 changes: 19 additions & 2 deletions db/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class BiocommonsUser(BaseModel, table=True):
# Note: sqlmodel can't validate emails easily.
# Use a separate data model to validate this
email: str = Field(unique=True)
email_verified: bool = Field(default=False, nullable=False)
username: str = Field(unique=True)
created_at: AwareDatetime = Field(
default_factory=lambda: datetime.now(timezone.utc), sa_type=DateTime
Expand Down Expand Up @@ -53,7 +54,7 @@ def from_auth0_data(cls, data: 'schemas.biocommons.Auth0UserData') -> Self:
"""
Create a new BiocommonsUser object from Auth0 user data (no API call).
"""
return cls(id=data.user_id, email=data.email, username=data.username)
return cls(id=data.user_id, email=data.email, username=data.username, email_verified=data.email_verified)

@classmethod
def get_or_create(
Expand All @@ -69,6 +70,22 @@ def get_or_create(
db_session.commit()
return user

def update_from_auth0(self, auth0_id: str, auth0_client: Auth0Client) -> Self:
    """
    Refresh this user from Auth0 by looking up ``auth0_id`` with the client.

    The fetched record is passed on to ``update_from_auth0_data`` and that
    call's result is returned. At present only ``email_verified`` is updated.
    """
    return self.update_from_auth0_data(auth0_client.get_user(user_id=auth0_id))

def update_from_auth0_data(self, data: 'schemas.biocommons.Auth0UserData') -> Self:
"""
Update this object with data from Auth0, without fetching.
Currently only updates email_verified.
"""
self.email_verified = data.email_verified
return self

def add_platform_membership(
self, platform: PlatformEnum, db_session: Session, auto_approve: bool = False
) -> "PlatformMembership":
Expand All @@ -90,7 +107,7 @@ def add_group_membership(
membership = GroupMembership(
group_id=group_id,
user_id=self.id,
approval_status=ApprovalStatusEnum.PENDING,
approval_status=ApprovalStatusEnum.APPROVED if auto_approve else ApprovalStatusEnum.PENDING,
updated_by_id=None,
)
db_session.add(membership)
Expand Down
8 changes: 7 additions & 1 deletion db/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,13 @@ def get_engine():
global _engine
if _engine is None:
db_url, db_connect_args = get_db_config()
_engine = create_engine(db_url, connect_args=db_connect_args)
_engine = create_engine(
db_url,
connect_args=db_connect_args,
pool_size=20,
max_overflow=20,
pool_timeout=60,
)
return _engine


Expand Down
31 changes: 31 additions & 0 deletions migrations/versions/1546c07b9d78_user_email_verified.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""user_email_verified

Revision ID: 1546c07b9d78
Revises: a8cb5fd2d258
Create Date: 2025-09-15 11:37:09.829832

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
import sqlmodel


# revision identifiers, used by Alembic.
revision: str = '1546c07b9d78'
down_revision: Union[str, None] = 'a8cb5fd2d258'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Add the ``email_verified`` column to ``biocommons_user``.

    The column is a non-nullable boolean with a server-side default of false.
    """
    email_verified = sa.Column(
        'email_verified',
        sa.Boolean(),
        nullable=False,
        server_default=sa.false(),
    )
    op.add_column('biocommons_user', email_verified)


def downgrade() -> None:
    """Revert the migration by dropping ``email_verified`` from ``biocommons_user``."""
    op.drop_column(table_name='biocommons_user', column_name='email_verified')
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,15 @@ dependencies = [
"authlib>=1.6.1",
"sqladmin>=0.21.0",
"itsdangerous>=2.2.0",
"apscheduler[redis,sqlalchemy]~=3.11",
"loguru>=0.7.3",
]

[project.optional-dependencies]
dev = [
"pytest>=8.3.5",
"pytest-mock>=3.14.0",
"pytest-asyncio~=1.2",
"pytest-cov>=4.1.0",
"ruff>=0.4.4",
"polyfactory>=2.21.0",
Expand Down
47 changes: 47 additions & 0 deletions run_scheduler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import asyncio
import signal
import sys

from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.interval import IntervalTrigger
from loguru import logger

from scheduled_tasks.scheduler import SCHEDULER
from scheduled_tasks.tasks import sync_auth0_users


def schedule_jobs(scheduler: AsyncIOScheduler):
    """Register the recurring jobs on ``scheduler``.

    Currently this is only ``sync_auth0_users``, on a 60-minute interval
    trigger. The job is added under a fixed id with ``replace_existing=True``.
    """
    every_hour = IntervalTrigger(minutes=60)
    logger.info("Adding job: sync_auth0_users")
    scheduler.add_job(
        sync_auth0_users,
        id="sync_auth0_users",
        trigger=every_hour,
        replace_existing=True,
    )


async def main():
    """Run the job scheduler until SIGINT or SIGTERM is received.

    Registers the jobs, starts the shared ``SCHEDULER``, then parks on an
    ``asyncio.Event`` that the signal handlers set. Once released, the
    scheduler is shut down with ``wait=False``.
    """
    logger.info("Setting up scheduler")
    schedule_jobs(SCHEDULER)
    logger.info("Starting scheduler")
    SCHEDULER.start()
    logger.info("Scheduler started, waiting for shutdown...")

    # Either signal releases the wait below.
    shutdown_requested = asyncio.Event()
    running_loop = asyncio.get_running_loop()
    running_loop.add_signal_handler(signal.SIGINT, shutdown_requested.set)
    running_loop.add_signal_handler(signal.SIGTERM, shutdown_requested.set)
    await shutdown_requested.wait()

    logger.info("Stopping scheduler")
    SCHEDULER.shutdown(wait=False)


if __name__ == "__main__":
    # Tab-separated record layout: timestamp, level, message, logger name,
    # then the bound ``extra`` context.
    log_format = (
        "<green>{time:YYYY-MM-DD HH:mm:ss}</green>\t"
        "<level>{level}</level>\t"
        "{message}\t"
        "<blue>{name}</blue>\t"
        "<cyan>{extra}</cyan>"
    )
    # Drop the existing handlers and log to stderr at INFO with the layout above.
    logger.remove()
    logger.add(sys.stderr, level="INFO", format=log_format)
    asyncio.run(main())
Empty file added scheduled_tasks/__init__.py
Empty file.
45 changes: 45 additions & 0 deletions scheduled_tasks/scheduler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from apscheduler.events import (
EVENT_JOB_ERROR,
EVENT_JOB_EXECUTED,
EVENT_JOB_MISSED,
JobExecutionEvent,
)
from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from loguru import logger


def job_listener(event: JobExecutionEvent):
    """Log the outcome of a scheduler job event.

    Handles the three event codes this listener is registered for:
    executed (info), error (error, with the exception and traceback carried
    by the event) and missed (warning). Every record is emitted with
    ``job_id`` and ``run_time`` bound as loguru context; ``run_time`` is
    ``None`` when the event has no ``scheduled_run_time`` attribute.

    :param event: the APScheduler event to report on
    """
    with logger.contextualize(job_id=event.job_id, run_time=getattr(event, "scheduled_run_time", None)):
        if event.code == EVENT_JOB_EXECUTED:
            logger.info("job executed successfully")
        elif event.code == EVENT_JOB_ERROR:
            # Pass the values as format arguments: a plain string containing
            # "{event.exception}" is logged verbatim by loguru, because it only
            # formats the message when arguments are supplied.
            logger.error("job failed: {}\n{}", event.exception, event.traceback)
        elif event.code == EVENT_JOB_MISSED:
            logger.warning("job missed its run time")


def create_scheduler():
    """Build the application's ``AsyncIOScheduler``.

    Jobs are stored through a ``SQLAlchemyJobStore`` pointed at the URL
    returned by ``get_db_config`` and run on the asyncio executor. Job
    defaults are a five-minute misfire grace time with coalescing enabled,
    and the scheduler works in UTC. ``job_listener`` is attached for
    executed, error and missed events.

    :return: the configured scheduler (not started)
    """
    # Imported inside the function, as in the original code.
    from db.setup import get_db_config

    db_url, _ = get_db_config()
    scheduler = AsyncIOScheduler(
        jobstores={"default": SQLAlchemyJobStore(url=db_url)},
        executors={"default": {"type": "asyncio"}},
        job_defaults={"misfire_grace_time": 5 * 60, "coalesce": True},
        timezone="UTC",
    )
    listened_events = EVENT_JOB_EXECUTED | EVENT_JOB_ERROR | EVENT_JOB_MISSED
    scheduler.add_listener(job_listener, listened_events)
    return scheduler


# Only want to create the scheduler once: this module-level instance is built
# when the module is first imported and is the object other modules import
# as SCHEDULER.
SCHEDULER = create_scheduler()
45 changes: 45 additions & 0 deletions scheduled_tasks/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from loguru import logger

from auth.management import get_management_token
from auth0.client import Auth0Client
from config import get_settings
from db.models import BiocommonsUser
from db.setup import get_db_session
from scheduled_tasks.scheduler import SCHEDULER
from schemas.biocommons import Auth0UserData


async def sync_auth0_users():
    """Page through all Auth0 users and queue an update job for each one.

    Users are fetched 50 at a time with totals included. For every user a
    ``update_auth0_user`` job is added to ``SCHEDULER`` under the id
    ``update_user_<user_id>``, replacing any existing job with that id.
    Paging stops once ``start + len(users)`` of the latest response reaches
    the reported ``total``.
    """
    logger.info("Setting up Auth0 client")
    settings = get_settings()
    auth0_client = Auth0Client(
        domain=settings.auth0_domain,
        management_token=get_management_token(settings=settings),
    )
    # NOTE(review): paging starts at 1 here, while Auth0's Management API
    # documents ``page`` as 0-based - confirm Auth0Client.get_users translates
    # the value, otherwise the first page of users is never synced.
    page = 1
    logger.info("Fetching users")
    while True:
        batch = auth0_client.get_users(page=page, per_page=50, include_totals=True)
        for user in batch.users:
            SCHEDULER.add_job(update_auth0_user, args=[user], id=f"update_user_{user.user_id}", replace_existing=True)
        # ``start`` is the index of the first item in this response.
        fetched_so_far = batch.start + len(batch.users)
        if fetched_so_far >= batch.total:
            break
        page += 1
        logger.info(f"Fetching page {page}")


async def update_auth0_user(user_data: Auth0UserData):
    """Update the stored user matching ``user_data`` with its Auth0 values.

    Looks the user up by ``user_data.user_id``, applies
    ``update_from_auth0_data`` and commits.

    :param user_data: Auth0 record for a single user
    :return: ``False`` when no matching user exists in the DB, else ``True``
    """
    logger.info(f"Checking user {user_data.user_id}")
    # Keep a reference to the generator instead of discarding it after
    # next(): a dropped generator is finalised at an arbitrary point (on
    # CPython, immediately, before the session is used). Closing it
    # explicitly in ``finally`` runs its cleanup once we are done, on every
    # exit path.
    # NOTE(review): presumably that cleanup closes the session - confirm
    # against db.setup.get_db_session.
    session_gen = get_db_session()
    session = next(session_gen)
    try:
        db_user = session.get(BiocommonsUser, user_data.user_id)
        if db_user is None:
            logger.info(" User not found in DB")
            return False
        db_user.update_from_auth0_data(user_data)
        if session.is_modified(db_user):
            logger.info(" User data changed, updating in DB")
        else:
            logger.info(" User data unchanged")
        # Should be OK to commit as SQLAlchemy will only update modified fields
        session.commit()
        return True
    finally:
        session_gen.close()
1 change: 1 addition & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ def disable_db_setup(mocker):
by test fixtures
"""
mocker.patch("db.setup.create_db_and_tables", return_value=None)
mocker.patch("db.setup.get_engine", return_value=None)


@pytest.fixture
Expand Down
18 changes: 17 additions & 1 deletion tests/datagen.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from polyfactory.factories.pydantic_factory import ModelFactory
from pydantic import TypeAdapter, ValidationError

from auth0.client import EmailVerificationResponse
from auth0.client import EmailVerificationResponse, UsersWithTotals
from schemas.biocommons import (
ALLOWED_SPECIAL_CHARS,
Auth0UserData,
Expand Down Expand Up @@ -137,3 +137,19 @@ def bundle(cls) -> str:


class EmailVerificationResponseFactory(ModelFactory[EmailVerificationResponse]): ...


class UsersWithTotalsFactory(ModelFactory[UsersWithTotals]):
    """
    Builds fake responses from the Auth0 users API (``include_totals`` form).

    Keeping total/start/limit mutually consistent automatically is awkward,
    so the values below are only defaults - set them explicitly in each test.
    """
    start = 0
    limit = 10
    total = 20

    @post_generated
    @classmethod
    def users(cls, limit: int) -> list[Auth0UserData]:
        # Generate one user per item on the page, i.e. ``limit`` users.
        return Auth0UserDataFactory.batch(size=limit)
5 changes: 3 additions & 2 deletions tests/db/test_db_admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,11 @@ def test_admin_auth_authenticate_empty_roles_list_redirects(mock_settings):
assert result == "redirect_response"


def test_admin_panel_access_with_valid_admin_session(test_client, mock_settings):
def test_admin_panel_access_with_valid_admin_session(test_client, mock_settings, test_db_engine):
"""Test that admin panel is accessible with valid admin session"""
with patch("db.admin.get_settings", return_value=mock_settings), \
patch("db.admin.setup_oauth") as mock_setup_oauth:
patch("db.admin.setup_oauth") as mock_setup_oauth, \
patch("db.admin.get_engine", return_value=test_db_engine):
mock_oauth_client = Mock()
mock_oauth_client.authorize_redirect = AsyncMock()
mock_oauth_client.authorize_access_token = AsyncMock()
Expand Down
2 changes: 2 additions & 0 deletions tests/db/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def test_create_biocommons_user(test_db_session):
assert user.id == auth0_id
assert user.email == email
assert user.username == "user_name"
assert not user.email_verified


def test_create_biocommons_user_from_auth0(test_db_session, mock_auth0_client):
Expand All @@ -71,6 +72,7 @@ def test_create_biocommons_user_from_auth0(test_db_session, mock_auth0_client):
assert user.id == user_data.user_id
assert user.email == user_data.email
assert user.username == user_data.username
assert user.email_verified == user_data.email_verified


def test_get_or_create_biocommons_user(test_db_session, mock_auth0_client, persistent_factories):
Expand Down
Empty file.
50 changes: 50 additions & 0 deletions tests/scheduled_tasks/test_tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from unittest.mock import MagicMock

import pytest

from scheduled_tasks.tasks import sync_auth0_users, update_auth0_user
from tests.datagen import Auth0UserDataFactory, UsersWithTotalsFactory
from tests.db.datagen import BiocommonsUserFactory


@pytest.mark.asyncio
async def test_sync_auth0_users(mocker, test_client):
    """
    Syncing keeps fetching pages until the reported total is reached and
    schedules one update job per user returned.
    """
    # Two pages of 10 users covering a total of 20.
    first_page = UsersWithTotalsFactory.build(total=20, start=0, limit=10)
    second_page = UsersWithTotalsFactory.build(total=20, start=10, limit=10)
    auth0_stub = MagicMock()
    auth0_stub.get_users.side_effect = [first_page, second_page]

    mocker.patch("scheduled_tasks.tasks.Auth0Client", return_value=auth0_stub)
    mocker.patch("scheduled_tasks.tasks.get_settings")
    mocker.patch("scheduled_tasks.tasks.get_management_token")
    scheduler_stub = mocker.patch("scheduled_tasks.tasks.SCHEDULER")

    await sync_auth0_users()

    assert auth0_stub.get_users.call_count == 2
    # One add_job call per user across both pages
    assert scheduler_stub.add_job.call_count == 20


@pytest.mark.asyncio
async def test_update_auth0_user(test_db_session, mocker, persistent_factories):
    """
    A stored user with email_verified=False picks up email_verified=True
    from the Auth0 record after the update task runs.
    """
    auth0_record = Auth0UserDataFactory.build(email_verified=True)
    stored_user = BiocommonsUserFactory.create_sync(
        id=auth0_record.user_id,
        email=auth0_record.email,
        username=auth0_record.username,
        email_verified=False,
    )
    # Needs to be a generator that yields the session
    session_source = (test_db_session for _ in range(1))
    mocker.patch("scheduled_tasks.tasks.get_db_session", return_value=session_source)

    await update_auth0_user(user_data=auth0_record)

    test_db_session.flush()
    test_db_session.refresh(stored_user)
    assert stored_user.email_verified is True
Loading