diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..f7e2def6 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +* text=auto eol=lf +*.sh text eol=lf diff --git a/.github/workflows/provision-and-deploy.yml b/.github/workflows/provision-and-deploy.yml index 37e61302..c22445b5 100644 --- a/.github/workflows/provision-and-deploy.yml +++ b/.github/workflows/provision-and-deploy.yml @@ -36,10 +36,43 @@ jobs: - name: Await reviewer approval run: echo "Approved for ${{ github.ref_name }}." - discover: + build-and-push: needs: gate + if: github.ref_name == 'master' || github.ref_name == 'prod' + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + steps: + - uses: actions/checkout@v6.0.2 + + - uses: docker/setup-buildx-action@v4 + + - uses: docker/login-action@v4 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - id: meta + uses: docker/metadata-action@v6 + with: + images: ghcr.io/${{ github.repository }} + tags: type=ref,event=branch + + - uses: docker/build-push-action@v7 + with: + context: . 
+ push: true + platforms: linux/amd64 + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max + + discover: + needs: build-and-push runs-on: ubuntu-latest - environment: ${{ github.ref_name == 'prod' && 'prod' || 'dev' }} permissions: contents: read @@ -99,13 +132,11 @@ jobs: needs: discover if: needs.discover.outputs.droplet_exists != 'true' runs-on: ubuntu-latest - environment: ${{ github.ref_name == 'prod' && 'prod' || 'dev' }} permissions: contents: read env: DEPLOY_BRANCH: ${{ github.ref_name }} - APP_PATH: ${{ vars.APP_PATH || '/opt/event-queue-bot' }} DO_REGION: ${{ vars.DO_REGION || 'nyc3' }} DO_SIZE: ${{ vars.DO_SIZE || 's-1vcpu-1gb' }} DO_IMAGE: ${{ vars.DO_IMAGE || 'ubuntu-24-04-x64' }} @@ -164,12 +195,17 @@ jobs: && (needs.provision.result == 'success' || needs.provision.result == 'skipped') runs-on: ubuntu-latest environment: ${{ github.ref_name == 'prod' && 'prod' || 'dev' }} + concurrency: + group: provision-and-deploy-bot-droplet + cancel-in-progress: false permissions: contents: read env: DEPLOY_BRANCH: ${{ github.ref_name }} - APP_PATH: ${{ vars.APP_PATH || '/opt/event-queue-bot' }} + APP_PATH: ${{ vars.APP_PATH || (github.ref_name == 'prod' && '/opt/event-queue-bot' || '/opt/event-queue-bot-nightly') }} + CONTAINER_NAME: ${{ github.ref_name == 'prod' && 'queue-bot' || 'queue-bot-nightly' }} + IMAGE_TAG: ${{ github.ref_name }} # Coalesce: fresh-provision output wins; otherwise discover's lookup. 
BOT_HOST: ${{ needs.provision.outputs.bot_host || needs.discover.outputs.bot_host }} @@ -191,6 +227,17 @@ jobs: with: ref: ${{ env.DEPLOY_BRANCH }} + - name: Install doctl + uses: digitalocean/action-doctl@v2.5.2 + with: + token: ${{ secrets.DIGITALOCEAN_TOKEN }} + version: 1.159.0 + + - name: Ensure firewall allows bot ports + env: + DO_DROPLET_NAME: ${{ vars.DO_DROPLET_NAME || 'event-queue-bot' }} + run: bash scripts/ensure-firewall.sh + - name: Configure SSH env: SSH_DEPLOY_PRIVATE_KEY: ${{ secrets.SSH_DEPLOY_PRIVATE_KEY }} @@ -218,7 +265,8 @@ jobs: - name: Wait for cloud-init run: | for attempt in {1..30}; do - if ssh -i ~/.ssh/bot_deploy_key -o ConnectTimeout=10 deploy@"${BOT_HOST}" "cloud-init status --wait && test -x /usr/local/bin/deploy-event-queue-bot"; then + if ssh -i ~/.ssh/bot_deploy_key -o ConnectTimeout=10 deploy@"${BOT_HOST}" \ + "cloud-init status --wait && test -x /usr/local/bin/deploy-event-queue-bot"; then exit 0 fi @@ -252,26 +300,35 @@ jobs: printf 'ENABLE_LEGACY_MIGRATION=%s\n' "${BOT_ENABLE_LEGACY_MIGRATION:-false}" printf 'FORCE_SEND_PATCH_NOTES=%s\n' "${BOT_FORCE_SEND_PATCH_NOTES:-false}" printf 'SILENT=%s\n' "${BOT_SILENT:-false}" + printf 'CONTAINER_NAME=%s\n' "${CONTAINER_NAME}" + printf 'IMAGE_TAG=%s\n' "${IMAGE_TAG}" } > "${RUNNER_TEMP}/bot.env" - - name: Sync repository to VPS + - name: Sync deploy artifacts to VPS run: | - rsync -az --delete \ - --exclude '.git' \ - --exclude '.github' \ - --exclude 'node_modules' \ - --exclude 'data/main.sqlite' \ - --exclude 'data/backups' \ - --exclude 'data/migrations/legacy-export' \ - --exclude 'logs' \ - --exclude '.env' \ + # Pre-create the bind-mount targets so Docker mounts a file/dir, not a + # new directory in place of the sqlite file. The compose file mounts + # data/main.sqlite and data/backups individually (not the whole data + # dir) so the image's data/migrations is not shadowed. 
+ ssh -i ~/.ssh/bot_deploy_key deploy@"${BOT_HOST}" \ + "mkdir -p '${APP_PATH}/data/backups' && touch '${APP_PATH}/data/main.sqlite'" + rsync -az \ -e "ssh -i ~/.ssh/bot_deploy_key" \ - ./ deploy@"${BOT_HOST}":"${APP_PATH}/" + docker-compose.app.yml deploy@"${BOT_HOST}":"${APP_PATH}/" - name: Write bot environment run: | ssh -i ~/.ssh/bot_deploy_key deploy@"${BOT_HOST}" "umask 077 && cat > '${APP_PATH}/.env.tmp' && mv '${APP_PATH}/.env.tmp' '${APP_PATH}/.env'" < "${RUNNER_TEMP}/bot.env" + - name: Log in to GHCR on VPS + env: + GHCR_PULL_TOKEN: ${{ secrets.GHCR_PULL_TOKEN }} + run: | + if [ -n "${GHCR_PULL_TOKEN}" ]; then + printf '%s' "${GHCR_PULL_TOKEN}" | ssh -i ~/.ssh/bot_deploy_key deploy@"${BOT_HOST}" \ + "docker login ghcr.io -u '${{ github.actor }}' --password-stdin" + fi + - name: Deploy bot run: | - ssh -i ~/.ssh/bot_deploy_key deploy@"${BOT_HOST}" "sudo /usr/local/bin/deploy-event-queue-bot" + ssh -i ~/.ssh/bot_deploy_key deploy@"${BOT_HOST}" "sudo /usr/local/bin/deploy-event-queue-bot '${APP_PATH}'" diff --git a/.github/workflows/restart-bot.yml b/.github/workflows/restart-bot.yml index d6f9a1c9..59189999 100644 --- a/.github/workflows/restart-bot.yml +++ b/.github/workflows/restart-bot.yml @@ -35,7 +35,8 @@ jobs: env: DO_DROPLET_NAME: ${{ vars.DO_DROPLET_NAME || 'event-queue-bot' }} - APP_PATH: ${{ vars.APP_PATH || '/opt/event-queue-bot' }} + APP_PATH: ${{ vars.APP_PATH || (inputs.environment == 'prod' && '/opt/event-queue-bot' || '/opt/event-queue-bot-nightly') }} + CONTAINER_NAME: ${{ inputs.environment == 'prod' && 'queue-bot' || 'queue-bot-nightly' }} steps: - name: Validate required secrets @@ -87,4 +88,4 @@ jobs: BOT_HOST: ${{ steps.lookup.outputs.bot_host }} run: | ssh -i ~/.ssh/bot_deploy_key deploy@"${BOT_HOST}" \ - "cd '${APP_PATH}' && docker compose restart && docker logs --tail 100 queue-bot" + "cd '${APP_PATH}' && docker compose -f docker-compose.app.yml restart && docker logs --tail 100 '${CONTAINER_NAME}'" diff --git a/INFRA.md b/INFRA.md index 
7c714cf1..87a9d9f5 100644 --- a/INFRA.md +++ b/INFRA.md @@ -1,13 +1,22 @@ # Infrastructure Setup -GitHub Actions provisions a DigitalOcean VPS with the official `doctl` CLI and -deploys the bot. Pushes to `master` deploy to a throwaway **dev** droplet; -promotion to prod is a deliberate `master → prod` PR merge. No local Terraform -or server setup is required. - -Each environment is split into a **gate** env (required reviewers, no secrets; -attached to the `discover` job) and a **secrets** env (no reviewers; attached to -`provision` and `deploy`). Approval is requested once per run. +GitHub Actions builds the bot image, pushes it to GHCR, provisions a single +DigitalOcean VPS with the official `doctl` CLI, and deploys the bot. Pushes to +`master` deploy the **dev** container; promotion to prod is a deliberate +`master → prod` PR merge. No local Terraform or server setup is required. + +Both environments share **one droplet**. Prod runs the `queue-bot` container +from `/opt/event-queue-bot`; dev runs the `queue-bot-nightly` container from +`/opt/event-queue-bot-nightly`. Each has its own `data/main.sqlite`, so they +share the box but not state. The `deploy` job derives the container name, app +path, and image tag from the branch and serializes prod/dev deploys via a shared +concurrency group. + +Each environment has a **gate** env (required reviewers, no secrets; attached to +the `gate` job) and a **secrets** env (no reviewers; attached to `deploy`). +Approval is requested once per run. The `build-and-push`, `discover`, and +`provision` jobs are not environment-scoped — they read repo-level secrets/vars +and target the single shared droplet. Shared infra secrets (DO token, SSH keys) live at the **repository** level and fall through from any environment. 
Bot identity (`BOT_APP_ID`, `BOT_TOKEN`) @@ -26,12 +35,16 @@ A `prod` environment and `prod` branch are required for prod deploys — see Create a DigitalOcean API token with these custom scopes: -- `droplet:read`, `droplet:create` +- `droplet:read`, `droplet:create`, `droplet:delete` - `ssh_key:read`, `ssh_key:create` - `firewall:read`, `firewall:create`, `firewall:update` - `tag:read`, `tag:create` - `project:read`, `project:create`, `project:update` +`droplet:delete` is required only when tearing down the droplet for a +re-provision (see [Re-provisioning via the CLI](#re-provisioning-via-the-cli)); +CI day-to-day deploys use the read/create scopes. + Save it as this GitHub repository secret: ```text @@ -107,37 +120,54 @@ When the prod promotion path is set up, the prod bot's `BOT_APP_ID` / The workflow generates the server `.env` file from these secrets during deploy. -## 5. Optional GitHub Variables +## 4b. GHCR image access -GitHub variables (not secrets). Set at repo level for shared values, or on a -specific environment (`prod`, `dev`) to override. Unset → falls back to -the default below. +The `build-and-push` job pushes the image to `ghcr.io/getboolean/event-queue-bot` +and the droplet pulls it during deploy. Make the pull work one of two ways: -| Variable | Default | -| --- | --- | -| `DO_REGION` | `nyc3` | -| `DO_SIZE` | `s-1vcpu-1gb` | -| `DO_IMAGE` | `ubuntu-24-04-x64` | -| `DO_DROPLET_NAME` | `event-queue-bot` | -| `DO_ENABLE_BACKUPS` | `false` | -| `DO_SWAP_SIZE` | `1G` | -| `APP_PATH` | `/opt/event-queue-bot` | -| `BOT_TOP_GG_TOKEN` | empty | -| `BOT_PATCH_NOTES_CHANNEL_ID` | empty | -| `BOT_DEFAULT_COLOR` | `Random` | -| `BOT_DEFAULT_SCHEDULE_TIMEZONE` | `america/chicago` | -| `BOT_ENABLE_LEGACY_MIGRATION` | `false` | -| `BOT_FORCE_SEND_PATCH_NOTES` | `false` | -| `BOT_SILENT` | `false` | +- **Public package (simplest):** in the GHCR package settings, set the package + visibility to public. No extra secret is needed. 
+- **Private package:** create a classic PAT with the `read:packages` scope and + save it as the repository secret `GHCR_PULL_TOKEN`. The deploy job uses it to + `docker login ghcr.io` on the droplet. If the secret is empty, the login step + is skipped (so it is safe to leave unset for a public package). + +## 5. Optional GitHub Variables + +GitHub variables (not secrets). The `DO_*` infra variables drive the single +shared droplet, so set them at the **repository** level. The `BOT_*` variables +are per-environment so prod and dev can differ. Unset → falls back to the +default below. + +| Variable | Scope | Default | +| --- | --- | --- | +| `DO_REGION` | repo | `nyc3` | +| `DO_SIZE` | repo | `s-1vcpu-1gb` | +| `DO_IMAGE` | repo | `ubuntu-24-04-x64` | +| `DO_DROPLET_NAME` | repo | `event-queue-bot` | +| `DO_ENABLE_BACKUPS` | repo | `false` | +| `DO_SWAP_SIZE` | repo | `1G` | +| `APP_PATH` | env | branch-derived (see above) | +| `BOT_TOP_GG_TOKEN` | env | empty | +| `BOT_PATCH_NOTES_CHANNEL_ID` | env | empty | +| `BOT_DEFAULT_COLOR` | env | `Random` | +| `BOT_DEFAULT_SCHEDULE_TIMEZONE` | env | `america/chicago` | +| `BOT_ENABLE_LEGACY_MIGRATION` | env | `false` | +| `BOT_FORCE_SEND_PATCH_NOTES` | env | `false` | +| `BOT_SILENT` | env | `false` | + +The app path, container name, and image tag are derived from the branch by the +`deploy` job (prod → `/opt/event-queue-bot` / `queue-bot` / `prod`; dev → +`/opt/event-queue-bot-nightly` / `queue-bot-nightly` / `master`). Override the +app path per environment with the optional `APP_PATH` variable. `DO_SWAP_SIZE` accepts a positive integer optionally suffixed `K`/`M`/`G`, or `0` to disable. Applied only at first boot via cloud-init — changing it doesn't affect existing droplets. -- **Prod (`s-1vcpu-1gb`)**: leave at `1G` default — gives node-gyp/`better-sqlite3` headroom - during `docker compose up --build` and lets the kernel evict idle anon pages in favor of FS - cache. 
Set to `0` to disable if you prefer prod to fail loudly on memory pressure rather than swap. -- **Dev (`s-1vcpu-512mb-10gb`)**: leave at `1G` default — without swap, `npm ci` OOMs during - native compile. +Leave `DO_SWAP_SIZE` at the `1G` default. The shared droplet runs both the prod +and dev containers; since images are now built in CI and only pulled on the box, +build-time memory pressure is gone, but swap still gives the two resident bots +headroom. If memory proves tight, bump `DO_SIZE` to `s-1vcpu-2gb`. Set `DO_ENABLE_BACKUPS` to `true` before the first deploy if you want DigitalOcean Droplet backups. Backups add 20% to the droplet cost. You can also back up the @@ -151,23 +181,32 @@ In GitHub: 2. Select `Provision and Deploy Bot`. 3. Run the workflow. -The workflow creates or reuses the VPS, writes `.env`, syncs the repo, and runs -Docker Compose. +The workflow builds and pushes the image to GHCR, creates or reuses the VPS, +syncs `docker-compose.app.yml`, writes `.env`, pulls the image, and runs Docker +Compose via `/usr/local/bin/deploy-event-queue-bot` (installed by cloud-init). +The deploy script and sudoers entry live in cloud-init — changing them requires +a re-provision. Firewall rules are reconciled by `scripts/ensure-firewall.sh` +in both the `provision` and `deploy` jobs, so firewall changes apply even when +provision is skipped. Future pushes to `master` deploy to dev automatically; each run pauses at -`gate` for `dev-gate` reviewer approval before `discover`, `provision`, and -`deploy` proceed. Prod is reached only by merging `master → prod` — see +`gate` for `dev-gate` reviewer approval before `build-and-push`, `discover`, +`provision`, and `deploy` proceed. Prod is reached only by merging +`master → prod` — see [Setting up the prod promotion path](#setting-up-the-prod-promotion-path). ## Setting up the prod promotion path Required for prod deploys. Without this, the workflow only ever targets dev. 
-Adds the prod-side droplet and the `master → prod` merge gate so feature work -auto-validates on dev and only reaches users when explicitly promoted. +Adds the prod environment and the `master → prod` merge gate so feature work +auto-validates on dev and only reaches users when explicitly promoted. Both +environments deploy to the **same** droplet (provisioned on the first dev or +prod run), so no second droplet is created — only the prod-side credentials and +the promotion workflow. -The default `dev` environment from §4 already covers the dev droplet (running -the dev Discord application from §4's `BOT_APP_ID`/`BOT_TOKEN`). What follows -sets up the *prod* side and the promotion workflow. +The default `dev` environment from §4 already runs the dev container (the dev +Discord application from §4's `BOT_APP_ID`/`BOT_TOKEN`). What follows sets up the +*prod* side and the promotion workflow. Maintainer's dev bot invite (for reference; install this on your own test guild so you can poke at it): @@ -178,35 +217,13 @@ guild so you can poke at it): 2. Create two GitHub environments: - `prod-gate` — required reviewers, no secrets/vars. - `prod` — no reviewers. -3. On `prod`, add `BOT_APP_ID` and `BOT_TOKEN` from step 1. -4. On `prod`, add these vars (shared infra secrets stay at repo level; the dev - environment from §4 carries the dev-droplet overrides): - - | Variable | Value | - | --- | --- | - | `DO_DROPLET_NAME` | `event-queue-bot` | - | `APP_PATH` | `/opt/event-queue-bot` | - | `DO_PROJECT_NAME` | `Event Queue Bot` | - | `DO_PROJECT_ENVIRONMENT` | `Production` | - | `DO_SIZE` | `s-1vcpu-1gb` | - - The dev environment should mirror the inverse (dev droplet name/path/size). 
- The relevant dev overrides (set on the `dev` environment): - - | Variable | Value | - | --- | --- | - | `DO_DROPLET_NAME` | `event-queue-bot-dev` | - | `APP_PATH` | `/opt/event-queue-bot-dev` | - | `DO_PROJECT_NAME` | `Event Queue Bot Dev` | - | `DO_PROJECT_ENVIRONMENT` | `Development` | - | `DO_SIZE` | `s-1vcpu-512mb-10gb` (cheapest Basic droplet, ~$4/mo; the bot fits in 512MB for dev) | - - At the 512 MB dev size, leave `DO_SWAP_SIZE` at its `1G` default — without swap, - `npm ci` OOMs during `better-sqlite3`'s native compile and the build wedges silently. - -5. Create the `prod` branch from `master` and push it. Pushes and merges to - `prod` deploy to the prod droplet, gated by `prod-gate`. -6. Add branch protection on `prod`: +3. On `prod`, add `BOT_APP_ID` and `BOT_TOKEN` from step 1, plus any per-env + `BOT_*` variables you want to differ from dev. Infra `DO_*` vars and secrets + stay at the repository level (shared droplet); there are no per-environment + droplet/path/size overrides. +4. Create the `prod` branch from `master` and push it. Pushes and merges to + `prod` deploy the prod container to the shared droplet, gated by `prod-gate`. +5. Add branch protection on `prod`: - Require a pull request before merging. - Require deployments to succeed before merging → add `dev`. This forces the head SHA to have already passed a dev deploy before it can land on @@ -221,9 +238,32 @@ protection confirms the head SHA succeeded on dev → merge → prod deploys [promote-pr]: https://github.com/getBoolean/Event-Queue-Bot/compare/prod...getBoolean:Event-Queue-Bot:master -Prod and dev share no state: separate droplets, separate `data/main.sqlite`, +Prod and dev share one droplet but no state: separate containers +(`queue-bot` vs `queue-bot-nightly`), separate app dirs and `data/main.sqlite`, separate Discord applications. 
+## Re-provisioning via the CLI + +When cloud-init changes (deploy script, sudoers, swap size, etc.), delete the +droplet and re-run the workflow so provision creates a fresh one. The same +`DIGITALOCEAN_TOKEN` secret CI uses works locally with `doctl`: + +```bash +export DIGITALOCEAN_TOKEN='<your-token>' +doctl auth init -t "$DIGITALOCEAN_TOKEN" +``` + +The token needs the scopes listed in [§1](#1-create-digitalocean-token), +including **`droplet:delete`** for teardown. Typical sequence: + +1. Back up both databases (see [Backup Before Deleting](#backup-before-deleting)). +2. Delete the droplet: `doctl compute droplet delete event-queue-bot` (or the + DO console). +3. Push the updated cloud-init and run `Provision and Deploy Bot` — provision + recreates the droplet, then deploy starts the containers. +4. Restore each database if needed (stop container, copy `main.sqlite` back, + restart). + ## Connect to the Droplet Get the droplet IPv4 from the latest workflow's `discover` job, `doctl compute @@ -243,33 +283,24 @@ re-run the workflow. 
## Backup Before Deleting -The dev database is: +Both databases live on the shared droplet: ```text -/opt/event-queue-bot-dev/data/main.sqlite +Prod: /opt/event-queue-bot/data/main.sqlite +Dev: /opt/event-queue-bot-nightly/data/main.sqlite ``` -Download it before deleting the Droplet: +Download them before deleting the droplet: ```bash -scp deploy@your_server_ip:/opt/event-queue-bot-dev/data/main.sqlite ./main.sqlite.backup -``` - -To remove the dev deployment, delete these DigitalOcean resources: - -```text -Droplet: event-queue-bot-dev -Firewall: event-queue-bot-dev-ssh -SSH key: event-queue-bot-dev-deploy -Tag: event-queue-bot-dev +scp deploy@your_server_ip:/opt/event-queue-bot/data/main.sqlite ./main.sqlite.prod.backup +scp deploy@your_server_ip:/opt/event-queue-bot-nightly/data/main.sqlite ./main.sqlite.dev.backup ``` -If the prod promotion path is configured, prod uses the same resource names -without the `-dev` suffix (the suffix is derived from `DO_DROPLET_NAME` in -`scripts/provision-digitalocean.sh`): +To remove the deployment entirely, delete these DigitalOcean resources (names +derived from `DO_DROPLET_NAME` in `scripts/provision-digitalocean.sh`): ```text -Database: /opt/event-queue-bot/data/main.sqlite Droplet: event-queue-bot Firewall: event-queue-bot-ssh SSH key: event-queue-bot-deploy diff --git a/README-dev.md b/README-dev.md index 38844603..637a0c72 100644 --- a/README-dev.md +++ b/README-dev.md @@ -92,11 +92,16 @@ npm start ## Deploying via GitHub Actions -Pushes to `master` trigger `.github/workflows/provision-and-deploy.yml`, which runs three jobs: +Pushes to `master` trigger `.github/workflows/provision-and-deploy.yml`, which runs: -1. **`discover`** — looks up the DigitalOcean droplet by `DO_DROPLET_NAME`. -2. **`provision`** — creates the droplet via cloud-init only if none exists. -3. **`deploy`** — rsyncs the repo, writes `.env`, and runs `docker compose up -d --build` on the droplet. 
Pending Drizzle migrations apply automatically on container start. +1. **`build-and-push`** — builds the Docker image and pushes it to GHCR (`ghcr.io/getboolean/event-queue-bot`) tagged with the branch name (`master` for dev, `prod` for prod). +2. **`discover`** — looks up the single shared DigitalOcean droplet by `DO_DROPLET_NAME`. +3. **`provision`** — creates the droplet via cloud-init only if none exists. +4. **`deploy`** — syncs `docker-compose.app.yml`, writes `.env`, pulls the GHCR image, and runs `sudo /usr/local/bin/deploy-event-queue-bot` on the droplet. Pending Drizzle migrations apply automatically on container start. + +The deploy script is installed by cloud-init at `/usr/local/bin/deploy-event-queue-bot`. Changing it requires a re-provision. Firewall rules are reconciled by `scripts/ensure-firewall.sh` on every deploy. + +Both environments share one droplet: prod runs the `queue-bot` container from `/opt/event-queue-bot`, and dev runs the `queue-bot-nightly` container from `/opt/event-queue-bot-nightly`, each with its own `data/main.sqlite`. The `deploy` job derives the container name, app path, and image tag from the branch and serializes prod/dev deploys via a shared concurrency group. Secrets, variables, and SSH key setup live in [`INFRA.md`](INFRA.md). For SSH access to the droplet, see [`INFRA.md` → "Connect to the Droplet"](INFRA.md#connect-to-the-droplet). diff --git a/docker-compose.app.yml b/docker-compose.app.yml new file mode 100644 index 00000000..6d51fd40 --- /dev/null +++ b/docker-compose.app.yml @@ -0,0 +1,19 @@ +services: + app: + image: ghcr.io/getboolean/event-queue-bot:${IMAGE_TAG:-master} + container_name: ${CONTAINER_NAME:-queue-bot} + restart: always + env_file: .env + volumes: + # Mount only persistent state, not the whole data dir, so the + # data/migrations baked into the image is not shadowed by the host mount. 
+ - ./data/main.sqlite:/app/data/main.sqlite # Bind mount for database + - ./data/backups:/app/data/backups # Bind mount for DB backups + - ./.env:/app/.env # Bind mount for .env file + stdin_open: true # Allow stdin to be open + tty: true # Allocate a pseudo-TTY + logging: + driver: json-file + options: + max-size: "10m" + max-file: "5" diff --git a/docs/image/event-queue-bot-dev-logo.png b/docs/image/event-queue-bot-dev-logo.png new file mode 100644 index 00000000..ca6a0d4a Binary files /dev/null and b/docs/image/event-queue-bot-dev-logo.png differ diff --git a/infra/digitalocean/cloud-init.yml b/infra/digitalocean/cloud-init.yml index aba7772e..fdfc992b 100644 --- a/infra/digitalocean/cloud-init.yml +++ b/infra/digitalocean/cloud-init.yml @@ -32,15 +32,25 @@ write_files: #!/usr/bin/env bash set -euo pipefail - APP_DIR={{APP_PATH_SHELL}} + if [ "$#" -ne 1 ]; then + echo "Usage: deploy-event-queue-bot <app-dir>" >&2 + exit 1 + fi + + APP_DIR="$1" - mkdir -p "$APP_DIR/data" "$APP_DIR/logs" + # The compose file mounts data/main.sqlite and data/backups individually + # (not the whole data dir) so the image's data/migrations is not shadowed. + # Pre-create both so Docker mounts a file/dir rather than creating a new + # directory in place of the sqlite file. + mkdir -p "$APP_DIR/data/backups" + touch "$APP_DIR/data/main.sqlite" chown -R deploy:deploy "$APP_DIR" cd "$APP_DIR" - if [ ! -f docker-compose.yml ]; then - echo "Missing $APP_DIR/docker-compose.yml; sync the repository before deploying." >&2 + if [ ! -f docker-compose.app.yml ]; then + echo "Missing $APP_DIR/docker-compose.app.yml; sync deploy artifacts before deploying." 
>&2 exit 1 fi @@ -49,9 +59,34 @@ write_files: exit 1 fi - docker compose up -d --build + # shellcheck disable=SC1091 + set -a + source .env + set +a + + if [ -z "${CONTAINER_NAME:-}" ]; then + echo "CONTAINER_NAME is required in $APP_DIR/.env" >&2 + exit 1 + fi + + docker compose -f docker-compose.app.yml pull + docker compose -f docker-compose.app.yml up -d docker image prune -f --filter "until=72h" - docker logs --tail 100 queue-bot + + for attempt in {1..12}; do + if docker inspect -f '{{.State.Running}}' "$CONTAINER_NAME" 2>/dev/null | grep -qx true; then + break + fi + if [ "$attempt" -eq 12 ]; then + echo "Bot container did not start after deploy" >&2 + docker ps -a --filter "name=$CONTAINER_NAME" || true + docker logs --tail 200 "$CONTAINER_NAME" 2>&1 || true + exit 1 + fi + sleep 2 + done + + docker logs --tail 100 "$CONTAINER_NAME" - path: /etc/sudoers.d/event-queue-bot-deploy owner: root:root @@ -81,5 +116,5 @@ runcmd: - apt-get update - apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin - systemctl enable --now docker - - mkdir -p {{APP_PATH_SHELL}}/data {{APP_PATH_SHELL}}/logs - - chown -R deploy:deploy {{APP_PATH_SHELL}} + - mkdir -p /opt/event-queue-bot/data /opt/event-queue-bot-nightly/data + - chown -R deploy:deploy /opt/event-queue-bot /opt/event-queue-bot-nightly diff --git a/scripts/ensure-firewall.sh b/scripts/ensure-firewall.sh new file mode 100644 index 00000000..fb7e322c --- /dev/null +++ b/scripts/ensure-firewall.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash +# Ensure the DigitalOcean firewall allows SSH (idempotent). +set -euo pipefail + +if ! command -v doctl >/dev/null 2>&1; then + echo "doctl is required" >&2 + exit 1 +fi + +if ! 
command -v jq >/dev/null 2>&1; then + echo "jq is required" >&2 + exit 1 +fi + +DO_DROPLET_NAME="${DO_DROPLET_NAME:-event-queue-bot}" +DO_FIREWALL_NAME="${DO_FIREWALL_NAME:-${DO_DROPLET_NAME}-ssh}" + +droplet_id="$(doctl compute droplet list \ + --format Name,ID \ + --no-header \ + | awk -v name="${DO_DROPLET_NAME}" '$1 == name { print $2; exit }')" + +if [ -z "${droplet_id}" ]; then + echo "No droplet named ${DO_DROPLET_NAME}; skipping firewall update." >&2 + exit 0 +fi + +inbound_rules="protocol:tcp,ports:22,address:0.0.0.0/0,address:::/0" +outbound_rules="protocol:icmp,address:0.0.0.0/0,address:::/0 protocol:tcp,ports:all,address:0.0.0.0/0,address:::/0 protocol:udp,ports:all,address:0.0.0.0/0,address:::/0" + +firewalls_json="$(doctl compute firewall list --output json)" +firewall_count="$( + jq -r --arg name "$DO_FIREWALL_NAME" '[.[] | select(.name == $name)] | length' <<< "$firewalls_json" +)" + +if [ "$firewall_count" -gt 1 ]; then + echo "Found multiple DigitalOcean Firewalls named ${DO_FIREWALL_NAME}" >&2 + exit 1 +fi + +if [ "$firewall_count" -eq 1 ]; then + firewall_id="$(jq -r --arg name "$DO_FIREWALL_NAME" '.[] | select(.name == $name) | .id' <<< "$firewalls_json")" + echo "Updating Firewall ${DO_FIREWALL_NAME}" + doctl compute firewall update "$firewall_id" \ + --name "$DO_FIREWALL_NAME" \ + --inbound-rules "$inbound_rules" \ + --outbound-rules "$outbound_rules" \ + --droplet-ids "$droplet_id" +else + echo "Creating Firewall ${DO_FIREWALL_NAME}" + doctl compute firewall create \ + --name "$DO_FIREWALL_NAME" \ + --inbound-rules "$inbound_rules" \ + --outbound-rules "$outbound_rules" \ + --droplet-ids "$droplet_id" +fi + +echo "Firewall ${DO_FIREWALL_NAME} allows SSH on port 22." 
diff --git a/scripts/provision-digitalocean.sh b/scripts/provision-digitalocean.sh index b6e112f6..d871c13d 100644 --- a/scripts/provision-digitalocean.sh +++ b/scripts/provision-digitalocean.sh @@ -49,14 +49,8 @@ DO_PROJECT_NAME="${DO_PROJECT_NAME:-Event Queue Bot}" DO_PROJECT_PURPOSE="${DO_PROJECT_PURPOSE:-Service or API}" DO_PROJECT_ENVIRONMENT="${DO_PROJECT_ENVIRONMENT:-Production}" DO_PROJECT_DESCRIPTION="${DO_PROJECT_DESCRIPTION:-}" -APP_PATH="${APP_PATH:-/opt/event-queue-bot}" DO_SWAP_SIZE="${DO_SWAP_SIZE:-1G}" -if [[ ! "$APP_PATH" =~ ^/[A-Za-z0-9._/-]+$ ]]; then - echo "APP_PATH must be an absolute path containing only letters, numbers, dots, underscores, dashes, and slashes" >&2 - exit 1 -fi - if [[ ! "$DO_SWAP_SIZE" =~ ^(0|[1-9][0-9]*[KMG]?)$ ]]; then echo "DO_SWAP_SIZE must match ^(0|[1-9][0-9]*[KMG]?)$ (e.g. 0 to disable, 512M, 1G)" >&2 exit 1 @@ -74,17 +68,14 @@ printf '%s\n' "$SSH_DEPLOY_PUBLIC_KEY" > "$public_key_file" ssh_key_fingerprint="$(ssh-keygen -E md5 -lf "$public_key_file" | awk '{print $2}' | sed 's/^MD5://')" ssh_public_key_yaml="$(yaml_quote "$SSH_DEPLOY_PUBLIC_KEY")" -app_path_shell="'$APP_PATH'" ssh_host_private_key_b64="$(printf '%s' "$SSH_HOST_PRIVATE_KEY" | base64 -w0)" SSH_PUBLIC_KEY_YAML="$ssh_public_key_yaml" \ -APP_PATH_SHELL="$app_path_shell" \ SSH_HOST_PRIVATE_KEY_B64="$ssh_host_private_key_b64" \ SSH_HOST_PUBLIC_KEY="$SSH_HOST_PUBLIC_KEY" \ DO_SWAP_SIZE="$DO_SWAP_SIZE" \ perl -0pe ' s/\{\{SSH_PUBLIC_KEY_YAML\}\}/$ENV{SSH_PUBLIC_KEY_YAML}/g; - s/\{\{APP_PATH_SHELL\}\}/$ENV{APP_PATH_SHELL}/g; s/\{\{SSH_HOST_PRIVATE_KEY_B64\}\}/$ENV{SSH_HOST_PRIVATE_KEY_B64}/g; s/\{\{SSH_HOST_PUBLIC_KEY\}\}/$ENV{SSH_HOST_PUBLIC_KEY}/g; s/\{\{DO_SWAP_SIZE\}\}/$ENV{DO_SWAP_SIZE}/g; @@ -247,36 +238,8 @@ if [ -n "$DO_PROJECT_NAME" ]; then doctl projects resources assign "$project_id" --resource="do:droplet:${droplet_id}" >/dev/null fi -echo "Listing Firewalls" -firewalls_json="$(doctl compute firewall list --output json)" -firewall_count="$( - jq -r 
--arg name "$DO_FIREWALL_NAME" '[.[] | select(.name == $name)] | length' <<< "$firewalls_json" -)" - -if [ "$firewall_count" -gt 1 ]; then - echo "Found multiple DigitalOcean Firewalls named ${DO_FIREWALL_NAME}" >&2 - exit 1 -fi - -inbound_rules="protocol:tcp,ports:22,address:0.0.0.0/0,address:::/0" -outbound_rules="protocol:icmp,address:0.0.0.0/0,address:::/0 protocol:tcp,ports:all,address:0.0.0.0/0,address:::/0 protocol:udp,ports:all,address:0.0.0.0/0,address:::/0" - -if [ "$firewall_count" -eq 1 ]; then - firewall_id="$(jq -r --arg name "$DO_FIREWALL_NAME" '.[] | select(.name == $name) | .id' <<< "$firewalls_json")" - echo "Updating Firewall ${DO_FIREWALL_NAME}" - doctl compute firewall update "$firewall_id" \ - --name "$DO_FIREWALL_NAME" \ - --inbound-rules "$inbound_rules" \ - --outbound-rules "$outbound_rules" \ - --droplet-ids "$droplet_id" -else - echo "Creating Firewall ${DO_FIREWALL_NAME}" - doctl compute firewall create \ - --name "$DO_FIREWALL_NAME" \ - --inbound-rules "$inbound_rules" \ - --outbound-rules "$outbound_rules" \ - --droplet-ids "$droplet_id" -fi +DO_DROPLET_NAME="$DO_DROPLET_NAME" DO_FIREWALL_NAME="$DO_FIREWALL_NAME" \ + bash "$(dirname "$0")/ensure-firewall.sh" for attempt in {1..60}; do droplet_json="$(doctl compute droplet get "$droplet_id" --output json)"