diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index d29e94b..2d95e02 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -13,7 +13,7 @@ jobs: name: Build and Push strategy: matrix: - service: [tesseract-lambda-python,keyword-lambda-python,tesseract-web,tesseract-lambda] + service: [tesseract-lambda-python,keyword-lambda-python,tesseract-web,tesseract-lambda,vision-web] fail-fast: false runs-on: ubuntu-latest steps: diff --git a/workers/vision-web/.env.example b/workers/vision-web/.env.example new file mode 100644 index 0000000..a05742a --- /dev/null +++ b/workers/vision-web/.env.example @@ -0,0 +1,10 @@ +# true/false. If true, enables some dev helpers +ENABLE_DEV=true +# Points to where the backend is located. This should not have a trailing slash +ASHIRT_BACKEND_URL=http://10.0.0.100:3000 +# The access key of a headless user +ASHIRT_ACCESS_KEY=gR6nVtaQmp2SvzIqLUWdedDk +# The secret key (in base64 format -- how it is delivered via the ashirt UI) of a headless user +ASHIRT_SECRET_KEY=WvtvxFaJS0mPs82nCzqamI+bOGXpq7EIQhg4UD8nxS5448XG9N0gNAceJGBLPdCA3kAzC4MdUSHnKCJ/lZD++A== +# Add a list of questions +VISION_QUESTIONS="What does the image say?" 
\ No newline at end of file diff --git a/workers/vision-web/.gitignore b/workers/vision-web/.gitignore new file mode 100644 index 0000000..ab4fdde --- /dev/null +++ b/workers/vision-web/.gitignore @@ -0,0 +1,25 @@ +# compiled output +__pycache__ + +# OS +.DS_Store + +# IDEs and editors +/.idea +.project +.classpath +.c9/ +*.launch +.settings/ +*.sublime-workspace + +# IDE - VSCode +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json + +# local env files +.env* +!.env.example diff --git a/workers/vision-web/Dockerfile b/workers/vision-web/Dockerfile new file mode 100644 index 0000000..40f1965 --- /dev/null +++ b/workers/vision-web/Dockerfile @@ -0,0 +1,37 @@ +FROM --platform=linux/amd64 python:3.12-slim AS builder + +WORKDIR /build + +COPY Pipfile.lock Pipfile ./ +RUN pip install --user pipenv +RUN /root/.local/bin/pipenv requirements > requirements.txt +### + +FROM --platform=linux/amd64 python:3.12-slim AS runner + +WORKDIR /app +COPY --from=builder /build/requirements.txt . +################################### +# Install other dependencies here # +################################### + +RUN pip install -r requirements.txt +RUN pip install --pre onnxruntime-genai numpy huggingface_hub +# Download the required model components +RUN huggingface-cli download microsoft/Phi-3-vision-128k-instruct-onnx-cpu --include cpu-int4-rtn-block-32-acc-level-4/* --local-dir model/ + +# COPY bin/docker_start.sh ./start.sh +COPY src . 
+
+EXPOSE 8080
+
+# Run as an unprivileged user (UID 405 is the guest user on Alpine; python:3.12-slim is Debian-based, so here it is just a numeric UID)
+USER 405
+
+# some guidance on using gunicorn in containers:
+# https://pythonspeed.com/articles/gunicorn-in-docker/
+CMD ["gunicorn", "--worker-tmp-dir", "/dev/shm", \
+    "--workers=1", "--worker-class=gthread", \
+    "--log-file=-", \
+    "--timeout=600", \
+    "-b", "0.0.0.0:8080", "wsgi:app"]
\ No newline at end of file
diff --git a/workers/vision-web/Dockerfile.dev b/workers/vision-web/Dockerfile.dev
new file mode 100644
index 0000000..9f70940
--- /dev/null
+++ b/workers/vision-web/Dockerfile.dev
@@ -0,0 +1,30 @@
+FROM --platform=linux/amd64 python:3.12-slim
+
+# With help from https://pipenv.pypa.io/en/latest/basics/#pipenv-and-docker-containers
+
+WORKDIR /app
+ENV PIPENV_VENV_IN_PROJECT=1
+
+RUN pip install --user pipenv huggingface_hub
+# Download the required model components
+RUN /root/.local/bin/huggingface-cli download microsoft/Phi-3-vision-128k-instruct-onnx-cpu --include cpu-int4-rtn-block-32-acc-level-4/* --local-dir model/
+
+###################################
+# Install other dependencies here #
+###################################
+
+COPY Pipfile.lock Pipfile ./
+RUN /root/.local/bin/pipenv sync
+RUN /root/.local/bin/pipenv run pip install --pre onnxruntime-genai numpy
+
+ENV FLASK_APP=src/wsgi.py
+ENV FLASK_DEBUG=1
+ENV FLASK_ENV=development
+
+EXPOSE 8080
+
+COPY src ./src/
+
+CMD ["/root/.local/bin/pipenv", "run", \
+    "flask", "run", \
+    "--host=0.0.0.0", "--port=8080"]
diff --git a/workers/vision-web/Makefile b/workers/vision-web/Makefile
new file mode 100644
index 0000000..f9662bf
--- /dev/null
+++ b/workers/vision-web/Makefile
@@ -0,0 +1,30 @@
+imageNameDev = vision-web-dev
+imageName = vision-web
+
+.PHONY: build_dev
+build_dev:
+	docker build -t $(imageNameDev) -f Dockerfile.dev .
+
+.PHONY: build_release
+build_release:
+	docker build -t $(imageName) -f Dockerfile .
+ +run_dev: + docker compose up --build + +.PHONY: test-test +test-test: + curl -XPOST "http://localhost:9000/test" -d '{"type": "test"}' + +.PHONY: test-process-img +test-process-img: + curl -XPOST "http://localhost:9000/process" \ + -H "Content-Type: application/json" \ + -d '{"type": "evidence_created", "evidenceUuid": "seed_dursleys", "operationSlug": "HPSS", "contentType": "image"}' + +.PHONY: test-unsupported +test-unsupported: + curl -XPOST "http://localhost:9000/process" \ + -H "Content-Type: application/json" \ + -d '{"type": "unsupported"}' + diff --git a/workers/vision-web/Pipfile b/workers/vision-web/Pipfile new file mode 100644 index 0000000..4753159 --- /dev/null +++ b/workers/vision-web/Pipfile @@ -0,0 +1,17 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +flask = "*" +gunicorn = "*" +requests = "*" +structlog = "*" +python-dotenv = "*" + +[dev-packages] +autopep8 = "*" + +[requires] +python_version = "3.12" diff --git a/workers/vision-web/Pipfile.lock b/workers/vision-web/Pipfile.lock new file mode 100644 index 0000000..f14c522 --- /dev/null +++ b/workers/vision-web/Pipfile.lock @@ -0,0 +1,318 @@ +{ + "_meta": { + "hash": { + "sha256": "bca5f134f8fc8994b66fee4604e9c92d368d151e6e8e146b80f94aecd44e3200" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.12" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "blinker": { + "hashes": [ + "sha256:1779309f71bf239144b9399d06ae925637cf6634cf6bd131104184531bf67c01", + "sha256:8f77b09d3bf7c795e969e9486f39c2c5e9c39d4ee07424be2bc594ece9642d83" + ], + "markers": "python_version >= '3.8'", + "version": "==1.8.2" + }, + "certifi": { + "hashes": [ + "sha256:3cd43f1c6fa7dedc5899d69d3ad0398fd018ad1a17fba83ddaf78aa46c747516", + "sha256:ddc6c8ce995e6987e7faf5e3f1b02b302836a0e5d98ece18392cb1a36c72ad56" + ], + "markers": "python_version >= '3.6'", + "version": "==2024.6.2" + }, + 
"charset-normalizer": { + "hashes": [ + "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027", + "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087", + "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786", + "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8", + "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09", + "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185", + "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574", + "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e", + "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519", + "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898", + "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269", + "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3", + "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f", + "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6", + "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8", + "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a", + "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73", + "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc", + "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714", + "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2", + "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc", + "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce", + "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d", + "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e", + "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6", + 
"sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269", + "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96", + "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d", + "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a", + "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4", + "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77", + "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d", + "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0", + "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed", + "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068", + "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac", + "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25", + "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8", + "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab", + "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26", + "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2", + "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db", + "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f", + "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5", + "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99", + "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c", + "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d", + "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811", + "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa", + "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a", + "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03", 
+ "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b", + "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04", + "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c", + "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001", + "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458", + "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389", + "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99", + "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985", + "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537", + "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238", + "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f", + "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d", + "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796", + "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a", + "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143", + "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8", + "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c", + "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5", + "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5", + "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711", + "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4", + "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6", + "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c", + "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7", + "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4", + 
"sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b", + "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae", + "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12", + "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c", + "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae", + "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8", + "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887", + "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b", + "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4", + "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f", + "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5", + "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33", + "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519", + "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561" + ], + "markers": "python_full_version >= '3.7.0'", + "version": "==3.3.2" + }, + "click": { + "hashes": [ + "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28", + "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de" + ], + "markers": "python_version >= '3.7'", + "version": "==8.1.7" + }, + "flask": { + "hashes": [ + "sha256:34e815dfaa43340d1d15a5c3a02b8476004037eb4840b34910c6e21679d288f3", + "sha256:ceb27b0af3823ea2737928a4d99d125a06175b8512c445cbd9a9ce200ef76842" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==3.0.3" + }, + "gunicorn": { + "hashes": [ + "sha256:350679f91b24062c86e386e198a15438d53a7a8207235a78ba1b53df4c4378d9", + "sha256:4a0b436239ff76fb33f11c07a16482c521a7e09c1ce3cc293c2330afe01bec63" + ], + "index": "pypi", + "markers": "python_version >= '3.7'", + "version": "==22.0.0" + }, + "idna": { + "hashes": [ + 
"sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc", + "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0" + ], + "markers": "python_version >= '3.5'", + "version": "==3.7" + }, + "itsdangerous": { + "hashes": [ + "sha256:c6242fc49e35958c8b15141343aa660db5fc54d4f13a1db01a3f5891b98700ef", + "sha256:e0050c0b7da1eea53ffaf149c0cfbb5c6e2e2b69c4bef22c81fa6eb73e5f6173" + ], + "markers": "python_version >= '3.8'", + "version": "==2.2.0" + }, + "jinja2": { + "hashes": [ + "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369", + "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d" + ], + "markers": "python_version >= '3.7'", + "version": "==3.1.4" + }, + "markupsafe": { + "hashes": [ + "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf", + "sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff", + "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f", + "sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3", + "sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532", + "sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f", + "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617", + "sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df", + "sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4", + "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906", + "sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f", + "sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4", + "sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8", + "sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371", + "sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2", + "sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465", 
+ "sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52", + "sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6", + "sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169", + "sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad", + "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2", + "sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0", + "sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029", + "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f", + "sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a", + "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced", + "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5", + "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c", + "sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf", + "sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9", + "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb", + "sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad", + "sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3", + "sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1", + "sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46", + "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc", + "sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a", + "sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee", + "sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900", + "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5", + "sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea", + 
"sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f", + "sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5", + "sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e", + "sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a", + "sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f", + "sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50", + "sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a", + "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b", + "sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4", + "sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff", + "sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2", + "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46", + "sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b", + "sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf", + "sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5", + "sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5", + "sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab", + "sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd", + "sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68" + ], + "markers": "python_version >= '3.7'", + "version": "==2.1.5" + }, + "packaging": { + "hashes": [ + "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002", + "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124" + ], + "markers": "python_version >= '3.8'", + "version": "==24.1" + }, + "python-dotenv": { + "hashes": [ + "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca", + "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a" + ], + "index": 
"pypi", + "markers": "python_version >= '3.8'", + "version": "==1.0.1" + }, + "requests": { + "hashes": [ + "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", + "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==2.32.3" + }, + "structlog": { + "hashes": [ + "sha256:0e3fe74924a6d8857d3f612739efb94c72a7417d7c7c008d12276bca3b5bf13b", + "sha256:983bd49f70725c5e1e3867096c0c09665918936b3db27341b41d294283d7a48a" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==24.2.0" + }, + "urllib3": { + "hashes": [ + "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472", + "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168" + ], + "markers": "python_version >= '3.8'", + "version": "==2.2.2" + }, + "werkzeug": { + "hashes": [ + "sha256:097e5bfda9f0aba8da6b8545146def481d06aa7d3266e7448e2cccf67dd8bd18", + "sha256:fc9645dc43e03e4d630d23143a04a7f947a9a3b5727cd535fdfe155a17cc48c8" + ], + "markers": "python_version >= '3.8'", + "version": "==3.0.3" + } + }, + "develop": { + "autopep8": { + "hashes": [ + "sha256:5cfe45eb3bef8662f6a3c7e28b7c0310c7310d340074b7f0f28f9810b44b7ef4", + "sha256:b716efa70cbafbf4a2c9c5ec1cabfa037a68f9e30b04c74ffa5864dd49b8f7d2" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==2.3.0" + }, + "pycodestyle": { + "hashes": [ + "sha256:442f950141b4f43df752dd303511ffded3a04c2b6fb7f65980574f0c31e6e79c", + "sha256:949a39f6b86c3e1515ba1787c2022131d165a8ad271b11370a8819aa070269e4" + ], + "markers": "python_version >= '3.8'", + "version": "==2.12.0" + } + } +} diff --git a/workers/vision-web/Readme.md b/workers/vision-web/Readme.md new file mode 100644 index 0000000..5381af8 --- /dev/null +++ b/workers/vision-web/Readme.md @@ -0,0 +1,55 @@ +# Vision Web Worker + +* [Flask](https://flask.palletsprojects.com/en/2.1.x/), to manage the network connection +* 
[gunicorn](https://gunicorn.org/), for production deployment +* [requests](https://docs.python-requests.org/en/latest/), to handle contacting the ashirt instance +* [structlog](https://www.structlog.org/en/stable/), for structured logging +* [python-dotenv](https://pypi.org/project/python-dotenv/), for environment loading (this is primarily aimed at development) + +In addition, this service tries to be as type-safe as possible, so extra effort has been made to ensure that the typing is specified as much as possible. + +To get up and running, open the project root in a terminal, install pipenv, and run `pipenv shell`, then `pipenv install` + +## Deploying to AShirt + +The typical configuration for deploying this worker archetype is going to look roughly like this: + +```json +{ + "type": "web", + "version": 1, + "url": "http://vision-web/process" +} +``` + +Note the url: this is likely what will change for your version. + +## Adding custom logic + +Most programs should be able to largely ignore most of the code, and instead focus on the `actions` directory, and specifically the events you want to target. + +## Integrating into AShirt testing environment + +Notably, the dev port exposed is port 8080, so all port mapping has to be done with that in mind. When running locally (not via docker), the exposed port is configurable. + +This configuration should work for your scenario, though the volumes mapped might need to be different. + +```yaml + vision-web: + build: + context: ashirt-workers/workers/vision-web + dockerfile: Dockerfile.dev + ports: + - 3004:8080 + restart: on-failure + volumes: + - ./ashirt-workers/workers/vision-web/src/:/app/src/ + environment: + ENABLE_DEV: true + ASHIRT_BACKEND_URL: http://backend:3000 + ASHIRT_ACCESS_KEY: gR6nVtaQmp2SvzIqLUWdedDk + ASHIRT_SECRET_KEY: WvtvxFaJS0mPs82nCzqamI+bOGXpq7EIQhg4UD8nxS5448XG9N0gNAceJGBLPdCA3kAzC4MdUSHnKCJ/lZD++A== +``` + + +Note that the mapped volume overwrites the source files placed in the image. 
This allows for hot-reloading of the worker when deployed to docker-compose. If you don't want or need hot reloading, then you can simply omit this declaration. diff --git a/workers/vision-web/docker-compose.yml b/workers/vision-web/docker-compose.yml new file mode 100644 index 0000000..7c702f4 --- /dev/null +++ b/workers/vision-web/docker-compose.yml @@ -0,0 +1,12 @@ +version: '3' +services: + app: + build: + dockerfile: Dockerfile + context: . + env_file: + - .env + ports: + - 9000:8080 + volumes: + - ./src:/app/src diff --git a/workers/vision-web/requirements.txt b/workers/vision-web/requirements.txt new file mode 100644 index 0000000..8d5fa32 --- /dev/null +++ b/workers/vision-web/requirements.txt @@ -0,0 +1,17 @@ +blinker==1.8.2 +certifi==2024.6.2 +charset-normalizer==3.3.2 +click==8.1.7 +Flask==3.0.3 +gunicorn==22.0.0 +idna==3.7 +itsdangerous==2.2.0 +Jinja2==3.1.4 +MarkupSafe==2.1.5 +onnxruntime-genai==0.3.0rc2 +packaging==24.1 +python-dotenv==1.0.1 +requests==2.32.3 +structlog==24.2.0 +urllib3==2.2.2 +Werkzeug==3.0.3 diff --git a/workers/vision-web/src/__init__.py b/workers/vision-web/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/workers/vision-web/src/actions/__init__.py b/workers/vision-web/src/actions/__init__.py new file mode 100644 index 0000000..14ed358 --- /dev/null +++ b/workers/vision-web/src/actions/__init__.py @@ -0,0 +1,2 @@ +from .process_handler import * +from .types import *
diff --git a/workers/vision-web/src/actions/process_handler.py b/workers/vision-web/src/actions/process_handler.py
new file mode 100644
index 0000000..210247a
--- /dev/null
+++ b/workers/vision-web/src/actions/process_handler.py
@@ -0,0 +1,134 @@
+from request_types import EvidenceCreatedBody
+from services import AShirtRequestsService
+from constants import SupportedContentType
+from .types import ProcessResultDTO
+import onnxruntime_genai as og
+import uuid
+import os
+import io
+
+model = og.Model('model/cpu-int4-rtn-block-32-acc-level-4/')
+processor = model.create_multimodal_processor()
+tokenizer_stream = processor.create_stream()
+
+# Questions asked about every image when VISION_QUESTIONS provides none
+_DEFAULT_QUESTIONS = [
+    "What times are shown in the image?",
+    "Which applications are open in the image?",
+    "Which operating system is being used in the image?",
+    "What does the image say?"
+]
+
+
+def _load_questions() -> list[str]:
+    """
+    _load_questions returns the questions to ask about an image: the comma-separated entries of
+    the VISION_QUESTIONS environment variable, or the defaults if it is unset or has no entries
+    """
+    raw = os.environ.get('VISION_QUESTIONS', '')
+    # Tolerate spaces around the commas and stray commas, so the model is never run on an
+    # empty question
+    questions = [q.strip() for q in raw.split(',') if q.strip()]
+    return questions or list(_DEFAULT_QUESTIONS)
+
+
+def handle_evidence_created(body: EvidenceCreatedBody) -> ProcessResultDTO:
+    """
+    handle_evidence_created is called when a web request comes in, is validated, and indicates
+    that work needs to be done on a piece of evidence.
+
+    Image evidence is fetched from AShirt and every configured question is put to the vision
+    model; the question/answer pairs are returned as 'processed' content. Other content types
+    are 'rejected', and 'error' is returned when the evidence content is not received as bytes.
+    """
+    accepted_types = [
+        SupportedContentType.IMAGE
+    ]
+
+    if body.content_type not in accepted_types:
+        return {
+            'action': 'rejected'
+        }
+
+    ashirt_svc = AShirtRequestsService(
+        os.environ.get('ASHIRT_BACKEND_URL', ''),
+        os.environ.get('ASHIRT_ACCESS_KEY', ''),
+        os.environ.get('ASHIRT_SECRET_KEY', '')
+    )
+    # Gather content
+    evidence_content = ashirt_svc.get_evidence_content(
+        body.operation_slug, body.evidence_uuid, 'media'
+    )
+    if not isinstance(evidence_content, bytes):
+        # Only raw bytes can be written out as an image. Report the failure rather than
+        # crashing while writing a str, or returning None in place of a result.
+        if isinstance(evidence_content, str):
+            print("got a string response")
+        return {
+            'action': 'error',
+            'content': 'Unable to retrieve evidence content'
+        }
+
+    temp_image_path = f"/tmp/{uuid.uuid4()}.png"  # Generate a unique name for the image file in /tmp directory
+    try:
+        with open(temp_image_path, "wb") as f:
+            f.write(evidence_content)
+        # Load the image only once the file has been closed, so every byte is on disk
+        img = og.Images.open(temp_image_path)
+
+        questions = _load_questions()
+        # Run inference for each question
+        answers = [do_ai(question=q, image=img) for q in questions]
+    finally:
+        # Delete the image file after using it, even when loading or inference failed
+        if os.path.exists(temp_image_path):
+            os.remove(temp_image_path)
+
+    chunks = [f'Q:{q}\nA:{a}\n' for q, a in zip(questions, answers)]
+    return {
+        'action': 'processed',
+        'content': '\n'.join(chunks)
+    }
+
+
+def do_ai(question, image=None):
+    """
+    do_ai asks the vision model a single question, optionally about an image, and returns the
+    generated text. Progress messages and the tokens, as they are decoded, are printed to stdout.
+    """
+    prompt = "<|user|>\n"
+    if image is None:
+        print("No image provided")
+    else:
+        print("Loading image...")
+        prompt += "<|image_1|>\n"
+
+    prompt += f"{question}<|end|>\n<|assistant|>\n"
+    print("Processing image and prompt...")
+    inputs = processor(prompt, images=image)
+
+    print("Generating response...")
+    params = og.GeneratorParams(model)
+    params.set_inputs(inputs)
+    params.set_search_options(max_length=3072)
+
+    generator = og.Generator(model, params)
+
+    pieces = []
+    while not generator.is_done():
+        generator.compute_logits()
+        generator.generate_next_token()
+
+        new_token = generator.get_next_tokens()[0]
+        decoded_text = tokenizer_stream.decode(new_token)
+        pieces.append(decoded_text)
+        print(decoded_text, end='', flush=True)
+    for _ in range(3):
+        print()
+    generated_text = ''.join(pieces)
+    # NOTE(review): replacing '' with '' is a no-op; presumably a marker token was meant to be
+    # stripped from the answer here -- confirm what it should be
+    generated_text = generated_text.replace('','')
+    # Delete the generator to free the captured graph before creating another one
+    del generator
+    return generated_text
\ No newline at end of file
diff --git a/workers/vision-web/src/actions/types.py b/workers/vision-web/src/actions/types.py new file mode 100644 index 0000000..50402b9 --- /dev/null +++ b/workers/vision-web/src/actions/types.py @@ -0,0 +1,20 @@ +from typing import Literal, Optional, TypedDict, Union + + +class ProcessResultNormal(TypedDict): + action: Literal['rejected', 'error'] + content: Optional[str] + + +class ProcessResultComplete(TypedDict): + action: Literal['processed'] + content: 
str + + +class ProcessResultDeferred(TypedDict): + action: Literal['deferred'] + + +ProcessResultDTO = Union[ProcessResultNormal, + ProcessResultComplete, + ProcessResultDeferred] diff --git a/workers/vision-web/src/constants/__init__.py b/workers/vision-web/src/constants/__init__.py new file mode 100644 index 0000000..7d8d6f7 --- /dev/null +++ b/workers/vision-web/src/constants/__init__.py @@ -0,0 +1,4 @@ +from .supported_content_type import * + +STATE_NAME = 'state' +APP_LOGGER = 'logger' diff --git a/workers/vision-web/src/constants/supported_content_type.py b/workers/vision-web/src/constants/supported_content_type.py new file mode 100644 index 0000000..9a10951 --- /dev/null +++ b/workers/vision-web/src/constants/supported_content_type.py @@ -0,0 +1,22 @@ +from enum import Enum, auto + + +class SupportedContentType(Enum): + HTTP_REQUEST_CYCLE = auto() + TERMINAL_RECORDING = auto() + CODEBLOCK = auto() + EVENT = auto() + IMAGE = auto() + NONE = auto() + + @staticmethod + def from_str(s: str): + values: dict[str, SupportedContentType] = { + "http-request-cycle": SupportedContentType.HTTP_REQUEST_CYCLE, + "terminal-recording": SupportedContentType.TERMINAL_RECORDING, + "codeblock": SupportedContentType.CODEBLOCK, + "event": SupportedContentType.EVENT, + "image": SupportedContentType.IMAGE, + "none": SupportedContentType.NONE, + } + return values[s] diff --git a/workers/vision-web/src/helpers/__init__.py b/workers/vision-web/src/helpers/__init__.py new file mode 100644 index 0000000..e1c7f01 --- /dev/null +++ b/workers/vision-web/src/helpers/__init__.py @@ -0,0 +1,2 @@ +from .is_literal import * +from .flask_helpers import *
diff --git a/workers/vision-web/src/helpers/flask_helpers.py b/workers/vision-web/src/helpers/flask_helpers.py
new file mode 100644
index 0000000..31d0165
--- /dev/null
+++ b/workers/vision-web/src/helpers/flask_helpers.py
@@ -0,0 +1,28 @@
+from flask import (
+    current_app, Response, make_response, Flask,
+)
+
+
+def jsonify_no_content() -> Response:
+    """
+    jsonify_no_content produces an empty 204 (no content) response that carries the JSON
+    mimetype. It reads current_app, so it must be called while an application context is active
+    """
+    # from https://www.erol.si/2018/03/flask-return-204-no-content-response/
+    response = make_response('', 204)
+    # The JSONIFY_MIMETYPE config key was removed in Flask 2.3 (requirements.txt pins 3.0.3), so
+    # indexing it raises KeyError. Honor the key if it has been set, else use the JSON default.
+    response.mimetype = current_app.config.get('JSONIFY_MIMETYPE') or 'application/json'
+
+    return response
+
+
+def remove_flask_logging(app: Flask) -> None:
+    """
+    remove_flask_logging disables the app's own logger and the 'werkzeug' logger
+    """
+    # See: https://gist.github.com/daryltucker/e40c59a267ea75db12b1
+    import logging
+    app.logger.disabled = True
+    log = logging.getLogger('werkzeug')
+    log.disabled = True
diff --git a/workers/vision-web/src/helpers/is_literal.py b/workers/vision-web/src/helpers/is_literal.py new file mode 100644 index 0000000..ab1e858 --- /dev/null +++ b/workers/vision-web/src/helpers/is_literal.py @@ -0,0 +1,13 @@ +from typing import Any + + +def is_literal(v: Any, expectedType: type, expectedValue: Any) -> bool: + """ + is_literal is a small helper that verifies that the value passed has the expected type + and the expected value. This is useful to validate literal values provided by an external + service + """ + return ( + type(v) == expectedType + and v == expectedValue + ) diff --git a/workers/vision-web/src/main.py b/workers/vision-web/src/main.py new file mode 100644 index 0000000..22bdba8 --- /dev/null +++ b/workers/vision-web/src/main.py @@ -0,0 +1,45 @@ +import os + +from dotenv import dotenv_values +from flask import Flask +import structlog + +from constants import APP_LOGGER, STATE_NAME +from helpers import remove_flask_logging +from project_config import ProjectConfig +from routes import (ashirt, dev) +from services import AShirtRequestsService +from services import set_service + + +def create_app() -> Flask: + app = Flask(__name__) + + full_env = { + **dotenv_values(".env"), + **os.environ + } + cfg = ProjectConfig.from_dict(full_env) + app.config[STATE_NAME] = cfg + app.config[APP_LOGGER] = structlog.get_logger() + + set_service( + AShirtRequestsService(cfg.backend_url, cfg.access_key, cfg.secret_key_b64) + ) + app.register_blueprint(ashirt.bp) # 
if __name__ == "__main__":
    # Direct execution: build the app and serve it with Flask's built-in server.
    app = create_app()
    logger = app.config[APP_LOGGER]
    try:
        logger.msg("App Starting")
        app.run(host="0.0.0.0", port=app.config[STATE_NAME].port)
    finally:
        # Logged on every exit path, including when app.run raises
        logger.msg("App Exiting")
@dataclass(repr=False, frozen=True)
class EvidenceCreatedBody(GenericRequestBody):
    """
    EvidenceCreatedBody reflects the message received from AShirt when AShirt requests metadata
    processing
    """
    type: Literal['evidence_created']
    evidence_uuid: str
    operation_slug: str
    content_type: SupportedContentType

    def is_valid_instance(self) -> bool:
        """
        is_valid_instance checks that every field holds a value of exactly the expected type,
        and that type is the 'evidence_created' literal.
        """
        return all([
            is_literal(self.type, str, 'evidence_created'),
            type(self.evidence_uuid) is str,
            type(self.operation_slug) is str,
            type(self.content_type) is SupportedContentType,
        ])

    @classmethod
    def from_json(cls, data: dict[str, Any]):
        """
        from_json builds a new EvidenceCreatedBody from the decoded request JSON.

        Raises KeyError when a field is missing or when contentType is not a known
        content type.
        """
        # Build a fresh instance per request. Writing the values onto the class itself would
        # share one request's data with every other request handled by the process.
        return cls(
            type=data['type'],
            evidence_uuid=data['evidenceUuid'],
            operation_slug=data['operationSlug'],
            content_type=SupportedContentType.from_str(data['contentType']),
        )

    @classmethod
    def parse_if_valid(cls, data: dict[str, Any]):
        """
        parse_if_valid parses data and returns the resulting instance when it is valid.
        None is returned when a required key is missing or validation fails.

        Defined here (rather than relying on the inherited helper) so that callers always
        receive the parsed instance produced by from_json.
        """
        try:
            inst = cls.from_json(data)
        except KeyError:
            return None
        return inst if inst.is_valid_instance() else None
class GenericRequestBody(ABC):
    """
    GenericRequestBody is the common base for request payloads. Subclasses describe how to
    build themselves from decoded JSON (from_json) and how to validate the result
    (is_valid_instance); parse_if_valid combines the two.
    """

    @classmethod
    @abstractmethod
    def from_json(cls, data: dict[str, Any]):
        """
        from_json builds the body from decoded JSON. Implementations are expected to raise
        KeyError when a required field is absent.
        """

    @abstractmethod
    def is_valid_instance(self) -> bool:
        """
        is_valid_instance reports whether the parsed fields hold acceptable values.
        """

    @classmethod
    def parse_if_valid(cls, data: dict[str, Any]):
        """
        parse_if_valid parses the given data, then checks that the result is valid.
        If it is not valid, if data is not a dictionary, or if a KeyError occurs when parsing,
        then None is returned. Otherwise the object produced by from_json is returned.
        """
        # A JSON payload may legally be a list, string, number or null; none of those can be
        # one of our bodies, so reject them up front instead of failing on data['...'].
        if not isinstance(data, dict):
            return None
        try:
            inst = cls.from_json(data)
            # Invoked through the class (not inst.is_valid_instance()) so that an
            # implementation whose from_json hands back the class object itself still works.
            return inst if cls.is_valid_instance(inst) else None
        except KeyError:
            return None
@bp.route("/process", methods=['POST'])
def process_request() -> Response:
    """
    process_request handles requests received from AShirt.

    A valid test body is answered with {"status": "ok"}. A valid evidence_created body is
    passed to actions.handle_evidence_created, and the resulting action is translated into an
    HTTP status code. Anything else is answered with 501 (Not Implemented).
    """
    data = request.json
    if TestBody.parse_if_valid(data) is not None:
        return jsonify({"status": "ok"})

    body = EvidenceCreatedBody.parse_if_valid(data)
    if body is None:
        return Response('Unsupported Body Type', status=StatusCode.NOT_IMPLEMENTED.value)

    action_result = actions.handle_evidence_created(body)
    status = {
        'processed': StatusCode.OK.value,
        'deferred': StatusCode.ACCEPTED.value,
        'error': StatusCode.INTERNAL_SERVICE_ERROR.value,
        'rejected': StatusCode.NOT_ACCEPTABLE.value,
    }[action_result['action']]

    # Only provide a body when a body is meaningful
    if action_result.get('content') is None:
        return Response(status=status)

    # The payload is JSON, so label it as such; Response defaults to text/html otherwise.
    return Response(json.dumps(action_result), status=status, mimetype='application/json')
@bp.after_request
def on_request_complete(resp):
    """
    on_request_complete logs when the request has been completed, using the request-scoped
    logger stored on flask.g. The response is always returned unchanged.
    """
    # g.get returns None when no state was stored for this request; plain attribute access
    # would raise AttributeError before the type check below could do its job.
    req_state = g.get('_request_state')
    if isinstance(req_state, RequestState):
        req_state.req_log.msg(
            "Request Complete", response_code=resp.status_code, body=resp.data)

    return resp
def svc() -> AShirtService:
    """
    svc provides the established, concrete AShirtService instance that was stored via
    set_service, which can then be used to make requests to AShirt.

    Raises RuntimeError if set_service has not been called yet.
    """
    try:
        return _ashirt_service
    except NameError:
        # The module only declares the variable's type; the name does not exist until
        # set_service assigns it. Turn the opaque NameError into an actionable message.
        raise RuntimeError(
            "AShirt service has not been configured; call set_service() before svc()"
        ) from None
def create_evidence(self, operation_slug: str, i: CreateEvidenceInput):
    """
    create_evidence models a multipart POST to /api/operations/{operation_slug}/evidence and
    hands it to build_request. notes is always sent; contentType, tagIds (JSON encoded) and
    the file are only included when present in the input.
    """
    fields = {'notes': i['notes']}
    add_if_not_none(fields, 'contentType', i.get('content_type'))
    add_if_not_none(fields, 'tagIds', i.get('tag_ids'), json.dumps)

    form = encode_form(fields, {"file": i.get('file')})

    request_cfg = RC(
        'POST',
        f'/api/operations/{operation_slug}/evidence',
        body=form['data'],
        multipart_boundary=form['boundary'],
    )
    return self.build_request(request_cfg)
def add_if_not_none(body: dict[str, Any], key: str, value: Any,
                    tf: Optional[Callable[[Any], Any]] = None):
    """
    add_if_not_none stores value under key in body, unless value is None, in which case body
    is left untouched. When a transform (tf) is given, tf(value) is stored instead of value.
    The dictionary is modified in place; nothing is returned.
    """
    if value is None:
        return
    body[key] = value if tf is None else tf(value)
def _make_request(self, cfg: RC, headers: dict[str, str], body: Optional[bytes])->bytes:
    """
    _make_request sends the modeled request with the Requests library and returns the part of
    the response selected by cfg.return_type: the decoded JSON ('json'), the HTTP status code
    ('status'), the body as text ('text'), or otherwise the raw body bytes.
    """
    # With stream=True the body is fetched lazily, and the connection only goes back to the
    # pool once the body has been consumed or the response is closed. The 'status' path never
    # reads the body, so close the response explicitly via the context manager.
    with requests.request(
            cfg.method, self._route_to(cfg.path),
            headers=headers, data=body, stream=True) as resp:
        if cfg.return_type == 'json':
            return resp.json()
        if cfg.return_type == 'status':
            return resp.status_code
        if cfg.return_type == 'text':
            return resp.text

        return resp.content
def encode_form(fields: dict[str, str], files: dict[str, FileData]) -> MultipartData:
    """
    encode_form renders the given text fields and files as a multipart/form-data body.

    Text fields are written first, followed by the files; files whose value is None are
    skipped. The result holds the generated boundary (as a string, for the Content-Type
    header) and the encoded body bytes.
    """
    newline = b"\r\n"
    part = b"--"
    # 30 random bytes become 40 URL-safe base64 characters, giving a fresh boundary per call
    boundary = b"----AShirtFormData-" + urlsafe_b64encode(os.urandom(30))
    boundary_start = part + boundary + newline
    last_boundary = part + boundary + part + newline
    content_dispo = b"Content-Disposition: form-data"

    # Collect the pieces and join once; repeated bytes concatenation copies the whole
    # buffer for every entry.
    pieces: list[bytes] = []
    for key, value in fields.items():
        pieces += (
            boundary_start,
            content_dispo, f'; name="{key}"'.encode(),
            newline, newline,
            value.encode(),
            newline,
        )

    for key, value in files.items():
        if value is None:
            continue
        pieces += (
            boundary_start,
            content_dispo, f'; name="{key}"; filename="{value["filename"]}"'.encode(),
            newline, f'Content-Type: {value["mimetype"]}'.encode(),
            newline, newline,
            value['content'],
            newline,
        )

    pieces.append(last_boundary)

    return {
        "boundary": boundary.decode(),
        "data": b"".join(pieces)
    }
def parse_file(filename: str, binary=True):
    """
    parse_file reads the named file from disk and packages it as FileData, ready to be used
    as a file upload.

    The file is read as bytes by default, or as text when binary is False. The mimetype is
    guessed from the filename, falling back to "application/octet-stream" when it cannot be
    determined.
    """
    mode = 'rb' if binary else 'r'
    with open(filename, mode) as fh:
        data = fh.read()

    # guess_type returns a (type, encoding) pair; type is None for unknown extensions
    guessed_type, _ = mimetypes.guess_type(filename)

    return FileData(
        filename=filename,
        content=data,
        mimetype=guessed_type or "application/octet-stream"
    )
If you need to store data + temporarily (for the lifetime of a request), you can stick it here + """ + def __init__(self, request_logger): + self.req_log = request_logger diff --git a/workers/vision-web/src/wsgi.py b/workers/vision-web/src/wsgi.py new file mode 100644 index 0000000..de92b92 --- /dev/null +++ b/workers/vision-web/src/wsgi.py @@ -0,0 +1,10 @@ +# Setting the import path so that flask development properly runs the code +# (Add "." to the import path searching) +import sys +from os.path import abspath, dirname + +sys.path.insert(0, dirname(abspath(__file__))) + +from main import create_app + +app = create_app()