From eca08c4e399392d5e56738ca034f3878439bb13b Mon Sep 17 00:00:00 2001 From: pookie Date: Wed, 19 Jun 2024 15:22:03 -0700 Subject: [PATCH 01/16] initial vision-web worker --- workers/vision-web/.gitignore | 25 ++ workers/vision-web/Dockerfile.dev | 30 ++ workers/vision-web/Makefile | 29 ++ workers/vision-web/Pipfile | 17 + workers/vision-web/Pipfile.lock | 318 ++++++++++++++++++ workers/vision-web/Readme.md | 80 +++++ workers/vision-web/docker-compose.yml | 12 + workers/vision-web/requirements.txt | 17 + workers/vision-web/src/__init__.py | 0 workers/vision-web/src/actions/__init__.py | 2 + .../vision-web/src/actions/process_handler.py | 100 ++++++ workers/vision-web/src/actions/types.py | 20 ++ workers/vision-web/src/constants/__init__.py | 4 + .../src/constants/supported_content_type.py | 22 ++ workers/vision-web/src/helpers/__init__.py | 2 + .../vision-web/src/helpers/flask_helpers.py | 22 ++ workers/vision-web/src/helpers/is_literal.py | 13 + workers/vision-web/src/main.py | 45 +++ workers/vision-web/src/project_config.py | 34 ++ .../vision-web/src/request_types/__init__.py | 2 + .../request_types/evidence_created_body.py | 34 ++ .../src/request_types/generic_request_body.py | 25 ++ .../vision-web/src/request_types/test_body.py | 23 ++ workers/vision-web/src/routes/__init__.py | 0 workers/vision-web/src/routes/ashirt.py | 76 +++++ workers/vision-web/src/routes/dev.py | 22 ++ workers/vision-web/src/routes/types.py | 12 + workers/vision-web/src/services/__init__.py | 23 ++ .../src/services/ashirt_base_class.py | 134 ++++++++ .../vision-web/src/services/ashirt_sync.py | 32 ++ workers/vision-web/src/services/helpers.py | 82 +++++ workers/vision-web/src/services/types.py | 80 +++++ workers/vision-web/src/state.py | 7 + workers/vision-web/src/wsgi.py | 10 + 34 files changed, 1354 insertions(+) create mode 100644 workers/vision-web/.gitignore create mode 100644 workers/vision-web/Dockerfile.dev create mode 100644 workers/vision-web/Makefile create mode 100644 
workers/vision-web/Pipfile create mode 100644 workers/vision-web/Pipfile.lock create mode 100644 workers/vision-web/Readme.md create mode 100644 workers/vision-web/docker-compose.yml create mode 100644 workers/vision-web/requirements.txt create mode 100644 workers/vision-web/src/__init__.py create mode 100644 workers/vision-web/src/actions/__init__.py create mode 100644 workers/vision-web/src/actions/process_handler.py create mode 100644 workers/vision-web/src/actions/types.py create mode 100644 workers/vision-web/src/constants/__init__.py create mode 100644 workers/vision-web/src/constants/supported_content_type.py create mode 100644 workers/vision-web/src/helpers/__init__.py create mode 100644 workers/vision-web/src/helpers/flask_helpers.py create mode 100644 workers/vision-web/src/helpers/is_literal.py create mode 100644 workers/vision-web/src/main.py create mode 100644 workers/vision-web/src/project_config.py create mode 100644 workers/vision-web/src/request_types/__init__.py create mode 100644 workers/vision-web/src/request_types/evidence_created_body.py create mode 100644 workers/vision-web/src/request_types/generic_request_body.py create mode 100644 workers/vision-web/src/request_types/test_body.py create mode 100644 workers/vision-web/src/routes/__init__.py create mode 100644 workers/vision-web/src/routes/ashirt.py create mode 100644 workers/vision-web/src/routes/dev.py create mode 100644 workers/vision-web/src/routes/types.py create mode 100644 workers/vision-web/src/services/__init__.py create mode 100644 workers/vision-web/src/services/ashirt_base_class.py create mode 100644 workers/vision-web/src/services/ashirt_sync.py create mode 100644 workers/vision-web/src/services/helpers.py create mode 100644 workers/vision-web/src/services/types.py create mode 100644 workers/vision-web/src/state.py create mode 100644 workers/vision-web/src/wsgi.py diff --git a/workers/vision-web/.gitignore b/workers/vision-web/.gitignore new file mode 100644 index 
0000000..ab4fdde --- /dev/null +++ b/workers/vision-web/.gitignore @@ -0,0 +1,25 @@ +# compiled output +__pycache__ + +# OS +.DS_Store + +# IDEs and editors +/.idea +.project +.classpath +.c9/ +*.launch +.settings/ +*.sublime-workspace + +# IDE - VSCode +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json + +# local env files +.env* +!.env.example diff --git a/workers/vision-web/Dockerfile.dev b/workers/vision-web/Dockerfile.dev new file mode 100644 index 0000000..b9f35b8 --- /dev/null +++ b/workers/vision-web/Dockerfile.dev @@ -0,0 +1,30 @@ +FROM --platform=linux/amd64 python:3.12-slim + +# With help from https://pipenv.pypa.io/en/latest/basics/#pipenv-and-docker-containers + +WORKDIR /app +ENV PIPENV_VENV_IN_PROJECT=1 + +RUN pip install --user pipenv huggingface_hub +# Download the required model components +RUN /root/.local/bin/huggingface-cli download microsoft/Phi-3-vision-128k-instruct-onnx-cpu --include cpu-int4-rtn-block-32-acc-level-4/* --local-dir model/ + +################################### +# Install other dependencies here # +################################### + +COPY Pipfile.lock Pipfile ./ +RUN /root/.local/bin/pipenv sync +RUN /root/.local/bin/pipenv run pip install --pre onnxruntime-genai numpy + +ENV FLASK_APP src/wsgi.py +ENV FLASK_DEBUG 1 +ENV FLAKS_ENV=developement + +EXPOSE 80 + +COPY src ./src/ + +CMD ["/root/.local/bin/pipenv", "run", \ + "flask", "run", \ + "--host=0.0.0.0", "--port=80"] diff --git a/workers/vision-web/Makefile b/workers/vision-web/Makefile new file mode 100644 index 0000000..e7f3dff --- /dev/null +++ b/workers/vision-web/Makefile @@ -0,0 +1,29 @@ +imageNameDev = vision-web-dev +imageName = vision-web + +.PHONY: build_dev +build_dev: + podman build -t $(imageNameDev) -f Dockerfile.dev . + +.PHONY: build_release +build_release: + podman build -t $(imageName) -f Dockerfile.prod . 
+ +run_dev: + podman compose up --build + +.PHONY: test-test +test-test: + curl -XPOST "http://localhost:9000/test" -d '{"type": "test"}' + +.PHONY: test-process-img +test-process-img: + curl -XPOST "http://localhost:9000/process" \ + -H "Content-Type: application/json" \ + -d '{"type": "evidence_created", "evidenceUuid": "seed_dursleys", "operationSlug": "HPSS", "contentType": "image"}' + +.PHONY: test-unsupported +test-unsupported: + curl -XPOST "http://localhost:9000/process" \ + -H "Content-Type: application/json" \ + -d '{"type": "unsupported"}' diff --git a/workers/vision-web/Pipfile b/workers/vision-web/Pipfile new file mode 100644 index 0000000..4753159 --- /dev/null +++ b/workers/vision-web/Pipfile @@ -0,0 +1,17 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +flask = "*" +gunicorn = "*" +requests = "*" +structlog = "*" +python-dotenv = "*" + +[dev-packages] +autopep8 = "*" + +[requires] +python_version = "3.12" diff --git a/workers/vision-web/Pipfile.lock b/workers/vision-web/Pipfile.lock new file mode 100644 index 0000000..f14c522 --- /dev/null +++ b/workers/vision-web/Pipfile.lock @@ -0,0 +1,318 @@ +{ + "_meta": { + "hash": { + "sha256": "bca5f134f8fc8994b66fee4604e9c92d368d151e6e8e146b80f94aecd44e3200" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.12" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "blinker": { + "hashes": [ + "sha256:1779309f71bf239144b9399d06ae925637cf6634cf6bd131104184531bf67c01", + "sha256:8f77b09d3bf7c795e969e9486f39c2c5e9c39d4ee07424be2bc594ece9642d83" + ], + "markers": "python_version >= '3.8'", + "version": "==1.8.2" + }, + "certifi": { + "hashes": [ + "sha256:3cd43f1c6fa7dedc5899d69d3ad0398fd018ad1a17fba83ddaf78aa46c747516", + "sha256:ddc6c8ce995e6987e7faf5e3f1b02b302836a0e5d98ece18392cb1a36c72ad56" + ], + "markers": "python_version >= '3.6'", + "version": "==2024.6.2" + }, + 
"charset-normalizer": { + "hashes": [ + "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027", + "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087", + "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786", + "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8", + "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09", + "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185", + "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574", + "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e", + "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519", + "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898", + "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269", + "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3", + "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f", + "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6", + "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8", + "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a", + "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73", + "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc", + "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714", + "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2", + "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc", + "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce", + "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d", + "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e", + "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6", + 
"sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269", + "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96", + "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d", + "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a", + "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4", + "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77", + "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d", + "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0", + "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed", + "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068", + "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac", + "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25", + "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8", + "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab", + "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26", + "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2", + "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db", + "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f", + "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5", + "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99", + "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c", + "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d", + "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811", + "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa", + "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a", + "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03", 
+ "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b", + "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04", + "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c", + "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001", + "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458", + "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389", + "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99", + "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985", + "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537", + "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238", + "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f", + "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d", + "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796", + "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a", + "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143", + "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8", + "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c", + "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5", + "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5", + "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711", + "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4", + "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6", + "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c", + "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7", + "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4", + 
"sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b", + "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae", + "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12", + "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c", + "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae", + "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8", + "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887", + "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b", + "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4", + "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f", + "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5", + "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33", + "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519", + "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561" + ], + "markers": "python_full_version >= '3.7.0'", + "version": "==3.3.2" + }, + "click": { + "hashes": [ + "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28", + "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de" + ], + "markers": "python_version >= '3.7'", + "version": "==8.1.7" + }, + "flask": { + "hashes": [ + "sha256:34e815dfaa43340d1d15a5c3a02b8476004037eb4840b34910c6e21679d288f3", + "sha256:ceb27b0af3823ea2737928a4d99d125a06175b8512c445cbd9a9ce200ef76842" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==3.0.3" + }, + "gunicorn": { + "hashes": [ + "sha256:350679f91b24062c86e386e198a15438d53a7a8207235a78ba1b53df4c4378d9", + "sha256:4a0b436239ff76fb33f11c07a16482c521a7e09c1ce3cc293c2330afe01bec63" + ], + "index": "pypi", + "markers": "python_version >= '3.7'", + "version": "==22.0.0" + }, + "idna": { + "hashes": [ + 
"sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc", + "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0" + ], + "markers": "python_version >= '3.5'", + "version": "==3.7" + }, + "itsdangerous": { + "hashes": [ + "sha256:c6242fc49e35958c8b15141343aa660db5fc54d4f13a1db01a3f5891b98700ef", + "sha256:e0050c0b7da1eea53ffaf149c0cfbb5c6e2e2b69c4bef22c81fa6eb73e5f6173" + ], + "markers": "python_version >= '3.8'", + "version": "==2.2.0" + }, + "jinja2": { + "hashes": [ + "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369", + "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d" + ], + "markers": "python_version >= '3.7'", + "version": "==3.1.4" + }, + "markupsafe": { + "hashes": [ + "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf", + "sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff", + "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f", + "sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3", + "sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532", + "sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f", + "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617", + "sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df", + "sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4", + "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906", + "sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f", + "sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4", + "sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8", + "sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371", + "sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2", + "sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465", 
+ "sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52", + "sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6", + "sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169", + "sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad", + "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2", + "sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0", + "sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029", + "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f", + "sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a", + "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced", + "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5", + "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c", + "sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf", + "sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9", + "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb", + "sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad", + "sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3", + "sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1", + "sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46", + "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc", + "sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a", + "sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee", + "sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900", + "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5", + "sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea", + 
"sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f", + "sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5", + "sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e", + "sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a", + "sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f", + "sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50", + "sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a", + "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b", + "sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4", + "sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff", + "sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2", + "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46", + "sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b", + "sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf", + "sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5", + "sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5", + "sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab", + "sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd", + "sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68" + ], + "markers": "python_version >= '3.7'", + "version": "==2.1.5" + }, + "packaging": { + "hashes": [ + "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002", + "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124" + ], + "markers": "python_version >= '3.8'", + "version": "==24.1" + }, + "python-dotenv": { + "hashes": [ + "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca", + "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a" + ], + "index": 
"pypi", + "markers": "python_version >= '3.8'", + "version": "==1.0.1" + }, + "requests": { + "hashes": [ + "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", + "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==2.32.3" + }, + "structlog": { + "hashes": [ + "sha256:0e3fe74924a6d8857d3f612739efb94c72a7417d7c7c008d12276bca3b5bf13b", + "sha256:983bd49f70725c5e1e3867096c0c09665918936b3db27341b41d294283d7a48a" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==24.2.0" + }, + "urllib3": { + "hashes": [ + "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472", + "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168" + ], + "markers": "python_version >= '3.8'", + "version": "==2.2.2" + }, + "werkzeug": { + "hashes": [ + "sha256:097e5bfda9f0aba8da6b8545146def481d06aa7d3266e7448e2cccf67dd8bd18", + "sha256:fc9645dc43e03e4d630d23143a04a7f947a9a3b5727cd535fdfe155a17cc48c8" + ], + "markers": "python_version >= '3.8'", + "version": "==3.0.3" + } + }, + "develop": { + "autopep8": { + "hashes": [ + "sha256:5cfe45eb3bef8662f6a3c7e28b7c0310c7310d340074b7f0f28f9810b44b7ef4", + "sha256:b716efa70cbafbf4a2c9c5ec1cabfa037a68f9e30b04c74ffa5864dd49b8f7d2" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==2.3.0" + }, + "pycodestyle": { + "hashes": [ + "sha256:442f950141b4f43df752dd303511ffded3a04c2b6fb7f65980574f0c31e6e79c", + "sha256:949a39f6b86c3e1515ba1787c2022131d165a8ad271b11370a8819aa070269e4" + ], + "markers": "python_version >= '3.8'", + "version": "==2.12.0" + } + } +} diff --git a/workers/vision-web/Readme.md b/workers/vision-web/Readme.md new file mode 100644 index 0000000..690e28d --- /dev/null +++ b/workers/vision-web/Readme.md @@ -0,0 +1,80 @@ +# Python/Flask Enhancement Worker + +This worker is a microservice-type worker built on Python/Flask. 
+It uses [pipenv](https://pipenv.pypa.io/en/latest/) to manage dependencies. +This project strives to be as minimalistic as possible, but does include some helpful libraries. This +includes: + +* [Flask](https://flask.palletsprojects.com/en/2.1.x/), to manage the network connection +* [gunicorn](https://gunicorn.org/), for production deployment +* [requests](https://docs.python-requests.org/en/latest/), to handle contacting the ashirt instance +* [structlog](https://www.structlog.org/en/stable/), for structured logging +* [python-dotenv](https://pypi.org/project/python-dotenv/), for environment loading (this is primarily aimed at development) + +In addition, this service tries to be as type-safe as possible, so extra effort has been made to ensure that types are specified wherever possible. + +To get up and running, open the project root in a terminal, install pipenv, and run `pipenv shell`, then `pipenv install`. + +## Deploying to AShirt + +The typical configuration for deploying this worker archetype is going to look roughly like this: + +```json +{ + "type": "web", + "version": 1, + "url": "http://testapp/ashirt/process" +} +``` + +Note the URL: this is likely what will change for your version. + +## Adding custom logic + +Most programs should be able to largely ignore most of the code, and instead focus on the `actions` directory, and specifically the events you want to target. + +## How it works + +This section is mostly for those that need to do more than implement the core functionality. This application, like many other webservices, can be divided up into two phases: the startup phase, and the serving phase. + +### Startup Phase + +The Startup Phase is as you might expect: this state is entered once the application starts, and it is responsible for configuring the application for long-term running. The most important bit here is likely the configuration and route management. 
`create_app` within `main.py` will load configuration details from the environment (locally: `.env` file), create a class for handling requests to an AShirt backend, and register standard routes. Then, either the main line in `main.py` or `wsgi.py` will start the server. This phase ends once the server starts servicing requests, and the application then enters the serving phase. + +### Serving Phase + +The serving phase is largely controlled by what particular route is entered when a user contacts the server. The `routes` directory provides two sets of routes: the `ashirt` routes, which are the routes that the AShirt backend will call, and the `dev` routes, which are designed to be created only in a development environment. These serve as helpers and sanity checks. + +When a route is reached -- in particular, when the process route is reached (see: `process_request`) -- the service will kick off processing of that data. Some initial boilerplate-style code manages the request and directs all of the actual work to some function in the `actions` directory. These functions will return one of a handful of responses, which will be used to generate the true response to the AShirt backend. + +Once the request is complete, the application waits for another request. + +### Contacting AShirt + +The `services` folder contains a class that is used to contact ashirt. This is the typical way of getting the actual content / interacting with ashirt. This is treated mostly as a singleton by preparing the service in `main.py`, and recording an instance in `services/__init__.py`. Any other module can then use the `svc` function to get the loaded instance. + +## Integrating into AShirt testing environment + +Notably, the dev port exposed is port 8080, so all port mapping has to be done with that in mind. When running locally (not via docker), the exposed port is configurable. + +This configuration should work for your scenario, though the volumes mapped might need to be different. 
+ +```yaml + py_flask: + build: + context: enhancement_worker_templates/web/python_flask + dockerfile: Dockerfile.dev + ports: + - 3004:8080 + restart: on-failure + volumes: + - ./enhancement_worker_templates/web/python_flask/src:/app/src + environment: + ENABLE_DEV: true + ASHIRT_BACKEND_URL: http://backend:3000 + ASHIRT_ACCESS_KEY: gR6nVtaQmp2SvzIqLUWdedDk + ASHIRT_SECRET_KEY: WvtvxFaJS0mPs82nCzqamI+bOGXpq7EIQhg4UD8nxS5448XG9N0gNAceJGBLPdCA3kAzC4MdUSHnKCJ/lZD++A== +``` + + +Note that the mapped volume overwrites the source files placed in the image. This allows for hot-reloading of the worker when deployed to docker-compose. If you don't want or need hot reloading, then you can simply omit this declaration. diff --git a/workers/vision-web/docker-compose.yml b/workers/vision-web/docker-compose.yml new file mode 100644 index 0000000..a3873b6 --- /dev/null +++ b/workers/vision-web/docker-compose.yml @@ -0,0 +1,12 @@ +version: '3' +services: + app: + build: + dockerfile: Dockerfile.dev + context: . 
def handle_evidence_created(body: EvidenceCreatedBody) -> ProcessResultDTO:
    """
    Answer a set of questions about a newly created piece of image evidence.

    Called when a web request comes in, is validated, and indicates that work needs to be
    done on a piece of evidence. Only ``SupportedContentType.IMAGE`` evidence is handled.

    The questions come from the ``VISION_QUESTIONS`` environment variable (comma separated);
    when it is unset or contains no usable question, a built-in default list is used.

    Args:
        body: the validated request body. ``content_type``, ``operation_slug`` and
            ``evidence_uuid`` are read from it.

    Returns:
        * ``{'action': 'rejected'}`` when the content type is not accepted.
        * ``{'action': 'error', 'content': <reason>}`` when the evidence media did not come
          back as raw bytes.
        * ``{'action': 'processed', 'content': <text>}`` otherwise, where the text is one
          ``Q:<question>\\nA:<answer>\\n`` chunk per question, joined by newlines.
    """
    # Function-scope import so this block does not depend on edits to the module header.
    import tempfile

    accepted_types = (
        SupportedContentType.IMAGE,
    )
    if body.content_type not in accepted_types:
        return {
            'action': 'rejected'
        }

    ashirt_svc = AShirtRequestsService(
        os.environ.get('ASHIRT_BACKEND_URL', ''),
        os.environ.get('ASHIRT_ACCESS_KEY', ''),
        os.environ.get('ASHIRT_SECRET_KEY', '')
    )
    # Gather content
    evidence_content = ashirt_svc.get_evidence_content(
        body.operation_slug, body.evidence_uuid, 'media'
    )
    # Anything other than raw bytes cannot be written out as an image. Report it instead of
    # returning None or crashing later while writing the file.
    if not isinstance(evidence_content, (bytes, bytearray)):
        return {
            'action': 'error',
            'content': (
                'unexpected evidence content type: '
                f'{type(evidence_content).__name__}'
            ),
        }

    default_questions = [
        "What times are shown in the image?",
        "Which applications are open in the image?",
        "Which operating system is being used in the image?",
        "What does the image say?"
    ]
    configured = os.environ.get('VISION_QUESTIONS', '')
    # Trim whitespace and drop empty entries so "a, b," does not yield blank questions.
    questions = [q.strip() for q in configured.split(',') if q.strip()]
    if not questions:
        questions = default_questions

    # The image loader is given a file path, so the bytes are spooled to a uniquely named
    # temporary file in the platform's temp directory.
    with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
        tmp.write(evidence_content)
        temp_image_path = tmp.name

    try:
        img = og.Images.open(temp_image_path)
        # Run inference once per question
        answers = [do_ai(question=q, image=img) for q in questions]
    finally:
        # Always delete the image file, even when loading or inference fails.
        os.remove(temp_image_path)

    chunks = [f'Q:{q}\nA:{a}\n' for q, a in zip(questions, answers)]
    return {
        'action': 'processed',
        'content': '\n'.join(chunks)
    }
generator.get_next_tokens()[0] + decoded_text = tokenizer_stream.decode(new_token) + generated_text += decoded_text + print(decoded_text, end='', flush=True) + for _ in range(3): + print() + generated_text = generated_text.replace('','') + # Delete the generator to free the captured graph before creating another one + del generator + return generated_text \ No newline at end of file diff --git a/workers/vision-web/src/actions/types.py b/workers/vision-web/src/actions/types.py new file mode 100644 index 0000000..50402b9 --- /dev/null +++ b/workers/vision-web/src/actions/types.py @@ -0,0 +1,20 @@ +from typing import Literal, Optional, TypedDict, Union + + +class ProcessResultNormal(TypedDict): + action: Literal['rejected', 'error'] + content: Optional[str] + + +class ProcessResultComplete(TypedDict): + action: Literal['processed'] + content: str + + +class ProcessResultDeferred(TypedDict): + action: Literal['deferred'] + + +ProcessResultDTO = Union[ProcessResultNormal, + ProcessResultComplete, + ProcessResultDeferred] diff --git a/workers/vision-web/src/constants/__init__.py b/workers/vision-web/src/constants/__init__.py new file mode 100644 index 0000000..7d8d6f7 --- /dev/null +++ b/workers/vision-web/src/constants/__init__.py @@ -0,0 +1,4 @@ +from .supported_content_type import * + +STATE_NAME = 'state' +APP_LOGGER = 'logger' diff --git a/workers/vision-web/src/constants/supported_content_type.py b/workers/vision-web/src/constants/supported_content_type.py new file mode 100644 index 0000000..9a10951 --- /dev/null +++ b/workers/vision-web/src/constants/supported_content_type.py @@ -0,0 +1,22 @@ +from enum import Enum, auto + + +class SupportedContentType(Enum): + HTTP_REQUEST_CYCLE = auto() + TERMINAL_RECORDING = auto() + CODEBLOCK = auto() + EVENT = auto() + IMAGE = auto() + NONE = auto() + + @staticmethod + def from_str(s: str): + values: dict[str, SupportedContentType] = { + "http-request-cycle": SupportedContentType.HTTP_REQUEST_CYCLE, + "terminal-recording": 
SupportedContentType.TERMINAL_RECORDING, + "codeblock": SupportedContentType.CODEBLOCK, + "event": SupportedContentType.EVENT, + "image": SupportedContentType.IMAGE, + "none": SupportedContentType.NONE, + } + return values[s] diff --git a/workers/vision-web/src/helpers/__init__.py b/workers/vision-web/src/helpers/__init__.py new file mode 100644 index 0000000..e1c7f01 --- /dev/null +++ b/workers/vision-web/src/helpers/__init__.py @@ -0,0 +1,2 @@ +from .is_literal import * +from .flask_helpers import * diff --git a/workers/vision-web/src/helpers/flask_helpers.py b/workers/vision-web/src/helpers/flask_helpers.py new file mode 100644 index 0000000..31d0165 --- /dev/null +++ b/workers/vision-web/src/helpers/flask_helpers.py @@ -0,0 +1,22 @@ +from flask import ( + current_app, Response, make_response, Flask, +) + + +def jsonify_no_content() -> Response: + """ + jsonify_no_content produces a 204 (no content) response + """ + # from https://www.erol.si/2018/03/flask-return-204-no-content-response/ + response = make_response('', 204) + response.mimetype = current_app.config['JSONIFY_MIMETYPE'] + + return response + + +def remove_flask_logging(app: Flask) -> None: + # See: https://gist.github.com/daryltucker/e40c59a267ea75db12b1 + import logging + app.logger.disabled = True + log = logging.getLogger('werkzeug') + log.disabled = True diff --git a/workers/vision-web/src/helpers/is_literal.py b/workers/vision-web/src/helpers/is_literal.py new file mode 100644 index 0000000..ab1e858 --- /dev/null +++ b/workers/vision-web/src/helpers/is_literal.py @@ -0,0 +1,13 @@ +from typing import Any + + +def is_literal(v: Any, expectedType: type, expectedValue: Any) -> bool: + """ + is_literal is a small helper that verifies that the value passed has the expected type + and the expected value. 
This is useful to validate literal values provided by an external + service + """ + return ( + type(v) == expectedType + and v == expectedValue + ) diff --git a/workers/vision-web/src/main.py b/workers/vision-web/src/main.py new file mode 100644 index 0000000..22bdba8 --- /dev/null +++ b/workers/vision-web/src/main.py @@ -0,0 +1,45 @@ +import os + +from dotenv import dotenv_values +from flask import Flask +import structlog + +from constants import APP_LOGGER, STATE_NAME +from helpers import remove_flask_logging +from project_config import ProjectConfig +from routes import (ashirt, dev) +from services import AShirtRequestsService +from services import set_service + + +def create_app() -> Flask: + app = Flask(__name__) + + full_env = { + **dotenv_values(".env"), + **os.environ + } + cfg = ProjectConfig.from_dict(full_env) + app.config[STATE_NAME] = cfg + app.config[APP_LOGGER] = structlog.get_logger() + + set_service( + AShirtRequestsService(cfg.backend_url, cfg.access_key, cfg.secret_key_b64) + ) + app.register_blueprint(ashirt.bp) # Add normal routes + if cfg.dev_mode: + app.config[APP_LOGGER].msg("Adding dev routes") + app.register_blueprint(dev.bp) # Add dev routes + + # tweak logging settings + remove_flask_logging(app) + return app + + +if __name__ == "__main__": + app = create_app() + try: + app.config[APP_LOGGER].msg("App Starting") + app.run(host="0.0.0.0", port=app.config[STATE_NAME].port) + finally: + app.config[APP_LOGGER].msg("App Exiting") diff --git a/workers/vision-web/src/project_config.py b/workers/vision-web/src/project_config.py new file mode 100644 index 0000000..7215912 --- /dev/null +++ b/workers/vision-web/src/project_config.py @@ -0,0 +1,34 @@ +from dataclasses import dataclass + + +@dataclass(frozen=True) +class ProjectConfig: + """ + ProjectConfig stores the configuration read from the passed dictionary, if using from_dict + (this is intended to be os.environ). You can then access these values via the fields below. 
+ """ + dev_mode: bool + backend_url: str + access_key: str + secret_key_b64: str + port: str + + @classmethod + def from_dict(cls, data: dict[str, str]): + """ + from_dict attempts to get all of the configuration needs from the provided dictionary. + If a field is not in the dictionary, then the default value is used instead. + """ + dev_mode = data.get('ENABLE_DEV', 'false').lower() == 'true' + backend_url = data.get('ASHIRT_BACKEND_URL', '') + access_key = data.get('ASHIRT_ACCESS_KEY', '') + secret_key_b64 = data.get('ASHIRT_SECRET_KEY', '') + port = data.get('PORT', '5000') + + return cls( + dev_mode=dev_mode, + backend_url=backend_url, + access_key=access_key, + secret_key_b64=secret_key_b64, + port=port, + ) diff --git a/workers/vision-web/src/request_types/__init__.py b/workers/vision-web/src/request_types/__init__.py new file mode 100644 index 0000000..0b58862 --- /dev/null +++ b/workers/vision-web/src/request_types/__init__.py @@ -0,0 +1,2 @@ +from .evidence_created_body import * +from .test_body import * diff --git a/workers/vision-web/src/request_types/evidence_created_body.py b/workers/vision-web/src/request_types/evidence_created_body.py new file mode 100644 index 0000000..9cd159e --- /dev/null +++ b/workers/vision-web/src/request_types/evidence_created_body.py @@ -0,0 +1,34 @@ +from dataclasses import dataclass +from typing import Any, Literal + +from constants import SupportedContentType +from helpers import is_literal +from .generic_request_body import GenericRequestBody + + +@dataclass(repr=False, frozen=True) +class EvidenceCreatedBody(GenericRequestBody): + """ + EvidenceCreatedBody reflects the message received from AShirt when AShirt requests metadata processing + """ + type: Literal['evidence_created'] + evidence_uuid: str + operation_slug: str + content_type: SupportedContentType + + def is_valid_instance(self) -> bool: + return all([ + is_literal(self.type, str, 'evidence_created'), + type(self.evidence_uuid) is str, + 
type(self.operation_slug) is str, + type(self.content_type) is SupportedContentType, + ]) + + @classmethod + def from_json(cls, data: dict[str, Any]): + cls.type = data['type'] + cls.evidence_uuid = data['evidenceUuid'] + cls.operation_slug = data['operationSlug'] + cls.content_type = SupportedContentType.from_str(data['contentType']) + + return cls diff --git a/workers/vision-web/src/request_types/generic_request_body.py b/workers/vision-web/src/request_types/generic_request_body.py new file mode 100644 index 0000000..e5c48f2 --- /dev/null +++ b/workers/vision-web/src/request_types/generic_request_body.py @@ -0,0 +1,25 @@ +from abc import ABC, abstractmethod +from typing import Any + + +class GenericRequestBody(ABC): + + @abstractmethod + def from_json(cls, data: dict[str, Any]): + pass + + @abstractmethod + def is_valid_instance(self, data: dict[str, Any]) -> bool: + pass + + @classmethod + def parse_if_valid(cls, data: dict[str, Any]): + """ + parse_if_valid checks that the given data is valid, then parses it. 
+ if is not valid, or if an error occurs when parsing, then None is returned + """ + try: + inst = cls.from_json(data) + return cls if cls.is_valid_instance(inst) else None + except (KeyError): + return None diff --git a/workers/vision-web/src/request_types/test_body.py b/workers/vision-web/src/request_types/test_body.py new file mode 100644 index 0000000..d0524fb --- /dev/null +++ b/workers/vision-web/src/request_types/test_body.py @@ -0,0 +1,23 @@ +from dataclasses import dataclass +from typing import Any, Literal + +from helpers import is_literal +from .generic_request_body import GenericRequestBody + + +@dataclass(repr=False, frozen=True) +class TestBody(GenericRequestBody): + """ + TestBody reflects the message received from AShirt when AShirt requests testing + """ + + type: Literal['test'] + + def is_valid_instance(self) -> bool: + return is_literal(self.type, str, 'test') + + @classmethod + def from_json(cls, data: dict[str, Any]): + cls.type = data['type'] + + return cls diff --git a/workers/vision-web/src/routes/__init__.py b/workers/vision-web/src/routes/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/workers/vision-web/src/routes/ashirt.py b/workers/vision-web/src/routes/ashirt.py new file mode 100644 index 0000000..0db2509 --- /dev/null +++ b/workers/vision-web/src/routes/ashirt.py @@ -0,0 +1,76 @@ +from flask import ( + Blueprint, request, current_app, jsonify, Response, g +) +import json +from uuid import uuid4 + +from constants import APP_LOGGER +from request_types import (EvidenceCreatedBody, TestBody) +from state import RequestState +import actions + +from .types import StatusCode + + +bp = Blueprint('ashirt', __name__, url_prefix='/') + + +@bp.route("/process", methods=['POST']) +def process_request() -> Response: + """ + process_request handles requests received from AShirt + """ + data = request.json + if TestBody.parse_if_valid(data) is not None: + return jsonify({"status": "ok"}) + + if (body := 
EvidenceCreatedBody.parse_if_valid(data)) is not None: + action_result = actions.handle_evidence_created(body) + # Construct a response that provides a body when a body is meaningful + rtn = ( + Response() + if action_result.get('content') is None + else Response(json.dumps(action_result)) + ) + + rtn.status_code = { + 'processed': StatusCode.OK.value, + 'deferred': StatusCode.ACCEPTED.value, + 'error': StatusCode.INTERNAL_SERVICE_ERROR.value, + 'rejected': StatusCode.NOT_ACCEPTABLE.value, + }[action_result['action']] + return rtn + + return Response('Unsupported Body Type', status=501) + + +########## Blueprint Stuff ############ + +@bp.before_request +def on_request_received(): + """ + on_request_received establishes a state for the request, complete with a logger. Also logs the + start of the request for analytics purposes + """ + ctx = str(uuid4()) + app_logger = current_app.config[APP_LOGGER] + req_log = app_logger.bind(context=ctx) + + req_log.msg("Received Request", + method=request.method, + endpoint=request.full_path, + query=request.query_string) + g._request_state = RequestState(req_log) + + +@bp.after_request +def on_request_complete(resp): + """ + on_request_complete logs when the request has been completed + """ + req_state = g._request_state + if type(req_state) == RequestState: + g._request_state.req_log.msg( + "Request Complete", response_code=resp.status_code, body=resp.data) + + return resp diff --git a/workers/vision-web/src/routes/dev.py b/workers/vision-web/src/routes/dev.py new file mode 100644 index 0000000..9b7d534 --- /dev/null +++ b/workers/vision-web/src/routes/dev.py @@ -0,0 +1,22 @@ +from flask import ( + Blueprint, jsonify, Response +) + +# from services import svc + +# Apply the user's suggested edit +bp = Blueprint('dev', __name__, url_prefix='/') + + +@bp.route("/") +def index() -> Response: + """index provides a method to verify that the service is live""" + return jsonify({ + "msg": "GET /" + }) + +@bp.route("/test",
methods=['POST']) +def test() -> Response: + """test provides a place to verify that individual steps work as expected""" + + return jsonify({"Done": "you bet!"}) diff --git a/workers/vision-web/src/routes/types.py b/workers/vision-web/src/routes/types.py new file mode 100644 index 0000000..392d2d2 --- /dev/null +++ b/workers/vision-web/src/routes/types.py @@ -0,0 +1,12 @@ +from enum import Enum + + +class StatusCode(Enum): + """StatusCode is a set of status codes supported by AShirt.""" + OK = 200 + ACCEPTED = 202 + NO_CONTENT = 204 + BAD_REQUEST = 400 + NOT_ACCEPTABLE = 406 + INTERNAL_SERVICE_ERROR = 500 + NOT_IMPLEMENTED = 501 diff --git a/workers/vision-web/src/services/__init__.py b/workers/vision-web/src/services/__init__.py new file mode 100644 index 0000000..070244e --- /dev/null +++ b/workers/vision-web/src/services/__init__.py @@ -0,0 +1,23 @@ +from .helpers import * +from .types import * +from .ashirt_base_class import * +from .ashirt_sync import * + + +_ashirt_service: AShirtService + + +def set_service(svc: AShirtService): + """ + set_service stores an instance of a concrete AShirtService class. This is paired with svc to + allow making requests anywhere in the application. + """ + global _ashirt_service + _ashirt_service = svc + + +def svc() -> AShirtService: + """ + svc provides an established, concrete AShirtService class that can make requests to AShirt. + """ + return _ashirt_service diff --git a/workers/vision-web/src/services/ashirt_base_class.py b/workers/vision-web/src/services/ashirt_base_class.py new file mode 100644 index 0000000..a2526d0 --- /dev/null +++ b/workers/vision-web/src/services/ashirt_base_class.py @@ -0,0 +1,134 @@ +from abc import ABC, abstractmethod +from base64 import b64decode +import json +from typing import Any, Callable, Literal, Optional + +from .
import ( + encode_form, + make_hmac, + now_in_rfc1123, + RequestConfig as RC, + CreateOperationInput, + CreateEvidenceInput, + CreateTagInput, + UpdateEvidenceInput, + UpsertEvidenceMetadata +) + + +class AShirtService(ABC): + """ + AShirtService is an abstract class that holds the necessary details to construct a request with + the proper headers to contact the AShirt backend. Note that this goes up to modeling the request. + The actual sending of the request is left to the subclasses. + """ + + def __init__(self, api_url: str, access_key: str, secret_key_b64: str): + self.api_url = api_url + self.access_key = access_key + self.secret_key = b64decode(secret_key_b64) + + @abstractmethod + def _make_request(cls, cfg: RC, headers: dict[str, str], body: Optional[bytes])->bytes|str|int: + """ + _make_request is an abstract method designed to actually make the request. Subclasses will + need to implement this with the boilerplate code that actually does the request. + """ + pass + + def get_operations(self): + return self.build_request(RC('GET', '/api/operations')) + + def create_operation(self, i: CreateOperationInput): + return self.build_request(RC('POST', '/api/operations', json.dumps(i))) + + def check_connection(self): + return self.build_request(RC('GET', '/api/checkconnection')) + + def get_evidence(self, operation_slug: str, evidence_uuid: str): + return self.build_request(RC('GET', f'/api/operations/{operation_slug}/evidence/{evidence_uuid}')) + + def get_evidence_content(self, operation_slug: str, evidence_uuid: str, content_type: Literal['media', 'preview']='media'): + return self.build_request(RC( + 'GET', + f'/api/operations/{operation_slug}/evidence/{evidence_uuid}/{content_type}', + None, + 'raw' + )) + + def create_evidence(self, operation_slug: str, i: CreateEvidenceInput): + body = { + 'notes': i['notes'], + } + add_if_not_none(body, 'contentType', i.get('content_type')) + add_if_not_none(body, 'tagIds', i.get('tag_ids'), json.dumps) + + data = 
encode_form(body, {"file": i.get('file')}) + + return self.build_request(RC('POST', + f'/api/operations/{operation_slug}/evidence', + body=data['data'], + multipart_boundary=data['boundary']) + ) + + def update_evidence(self, operation_slug: str, evidence_uuid: str, i: UpdateEvidenceInput): + body = {} + + add_if_not_none(body, 'notes', i.get('notes')) + add_if_not_none(body, 'contentType', i.get('content_type')) + add_if_not_none(body, 'tagsToAdd', i.get('add_tag_ids'), json.dumps) + add_if_not_none(body, 'tagsToRemove', i.get('remove_tag_ids'), json.dumps) + + data = encode_form(body, {"file": i.get('file')}) + + return self.build_request(RC('PUT', + f'/api/operations/{operation_slug}/evidence/{evidence_uuid}', + body=data['data'], + multipart_boundary=data['boundary'], + return_type='status' + )) + + def upsert_evidence_metadata(self, operation_slug: str, evidence_uuid: str, i: UpsertEvidenceMetadata): + return self.build_request(RC( + 'PUT', + f'/api/operations/{operation_slug}/evidence/{evidence_uuid}/metadata', + body=json.dumps(i), + return_type='status' + )) + + def get_operation_tags(self, operation_slug: str): + return self.build_request(RC('GET', f'/api/operations/{operation_slug}/tags')) + + def create_operation_tag(self, operation_slug: str, i: CreateTagInput): + return self.build_request(RC('POST', f'/api/operations/{operation_slug}/tags', json.dumps(i))) + + def build_request(self, cfg: RC): + """ + build_request models a request, and the passes the request to the actual executor methods + (_make_request) + """ + now = now_in_rfc1123() + + # with_body should now be either bytes or None + with_body = cfg.body.encode() if type(cfg.body) is str else cfg.body + + auth = make_hmac(cfg.method, cfg.path, now, with_body, + self.access_key, self.secret_key) + + if cfg.multipart_boundary is None: + content_type = "application/json" + else: + content_type = f'multipart/form-data; boundary={cfg.multipart_boundary}' + + headers = { + "Content-Type": content_type, 
+ "Date": now, + "Authorization": auth, + } + + return self._make_request(cfg, headers, with_body) + + +def add_if_not_none(body: dict[str, Any], key: str, value: Any, tf: Callable[[Any], Any]=None): + if value is not None: + body.update({key: value if tf is None else tf(value)}) diff --git a/workers/vision-web/src/services/ashirt_sync.py b/workers/vision-web/src/services/ashirt_sync.py new file mode 100644 index 0000000..f788327 --- /dev/null +++ b/workers/vision-web/src/services/ashirt_sync.py @@ -0,0 +1,32 @@ +from typing import Optional +import requests + +from .ashirt_base_class import AShirtService +from . import ( + RequestConfig as RC, +) + + +class AShirtRequestsService(AShirtService): + """ + AShirtRequestsService is a subclass of AShirtService that makes requests using the Requests + library. This is a synchronous library, and so care needs to be taken when using this service. + """ + def __init__(self, api_url: str, access_key: str, secret_key_b64: str): + super().__init__(api_url, access_key, secret_key_b64) + + def _make_request(self, cfg: RC, headers: dict[str, str], body: Optional[bytes])->bytes: + resp = requests.request( + cfg.method, self._route_to(cfg.path), headers=headers, data=body, stream=True) + + if cfg.return_type == 'json': + return resp.json() + elif cfg.return_type == 'status': + return resp.status_code + elif cfg.return_type == 'text': + return resp.text + + return resp.content + + def _route_to(self, path: str): + return f'{self.api_url}{path}' diff --git a/workers/vision-web/src/services/helpers.py b/workers/vision-web/src/services/helpers.py new file mode 100644 index 0000000..bf66260 --- /dev/null +++ b/workers/vision-web/src/services/helpers.py @@ -0,0 +1,82 @@ +from base64 import b64encode, urlsafe_b64encode +from datetime import datetime +import hashlib +import hmac +import os +from typing import Optional +from wsgiref.handlers import format_date_time + +from .types import HTTP_METHOD, FileData, MultipartData + + +def make_hmac(
+ method: HTTP_METHOD, + path: str, + date: str, + body: Optional[bytes], + access_key: str, + secret_key: bytes +): + """ + make_hmac builds the authentication string needed to contact ashirt. + """ + body_digest_method = hashlib.sha256() + if body is not None: + body_digest_method.update(body) + body_digest = body_digest_method.digest() + + to_be_hashed = f'{method}\n{path}\n{date}\n' + full_message = to_be_hashed.encode() + body_digest + + hmacMessage = b64encode( + hmac.new(secret_key, full_message, hashlib.sha256).digest()) + + return f'{access_key}:{hmacMessage.decode("ascii")}' + + +def now_in_rfc1123(): + """now_in_rfc1123 constructs a date like: Wed, 11 May 2022 09:29:02 GMT""" + return format_date_time(datetime.now().timestamp()) + + +def _random_char(length: int): + return urlsafe_b64encode(os.urandom(length)) + + +def encode_form(fields: dict[str, str], files: dict[str, FileData]) -> MultipartData: + boundary = "----AShirtFormData-".encode() + _random_char(30) + newline = "\r\n".encode() + part = "--".encode() + boundary_start = part + boundary + newline + last_boundary = part + boundary + part + newline + content_dispo = "Content-Disposition: form-data".encode() + + field_buff = bytes() + for key, value in fields.items(): + entry = ( + boundary_start + + content_dispo + f'; name="{key}"'.encode() + + newline + newline + + value.encode() + + newline + ) + field_buff += entry + + file_buff = bytes() + for key, value in files.items(): + if value is None: + continue + entry = ( + boundary_start + + content_dispo + f'; name="{key}"; filename="{value["filename"]}"'.encode() + + newline + f'Content-Type: {value["mimetype"]}'.encode() + + newline + newline + + value['content'] + + newline + ) + file_buff += entry + + return { + "boundary": boundary.decode(), + "data": field_buff + file_buff + last_boundary + } diff --git a/workers/vision-web/src/services/types.py b/workers/vision-web/src/services/types.py new file mode 100644 index 0000000..cb1890c ---
/dev/null +++ b/workers/vision-web/src/services/types.py @@ -0,0 +1,80 @@ +from dataclasses import dataclass +import mimetypes +from typing import Literal, Optional, TypedDict + +from constants.supported_content_type import SupportedContentType + +HTTP_METHOD = Literal['GET', 'POST', 'PUT', 'DELETE'] + + +class FileData(TypedDict): + filename: str + mimetype: str + content: bytes + + +@dataclass(frozen=True) +class RequestConfig: + """ + RequestConfig abstracts a request so that it can be sent via different libraries, + in case you don't like requests + """ + method: HTTP_METHOD + path: str + body: Optional[bytes | str] = None + return_type: Literal["json", "raw", "status", "text"] = "json" + multipart_boundary: Optional[str] = None + + +# The below are all inputs for various API calls + +class CreateOperationInput(TypedDict): + slug: str + name: str + + +class CreateEvidenceInput(TypedDict): + notes: str + content_type: Optional[SupportedContentType] + tag_ids: Optional[list[int]] + file: Optional[FileData] + + +class UpdateEvidenceInput(TypedDict): + notes: Optional[str] + content_type: Optional[SupportedContentType] + add_tag_ids: Optional[list[int]] + remove_tag_ids: Optional[list[int]] + file: Optional[FileData] + + +class UpsertEvidenceMetadata(TypedDict): + source: str + body: str + status: str + message: Optional[str] + canProcess: Optional[bool] + + +class CreateTagInput(TypedDict): + name: str + colorName: Optional[str] + + +class MultipartData(TypedDict): + boundary: str + data: bytes + + +def parse_file(filename: str, binary=True): + method = 'rb' if binary else 'r' + with open(filename, method) as fh: + data = fh.read(-1) + + mimetypes.guess_type(filename) + + return FileData( + filename=filename, + content=data, + mimetype="application/octet-stream" + ) diff --git a/workers/vision-web/src/state.py b/workers/vision-web/src/state.py new file mode 100644 index 0000000..b56fdd0 --- /dev/null +++ b/workers/vision-web/src/state.py @@ -0,0 +1,7 @@ +class 
RequestState(object): + """ + RequestState captures the memory needs of an in-flight request. If you need to store data + temporarily (for the lifetime of a request), you can stick it here + """ + def __init__(self, request_logger): + self.req_log = request_logger diff --git a/workers/vision-web/src/wsgi.py b/workers/vision-web/src/wsgi.py new file mode 100644 index 0000000..de92b92 --- /dev/null +++ b/workers/vision-web/src/wsgi.py @@ -0,0 +1,10 @@ +# Setting the import path so that flask development properly runs the code +# (Add "." to the import path searching) +import sys +from os.path import abspath, dirname + +sys.path.insert(0, dirname(abspath(__file__))) + +from main import create_app + +app = create_app() From 84530ba51c6123a15308dc3c79d8e7f49b2fe049 Mon Sep 17 00:00:00 2001 From: pookie Date: Wed, 19 Jun 2024 15:44:54 -0700 Subject: [PATCH 02/16] updating prod build --- workers/vision-web/Dockerfile.prod | 37 ++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 workers/vision-web/Dockerfile.prod diff --git a/workers/vision-web/Dockerfile.prod b/workers/vision-web/Dockerfile.prod new file mode 100644 index 0000000..381beec --- /dev/null +++ b/workers/vision-web/Dockerfile.prod @@ -0,0 +1,37 @@ +FROM --platform=linux/amd64 python:3.12-slim AS builder + +WORKDIR /build +COPY Pipfile.lock Pipfile ./ + +RUN /root/.local/bin/pipenv requirements > requirements.txt +### + +FROM --platform=linux/amd64 python:3.12-slim AS runner + +WORKDIR /app +COPY --from=builder /build/requirements.txt . +COPY --from=builder /build/model/ . 
+################################### +# Install other dependencies here # +################################### + +RUN pip install -r requirements.txt +RUN pip install --pre onnxruntime-genai numpy huggingface_hub +# Download the required model components +RUN huggingface-cli download microsoft/Phi-3-vision-128k-instruct-onnx-cpu --include cpu-int4-rtn-block-32-acc-level-4/* --local-dir model/ + +# COPY bin/docker_start.sh ./start.sh +COPY src . + +EXPOSE 80 + +# Run as Alpine's guest user +USER 405 + +# some guidance on using gunicorn in containers: +# https://pythonspeed.com/articles/gunicorn-in-docker/ +CMD ["gunicorn", "--worker-tmp-dir", "/dev/shm", \ + "--workers=1", "--threads=2", "--worker-class=gthread", \ + "--log-file=-", \ + "-b", "0.0.0.0:80", "wsgi:app"] + From da57ebaa1692e2f1bdd52ad110c265d95b176148 Mon Sep 17 00:00:00 2001 From: pookie Date: Wed, 19 Jun 2024 15:47:15 -0700 Subject: [PATCH 03/16] updating build --- workers/vision-web/Dockerfile.prod | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/workers/vision-web/Dockerfile.prod b/workers/vision-web/Dockerfile.prod index 381beec..d281a48 100644 --- a/workers/vision-web/Dockerfile.prod +++ b/workers/vision-web/Dockerfile.prod @@ -1,8 +1,9 @@ FROM --platform=linux/amd64 python:3.12-slim AS builder WORKDIR /build -COPY Pipfile.lock Pipfile ./ +COPY Pipfile.lock Pipfile ./ +RUN pip install --user pipenv RUN /root/.local/bin/pipenv requirements > requirements.txt ### @@ -15,7 +16,7 @@ COPY --from=builder /build/model/ . 
# Install other dependencies here # ################################### -RUN pip install -r requirements.txt +RUN pip install -r requirements.txt RUN pip install --pre onnxruntime-genai numpy huggingface_hub # Download the required model components RUN huggingface-cli download microsoft/Phi-3-vision-128k-instruct-onnx-cpu --include cpu-int4-rtn-block-32-acc-level-4/* --local-dir model/ From bb4ebd5563663621663483e052c1c57700d02b63 Mon Sep 17 00:00:00 2001 From: pookie Date: Wed, 19 Jun 2024 15:48:50 -0700 Subject: [PATCH 04/16] removing copy --- workers/vision-web/Dockerfile.prod | 1 - 1 file changed, 1 deletion(-) diff --git a/workers/vision-web/Dockerfile.prod b/workers/vision-web/Dockerfile.prod index d281a48..e14fb8d 100644 --- a/workers/vision-web/Dockerfile.prod +++ b/workers/vision-web/Dockerfile.prod @@ -11,7 +11,6 @@ FROM --platform=linux/amd64 python:3.12-slim AS runner WORKDIR /app COPY --from=builder /build/requirements.txt . -COPY --from=builder /build/model/ . ################################### # Install other dependencies here # ################################### From 60acc989c4fa71b86b83057a770e6a8f9cac25a4 Mon Sep 17 00:00:00 2001 From: pookie Date: Wed, 19 Jun 2024 15:52:31 -0700 Subject: [PATCH 05/16] increasing timeout --- workers/vision-web/Dockerfile.prod | 1 + 1 file changed, 1 insertion(+) diff --git a/workers/vision-web/Dockerfile.prod b/workers/vision-web/Dockerfile.prod index e14fb8d..a6834cb 100644 --- a/workers/vision-web/Dockerfile.prod +++ b/workers/vision-web/Dockerfile.prod @@ -33,5 +33,6 @@ USER 405 CMD ["gunicorn", "--worker-tmp-dir", "/dev/shm", \ "--workers=1", "--threads=2", "--worker-class=gthread", \ "--log-file=-", \ + "-t=600" \ "-b", "0.0.0.0:80", "wsgi:app"] From 11cc1fe013d21b6e318cce74dca4ca6f3e9ee781 Mon Sep 17 00:00:00 2001 From: pookie Date: Wed, 19 Jun 2024 15:55:07 -0700 Subject: [PATCH 06/16] fixing formatting --- workers/vision-web/Dockerfile.prod | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff 
--git a/workers/vision-web/Dockerfile.prod b/workers/vision-web/Dockerfile.prod index a6834cb..7b95276 100644 --- a/workers/vision-web/Dockerfile.prod +++ b/workers/vision-web/Dockerfile.prod @@ -33,6 +33,5 @@ USER 405 CMD ["gunicorn", "--worker-tmp-dir", "/dev/shm", \ "--workers=1", "--threads=2", "--worker-class=gthread", \ "--log-file=-", \ - "-t=600" \ - "-b", "0.0.0.0:80", "wsgi:app"] - + "--timeout=600", \ + "-b", "0.0.0.0:80", "wsgi:app"] \ No newline at end of file From 554f7c3a1102dc7b765ebc559f26a059dd1e8219 Mon Sep 17 00:00:00 2001 From: pookie Date: Wed, 19 Jun 2024 16:03:32 -0700 Subject: [PATCH 07/16] moving back to docker --- workers/vision-web/Makefile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/workers/vision-web/Makefile b/workers/vision-web/Makefile index e7f3dff..f9662bf 100644 --- a/workers/vision-web/Makefile +++ b/workers/vision-web/Makefile @@ -3,14 +3,14 @@ imageName = vision-web .PHONY: build_dev build_dev: - podman build -t $(imageNameDev) -f Dockerfile.dev . + docker build -t $(imageNameDev) -f Dockerfile.dev . .PHONY: build_release build_release: - podman build -t $(imageName) -f Dockerfile.prod . + docker build -t $(imageName) -f Dockerfile.prod . 
run_dev: - podman compose up --build + docker compose up --build .PHONY: test-test test-test: @@ -27,3 +27,4 @@ test-unsupported: curl -XPOST "http://localhost:9000/process" \ -H "Content-Type: application/json" \ -d '{"type": "unsupported"}' + From 0c03e7c5f23fe2ae131985a7a48f96284c0867ef Mon Sep 17 00:00:00 2001 From: pookie Date: Wed, 19 Jun 2024 16:03:46 -0700 Subject: [PATCH 08/16] changing to prod container --- workers/vision-web/docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workers/vision-web/docker-compose.yml b/workers/vision-web/docker-compose.yml index a3873b6..1a41dea 100644 --- a/workers/vision-web/docker-compose.yml +++ b/workers/vision-web/docker-compose.yml @@ -2,7 +2,7 @@ version: '3' services: app: build: - dockerfile: Dockerfile.dev + dockerfile: Dockerfile.prod context: . env_file: - .env From 98ca59da05964d31a18dc5edc189504fb99fab5d Mon Sep 17 00:00:00 2001 From: pookie Date: Wed, 19 Jun 2024 16:06:27 -0700 Subject: [PATCH 09/16] example of default questions to ask --- workers/vision-web/.env.example | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 workers/vision-web/.env.example diff --git a/workers/vision-web/.env.example b/workers/vision-web/.env.example new file mode 100644 index 0000000..a05742a --- /dev/null +++ b/workers/vision-web/.env.example @@ -0,0 +1,10 @@ +# true/false. If true, enables some dev helpers +ENABLE_DEV=true +# Points to where the backend is located. This should not have a trailing slash +ASHIRT_BACKEND_URL=http://10.0.0.100:3000 +# The access key of a headless user +ASHIRT_ACCESS_KEY=gR6nVtaQmp2SvzIqLUWdedDk +# The secret key (in base64 format -- how it is delivered via the ashirt UI) of a headless user +ASHIRT_SECRET_KEY=WvtvxFaJS0mPs82nCzqamI+bOGXpq7EIQhg4UD8nxS5448XG9N0gNAceJGBLPdCA3kAzC4MdUSHnKCJ/lZD++A== +# Add a list of questions +VISION_QUESTIONS="What does the image say?" 
\ No newline at end of file From 3de99a5281389c2f443bff690c41a6f63ceadc79 Mon Sep 17 00:00:00 2001 From: pookie Date: Wed, 19 Jun 2024 16:07:29 -0700 Subject: [PATCH 10/16] reducing to single thread per worker --- workers/vision-web/Dockerfile.prod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workers/vision-web/Dockerfile.prod b/workers/vision-web/Dockerfile.prod index 7b95276..1792b62 100644 --- a/workers/vision-web/Dockerfile.prod +++ b/workers/vision-web/Dockerfile.prod @@ -31,7 +31,7 @@ USER 405 # some guidance on using gunicorn in containers: # https://pythonspeed.com/articles/gunicorn-in-docker/ CMD ["gunicorn", "--worker-tmp-dir", "/dev/shm", \ - "--workers=1", "--threads=2", "--worker-class=gthread", \ + "--workers=1", "--worker-class=gthread", \ "--log-file=-", \ "--timeout=600", \ "-b", "0.0.0.0:80", "wsgi:app"] \ No newline at end of file From cf8cff60addb48bca2ca0d46b11223fecf9018d4 Mon Sep 17 00:00:00 2001 From: pookie Date: Wed, 19 Jun 2024 16:18:56 -0700 Subject: [PATCH 11/16] fixing typo --- workers/vision-web/Dockerfile.dev | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workers/vision-web/Dockerfile.dev b/workers/vision-web/Dockerfile.dev index b9f35b8..edfd375 100644 --- a/workers/vision-web/Dockerfile.dev +++ b/workers/vision-web/Dockerfile.dev @@ -19,7 +19,7 @@ RUN /root/.local/bin/pipenv run pip install --pre onnxruntime-genai numpy ENV FLASK_APP src/wsgi.py ENV FLASK_DEBUG 1 -ENV FLAKS_ENV=developement +ENV FLASK_ENV=developement EXPOSE 80 From abde552bf255d96e3a29ccacc672b65d6a7e0cbd Mon Sep 17 00:00:00 2001 From: pookie Date: Wed, 19 Jun 2024 16:19:10 -0700 Subject: [PATCH 12/16] adding vision-web to builds --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index d29e94b..2d95e02 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -13,7 +13,7 @@ jobs: name: Build and Push strategy: 
 matrix: - service: [tesseract-lambda-python,keyword-lambda-python,tesseract-web,tesseract-lambda] + service: [tesseract-lambda-python,keyword-lambda-python,tesseract-web,tesseract-lambda,vision-web] fail-fast: false runs-on: ubuntu-latest steps: From 579be90057ad065fc8b2857c3e6aebb8c50f4c0c Mon Sep 17 00:00:00 2001 From: pookie Date: Wed, 19 Jun 2024 16:21:30 -0700 Subject: [PATCH 13/16] fixing dockerfile path --- workers/vision-web/{Dockerfile.prod => Dockerfile} | 0 workers/vision-web/docker-compose.yml | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename workers/vision-web/{Dockerfile.prod => Dockerfile} (100%) diff --git a/workers/vision-web/Dockerfile.prod b/workers/vision-web/Dockerfile similarity index 100% rename from workers/vision-web/Dockerfile.prod rename to workers/vision-web/Dockerfile diff --git a/workers/vision-web/docker-compose.yml b/workers/vision-web/docker-compose.yml index 1a41dea..adc440d 100644 --- a/workers/vision-web/docker-compose.yml +++ b/workers/vision-web/docker-compose.yml @@ -2,7 +2,7 @@ version: '3' services: app: build: - dockerfile: Dockerfile.prod + dockerfile: Dockerfile context: . env_file: - .env From 6bcad051cda1bdac5ebdd3e901b0a247c2db1eac Mon Sep 17 00:00:00 2001 From: pookie Date: Wed, 19 Jun 2024 16:25:06 -0700 Subject: [PATCH 14/16] changing to port 8080 --- workers/vision-web/Dockerfile | 4 ++-- workers/vision-web/Dockerfile.dev | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/workers/vision-web/Dockerfile b/workers/vision-web/Dockerfile index 1792b62..40f1965 100644 --- a/workers/vision-web/Dockerfile +++ b/workers/vision-web/Dockerfile @@ -23,7 +23,7 @@ RUN huggingface-cli download microsoft/Phi-3-vision-128k-instruct-onnx-cpu --inc # COPY bin/docker_start.sh ./start.sh COPY src . 
-EXPOSE 80 +EXPOSE 8080 # Run as Alpine's guest user USER 405 @@ -34,4 +34,4 @@ CMD ["gunicorn", "--worker-tmp-dir", "/dev/shm", \ "--workers=1", "--worker-class=gthread", \ "--log-file=-", \ "--timeout=600", \ - "-b", "0.0.0.0:80", "wsgi:app"] \ No newline at end of file + "-b", "0.0.0.0:8080", "wsgi:app"] \ No newline at end of file diff --git a/workers/vision-web/Dockerfile.dev b/workers/vision-web/Dockerfile.dev index edfd375..9f70940 100644 --- a/workers/vision-web/Dockerfile.dev +++ b/workers/vision-web/Dockerfile.dev @@ -21,10 +21,10 @@ ENV FLASK_APP src/wsgi.py ENV FLASK_DEBUG 1 ENV FLASK_ENV=developement -EXPOSE 80 +EXPOSE 8080 COPY src ./src/ CMD ["/root/.local/bin/pipenv", "run", \ "flask", "run", \ - "--host=0.0.0.0", "--port=80"] + "--host=0.0.0.0", "--port=8080"] From 4cf51c475f046a3ca1faa725aa4d022b190cde51 Mon Sep 17 00:00:00 2001 From: pookie Date: Wed, 19 Jun 2024 16:25:18 -0700 Subject: [PATCH 15/16] changing to port 8080 --- workers/vision-web/docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workers/vision-web/docker-compose.yml b/workers/vision-web/docker-compose.yml index adc440d..7c702f4 100644 --- a/workers/vision-web/docker-compose.yml +++ b/workers/vision-web/docker-compose.yml @@ -7,6 +7,6 @@ services: env_file: - .env ports: - - 9000:80 + - 9000:8080 volumes: - ./src:/app/src From 911401f3184f79ac1db7e64b3cdcea1114507525 Mon Sep 17 00:00:00 2001 From: pookie Date: Wed, 19 Jun 2024 16:25:30 -0700 Subject: [PATCH 16/16] updating readme --- workers/vision-web/Readme.md | 35 +++++------------------------------ 1 file changed, 5 insertions(+), 30 deletions(-) diff --git a/workers/vision-web/Readme.md b/workers/vision-web/Readme.md index 690e28d..5381af8 100644 --- a/workers/vision-web/Readme.md +++ b/workers/vision-web/Readme.md @@ -1,9 +1,4 @@ -# Python/Flask Enhancement Worker - -This worker is a microservice-type worker built on Python/Flask. 
-It uses [pipenv](https://pipenv.pypa.io/en/latest/) to manage dependencies. -This project strives to be as minimalistic as possible, but does include some helpful libraries. This -includes: +# Vision Web Worker * [Flask](https://flask.palletsprojects.com/en/2.1.x/), to manage the network connection * [gunicorn](https://gunicorn.org/), for production deployment @@ -23,7 +18,7 @@ The typical configuration for deploying this worker archetype is going to look r { "type": "web", "version": 1, - "url": "http://testapp/ashirt/process" + "url": "http://vision-web/process" } ``` @@ -33,26 +28,6 @@ Note the url: this is likely what will change for your version. Most programs should be able to largely ignore most of the code, and instead focus on `actions` directory, and specifically the events you want to target. -## How it works - -This section is mostly for those that need to do more than implement the core functionality. This application, like many other webservices, can be divided up into two phases: the startup phase, and the serving phase. - -### Startup Phase - -The Startup Phase is as you might expect: this state is entered once the application starts, and it is responsible for configuring the application for long-term running. The most important bit here is likely the configuration and route management. `create_app` within `main.py` will load configuration details from the environment (locally: `.env` file), create a class for handling requests to an AShirt backend, and register standard routes. Then, either the main line in `main.py` or `wsgi.py` will start the server. This phase ends once the server starts servicing requests, and the application then enters the serving phase. - -### Serving Phase - -The serving phase is largely controlled by what particular route is entered when a user contacts the server. 
The `routes` directory provides two set of routes: the `ashirt` routes, which are the routes that the AShirt backend will call, and the `dev` routes, which are designed to be only created in a development environment. These serve as helpers and sanity checks. - -When a route is reached -- in particular, when the process route is reached (see: `process_request`), then the service will kick off processing of that data. Some initial boilerplate style code manages the request, and directs all of the actual work to some function in the `actions` directory. These functions will return one of a handful of responses, which will be used to generate the true response to the AShirt backend. - -Once the request is complete, the application waits for another request. - -### Contacting AShirt - -The `services` folder contains a class that is used to contact ashirt. This is the typical way of getting the actual content / interacting with ashirt. This is treated mostly as a singleton by preparing the service in `main.py`, and recording an instance in `services/__init__.py`. Any other module can then use the `svc` function to get the loaded instance. - ## Integrating into AShirt testing environment Notably, the dev port exposed is port 8080, so all port mapping has to be done with that in mind. When running locally (not via docker), the exposed port is configurable. @@ -60,15 +35,15 @@ Notably, the dev port exposed is port 8080, so all port mapping has to be done w This configuration should work for your scenario, though the volumes mapped might need to be different. ```yaml - py_flask: + vision-web: build: - context: enhancement_worker_templates/web/python_flask + context: ashirt-workers/workers/vision-web dockerfile: Dockerfile.dev ports: - 3004:8080 restart: on-failure volumes: - - ./enhancement_worker_templates/web/python_flask/src:/app/src + - ./ashirt-workers/workers/vision-web/:/app/ environment: ENABLE_DEV: true ASHIRT_BACKEND_URL: http://backend:3000