From aa40dd128f7bbc59061a858d6ba89d82f498cdc1 Mon Sep 17 00:00:00 2001 From: martin Date: Thu, 14 May 2026 15:18:42 +0200 Subject: [PATCH 1/2] fix: deploy name conflict --- deploy.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/deploy.py b/deploy.py index d415284..4efed4b 100644 --- a/deploy.py +++ b/deploy.py @@ -165,8 +165,13 @@ def start_python_server(self, server_path: Path, port: int) -> ProcessInfo: def start_docker_compose(self, compose_file: Path, port: Optional[int] = None) -> ProcessInfo: """Start docker-compose service with optional port configuration""" platform = sys.platform - # Use instance ID as docker-compose project name for isolation - project_name = f"toolomics_{self.instance_id}" + # Project name must be unique per compose file: workspace-hash isolates + # *this workspace* from other workspaces, and the parent-directory slug + # isolates *this MCP* from sibling MCPs whose compose files declare the + # same service name (e.g. `app`). Sharing a project across MCPs collapses + # them onto the same image tag and container name. + service_slug = compose_file.parent.name.lower().replace('.', '_') + project_name = f"toolomics_{self.instance_id}_{service_slug}" if platform == "linux": cmd = ['docker', 'compose', '-p', project_name, '-f', str(compose_file), 'up', '-d'] else: From 4c0c7e94415dda40182e22f92e0fbf050c57fc2c Mon Sep 17 00:00:00 2001 From: martin Date: Fri, 29 May 2026 11:30:25 +0200 Subject: [PATCH 2/2] fix: ensure python3.12 env and avoid installing the full requirement.txt when fastmcp was enough --- .../searxng/searxng_3bb7bcd4/settings.yml | 2830 +++++++++++++++++ mcp_host/shell/Dockerfile | 14 +- .../geospatial-pure-python/SKILL.md | 476 +++ 3 files changed, 3314 insertions(+), 6 deletions(-) create mode 100644 mcp_host/browser/searxng/searxng_3bb7bcd4/settings.yml create mode 100644 mcp_host/skills/scientific-skills/scientific-skills/geospatial-pure-python/SKILL.md diff --git a/mcp_host/browser/searxng/searxng_3bb7bcd4/settings.yml b/mcp_host/browser/searxng/searxng_3bb7bcd4/settings.yml new file mode 100644 index 0000000..3f22abf --- /dev/null +++ b/mcp_host/browser/searxng/searxng_3bb7bcd4/settings.yml @@ -0,0 +1,2830 @@ +general: + # Debug mode, only for development. Is overwritten by ${SEARXNG_DEBUG} + debug: false + # displayed name + instance_name: "SearXNG" + # For example: https://example.com/privacy + privacypolicy_url: false + # use true to use your own donation page written in searx/info/en/donate.md + # use false to disable the donation link + donation_url: false + # mailto:contact@example.com + contact_url: false + # record stats + enable_metrics: true + # expose stats in open metrics format at /metrics + # leave empty to disable (no password set) + # open_metrics: + open_metrics: '' + +brand: + new_issue_url: https://github.com/searxng/searxng/issues/new + docs_url: https://docs.searxng.org/ + public_instances: https://searx.space + wiki_url: https://github.com/searxng/searxng/wiki + issue_url: https://github.com/searxng/searxng/issues + # custom: + # # Custom entries in the footer: [title]: [link] + # links: + # Uptime: https://uptime.searxng.org/history/darmarit-org + # About: "https://searxng.org" + +search: + # Filter results. 0: None, 1: Moderate, 2: Strict + safe_search: 0 + # Existing autocomplete backends: "360search", "baidu", "brave", "dbpedia", "duckduckgo", "google", "yandex", + # "mwmbl", "naver", "seznam", "sogou", "startpage", "stract", "swisscows", "quark", "qwant", "wikipedia" - + # leave blank to turn it off by default. + autocomplete: "" + # minimun characters to type before autocompleter starts + autocomplete_min: 4 + # backend for the favicon near URL in search results. + # Available resolvers: "allesedv", "duckduckgo", "google", "yandex" - leave blank to turn it off by default. + favicon_resolver: "" + # Default search language - leave blank to detect from browser information or + # use codes from 'languages.py' + default_lang: "auto" + # max_page: 0 # if engine supports paging, 0 means unlimited numbers of pages + # Available languages + # languages: + # - all + # - en + # - en-US + # - de + # - it-IT + # - fr + # - fr-BE + # ban time in seconds after engine errors + ban_time_on_fail: 5 + # max ban time in seconds after engine errors + max_ban_time_on_fail: 120 + suspended_times: + # Engine suspension time after error (in seconds; set to 0 to disable) + # For error "Access denied" and "HTTP error [402, 403]" + SearxEngineAccessDenied: 86400 + # For error "CAPTCHA" + SearxEngineCaptcha: 86400 + # For error "Too many request" and "HTTP error 429" + SearxEngineTooManyRequests: 3600 + # Cloudflare CAPTCHA + cf_SearxEngineCaptcha: 1296000 + cf_SearxEngineAccessDenied: 86400 + # ReCAPTCHA + recaptcha_SearxEngineCaptcha: 604800 + + # remove format to deny access, use lower case. + # formats: [html, csv, json, rss] + formats: + - html + +server: + # Is overwritten by ${SEARXNG_PORT} and ${SEARXNG_BIND_ADDRESS} + port: 8888 + bind_address: "127.0.0.1" + # public URL of the instance, to ensure correct inbound links. Is overwritten + # by ${SEARXNG_BASE_URL}. + base_url: false # "http://example.com/location" + # rate limit the number of request on the instance, block some bots. + # Is overwritten by ${SEARXNG_LIMITER} + limiter: false + # enable features designed only for public instances. + # Is overwritten by ${SEARXNG_PUBLIC_INSTANCE} + public_instance: false + + # If your instance owns a /etc/searxng/settings.yml file, then set the following + # values there. + + secret_key: "IJkokhFupPqDOQj6Lx8J89WsRioNf" # Is overwritten by ${SEARXNG_SECRET} + # Proxy image results through SearXNG. Is overwritten by ${SEARXNG_IMAGE_PROXY} + image_proxy: false + # 1.0 and 1.1 are supported + http_protocol_version: "1.0" + # POST queries are "more secure!" but are also the source of hard-to-locate + # annoyances, which is why GET may be better for end users and their browsers. + # see https://github.com/searxng/searxng/pull/3619 + # Is overwritten by ${SEARXNG_METHOD} + method: "POST" + default_http_headers: + X-Content-Type-Options: nosniff + X-Download-Options: noopen + X-Robots-Tag: noindex, nofollow + Referrer-Policy: no-referrer + +valkey: + # URL to connect valkey database. Is overwritten by ${SEARXNG_VALKEY_URL}. + # https://docs.searxng.org/admin/settings/settings_valkey.html#settings-valkey + # url: valkey://localhost:6379/0 + url: false + +ui: + # Custom static path - leave it blank if you didn't change + static_path: "" + # Custom templates path - leave it blank if you didn't change + templates_path: "" + # query_in_title: When true, the result page's titles contains the query + # it decreases the privacy, since the browser can records the page titles. + query_in_title: false + # infinite_scroll: When true, automatically loads the next page when scrolling to bottom of the current page. + infinite_scroll: false + # ui theme + default_theme: simple + # center the results ? + center_alignment: false + # URL prefix of the internet archive, don't forget trailing slash (if needed). + # cache_url: "https://webcache.googleusercontent.com/search?q=cache:" + # Default interface locale - leave blank to detect from browser information or + # use codes from the 'locales' config section + default_locale: "" + # Open result links in a new tab by default + # results_on_new_tab: false + theme_args: + # style of simple theme: auto, light, dark, black + simple_style: auto + # Perform search immediately if a category selected. + # Disable to select multiple categories at once and start the search manually. + search_on_category_select: true + # Hotkeys: default or vim + hotkeys: default + # URL formatting: pretty, full or host + url_formatting: pretty + +# Lock arbitrary settings on the preferences page. +# +# preferences: +# lock: +# - categories +# - language +# - autocomplete +# - favicon +# - safesearch +# - method +# - doi_resolver +# - locale +# - theme +# - results_on_new_tab +# - infinite_scroll +# - search_on_category_select +# - method +# - image_proxy +# - query_in_title + +# communication with search engines +# +outgoing: + # default timeout in seconds, can be override by engine + request_timeout: 3.0 + # the maximum timeout in seconds + # max_request_timeout: 10.0 + # suffix of searxng_useragent, could contain information like an email address + # to the administrator + useragent_suffix: "" + # The maximum number of concurrent connections that may be established. + pool_connections: 100 + # Allow the connection pool to maintain keep-alive connections below this + # point. + pool_maxsize: 20 + # See https://www.python-httpx.org/http2/ + enable_http2: true + # uncomment below section if you want to use a custom server certificate + # see https://www.python-httpx.org/advanced/#changing-the-verification-defaults + # and https://www.python-httpx.org/compatibility/#ssl-configuration + # verify: ~/.mitmproxy/mitmproxy-ca-cert.cer + # + # uncomment below section if you want to use a proxyq see: SOCKS proxies + # https://2.python-requests.org/en/latest/user/advanced/#proxies + # are also supported: see + # https://2.python-requests.org/en/latest/user/advanced/#socks + # + # proxies: + # all://: + # - http://proxy1:8080 + # - http://proxy2:8080 + # + # using_tor_proxy: true + # + # Extra seconds to add in order to account for the time taken by the proxy + # + # extra_proxy_timeout: 10 + # + # uncomment below section only if you have more than one network interface + # which can be the source of outgoing search requests + # + # source_ips: + # - 1.1.1.1 + # - 1.1.1.2 + # - fe80::/126 + +# Plugin configuration, for more details see +# https://docs.searxng.org/admin/settings/settings_plugins.html +# +plugins: + + searx.plugins.calculator.SXNGPlugin: + active: true + + searx.plugins.hash_plugin.SXNGPlugin: + active: true + + searx.plugins.self_info.SXNGPlugin: + active: true + + searx.plugins.unit_converter.SXNGPlugin: + active: true + + searx.plugins.ahmia_filter.SXNGPlugin: + active: true + + searx.plugins.hostnames.SXNGPlugin: + active: true + + searx.plugins.time_zone.SXNGPlugin: + active: true + + searx.plugins.oa_doi_rewrite.SXNGPlugin: + active: false + + searx.plugins.tor_check.SXNGPlugin: + active: false + + searx.plugins.tracker_url_remover.SXNGPlugin: + active: true + + +# Configuration of the "Hostnames plugin": +# +# hostnames: +# replace: +# '(.*\.)?youtube\.com$': 'yt.example.com' +# '(.*\.)?youtu\.be$': 'yt.example.com' +# '(.*\.)?reddit\.com$': 'teddit.example.com' +# '(.*\.)?redd\.it$': 'teddit.example.com' +# '(www\.)?twitter\.com$': 'nitter.example.com' +# remove: +# - '(.*\.)?facebook.com$' +# low_priority: +# - '(.*\.)?google(\..*)?$' +# high_priority: +# - '(.*\.)?wikipedia.org$' +# +# Alternatively you can use external files for configuring the "Hostnames plugin": +# +# hostnames: +# replace: 'rewrite-hosts.yml' +# +# Content of 'rewrite-hosts.yml' (place the file in the same directory as 'settings.yml'): +# '(.*\.)?youtube\.com$': 'yt.example.com' +# '(.*\.)?youtu\.be$': 'yt.example.com' +# + +checker: + # disable checker when in debug mode + off_when_debug: true + + # use "scheduling: {}" to disable scheduling + # scheduling: interval or int + + # to activate the scheduler: + # * uncomment "scheduling" section + # * add "cache2 = name=searxngcache,items=2000,blocks=2000,blocksize=4096,bitmap=1" + # to your uwsgi.ini + + # scheduling: + # start_after: [300, 1800] # delay to start the first run of the checker + # every: [86400, 90000] # how often the checker runs + + # additional tests: only for the YAML anchors (see the engines section) + # + additional_tests: + rosebud: &test_rosebud + matrix: + query: rosebud + lang: en + result_container: + - not_empty + - ['one_title_contains', 'citizen kane'] + test: + - unique_results + + android: &test_android + matrix: + query: ['android'] + lang: ['en', 'de', 'fr', 'zh-CN'] + result_container: + - not_empty + - ['one_title_contains', 'google'] + test: + - unique_results + + # tests: only for the YAML anchors (see the engines section) + tests: + infobox: &tests_infobox + infobox: + matrix: + query: ["linux", "new york", "bbc"] + result_container: + - has_infobox + +categories_as_tabs: + general: + images: + videos: + news: + map: + music: + it: + science: + files: + social media: + +engines: + - name: 360search + engine: 360search + shortcut: 360so + disabled: true + + - name: 360search videos + engine: 360search_videos + shortcut: 360sov + disabled: true + + - name: 9gag + engine: 9gag + shortcut: 9g + disabled: true + + - name: acfun + engine: acfun + shortcut: acf + disabled: true + + - name: adobe stock + engine: adobe_stock + shortcut: asi + categories: ["images"] + # https://docs.searxng.org/dev/engines/online/adobe_stock.html + adobe_order: relevance + adobe_content_types: ["photo", "illustration", "zip_vector", "template", "3d", "image"] + timeout: 6 + disabled: true + + - name: adobe stock video + engine: adobe_stock + shortcut: asv + network: adobe stock + categories: ["videos"] + adobe_order: relevance + adobe_content_types: ["video"] + timeout: 6 + disabled: true + + - name: adobe stock audio + engine: adobe_stock + shortcut: asa + network: adobe stock + categories: ["music"] + adobe_order: relevance + adobe_content_types: ["audio"] + timeout: 6 + disabled: true + + - name: alexandria + engine: json_engine + shortcut: alx + categories: general + paging: true + search_url: https://api.alexandria.org/?a=1&q={query}&p={pageno} + results_query: results + title_query: title + url_query: url + content_query: snippet + timeout: 1.5 + disabled: true + about: + website: https://alexandria.org/ + official_api_documentation: https://github.com/alexandria-org/alexandria-api/raw/master/README.md + use_official_api: true + require_api_key: false + results: JSON + + - name: astrophysics data system + engine: astrophysics_data_system + shortcut: ads + # read https://docs.searxng.org/dev/engines/online/astrophysics_data_system.html + api_key: "" + inactive: true + + - name: alpine linux packages + engine: alpinelinux + disabled: true + shortcut: alp + + - name: annas archive + engine: annas_archive + disabled: true + shortcut: aa + timeout: 5 + + - name: ansa + engine: ansa + shortcut: ans + disabled: true + + # - name: annas articles + # engine: annas_archive + # shortcut: aaa + # # https://docs.searxng.org/dev/engines/online/annas_archive.html + # aa_content: 'magazine' # book_fiction, book_unknown, book_nonfiction, book_comic + # aa_ext: 'pdf' # pdf, epub, .. + # aa_sort: oldest' # newest, oldest, largest, smallest + + - name: apk mirror + engine: apkmirror + timeout: 4.0 + shortcut: apkm + disabled: true + + - name: apple app store + engine: apple_app_store + shortcut: aps + disabled: true + + # Requires Tor + - name: ahmia + engine: ahmia + categories: onions + enable_http: true + shortcut: ah + + - name: anaconda + engine: xpath + paging: true + first_page_num: 0 + search_url: https://anaconda.org/search?q={query}&page={pageno} + results_xpath: //tbody/tr + url_xpath: ./td/h5/a[last()]/@href + title_xpath: ./td/h5 + content_xpath: ./td[h5]/text() + categories: it + timeout: 6.0 + shortcut: conda + disabled: true + + - name: arch linux wiki + engine: archlinux + shortcut: al + + - name: nixos wiki + engine: mediawiki + shortcut: nixw + base_url: https://wiki.nixos.org/ + search_type: text + disabled: true + categories: [it, software wikis] + + - name: artic + engine: artic + shortcut: arc + timeout: 4.0 + + - name: arxiv + engine: arxiv + shortcut: arx + + - name: ask + engine: ask + shortcut: ask + disabled: true + + # - name: azure + # engine: azure + # shortcut: az + # categories: [it, cloud] + # azure_tenant_id: "your_tenant_id" + # azure_client_id: "your_client_id" + # azure_client_secret: "your_client_secret" + # disabled: true + + # tmp suspended: dh key too small + # - name: base + # engine: base + # shortcut: bs + + - name: bandcamp + engine: bandcamp + shortcut: bc + categories: music + + - name: baidu + baidu_category: general + categories: [general] + engine: baidu + shortcut: bd + disabled: true + + - name: baidu images + baidu_category: images + categories: [images] + engine: baidu + shortcut: bdi + disabled: true + + - name: baidu kaifa + baidu_category: it + categories: [it] + engine: baidu + shortcut: bdk + disabled: true + + - name: wikipedia + engine: wikipedia + shortcut: wp + # add "list" to the array to get results in the results list + display_type: ["infobox"] + categories: [general] + + - name: bilibili + engine: bilibili + shortcut: bil + disabled: true + + - name: bing + engine: bing + shortcut: bi + disabled: true + + - name: bing images + engine: bing_images + shortcut: bii + + - name: bing news + engine: bing_news + shortcut: bin + + - name: bing videos + engine: bing_videos + shortcut: biv + + - name: bitchute + engine: bitchute + shortcut: bit + disabled: true + + - name: bitbucket + engine: xpath + paging: true + search_url: https://bitbucket.org/repo/all/{pageno}?name={query} + url_xpath: //article[@class="repo-summary"]//a[@class="repo-link"]/@href + title_xpath: //article[@class="repo-summary"]//a[@class="repo-link"] + content_xpath: //article[@class="repo-summary"]/p + categories: [it, repos] + timeout: 4.0 + disabled: true + shortcut: bb + about: + website: https://bitbucket.org/ + wikidata_id: Q2493781 + official_api_documentation: https://developer.atlassian.com/bitbucket + use_official_api: false + require_api_key: false + results: HTML + + - name: bpb + engine: bpb + shortcut: bpb + disabled: true + + - name: btdigg + engine: btdigg + shortcut: bt + disabled: true + + - name: openverse + engine: openverse + categories: images + shortcut: opv + + - name: media.ccc.de + engine: ccc_media + shortcut: c3tv + # We don't set language: de here because media.ccc.de is not just + # for a German audience. It contains many English videos and many + # German videos have English subtitles. + disabled: true + + - name: chefkoch + engine: chefkoch + shortcut: chef + # to show premium or plus results too: + # skip_premium: false + + # WARNING: links from chinaso.com voilate users privacy + # Before activate these engines its mandatory to read + # - https://github.com/searxng/searxng/issues/4694 + # - https://docs.searxng.org/dev/engines/online/chinaso.html + + - name: chinaso news + engine: chinaso + shortcut: chinaso + categories: [news] + chinaso_category: news + chinaso_news_source: all + disabled: true + inactive: true + + - name: chinaso images + engine: chinaso + network: chinaso news + shortcut: chinasoi + categories: [images] + chinaso_category: images + disabled: true + inactive: true + + - name: chinaso videos + engine: chinaso + network: chinaso news + shortcut: chinasov + categories: [videos] + chinaso_category: videos + disabled: true + inactive: true + + - name: cloudflareai + engine: cloudflareai + shortcut: cfai + # get api token and accont id from https://developers.cloudflare.com/workers-ai/get-started/rest-api/ + cf_account_id: 'your_cf_accout_id' + cf_ai_api: 'your_cf_api' + # create your ai gateway by https://developers.cloudflare.com/ai-gateway/get-started/creating-gateway/ + cf_ai_gateway: 'your_cf_ai_gateway_name' + # find the model name from https://developers.cloudflare.com/workers-ai/models/#text-generation + cf_ai_model: 'ai_model_name' + # custom your preferences + # cf_ai_model_display_name: 'Cloudflare AI' + # cf_ai_model_assistant: 'prompts_for_assistant_role' + # cf_ai_model_system: 'prompts_for_system_role' + timeout: 30 + disabled: true + + - name: core.ac.uk + engine: core + shortcut: cor + # read https://docs.searxng.org/dev/engines/online/core.html + api_key: "" + inactive: true + + - name: crossref + engine: crossref + shortcut: cr + timeout: 30 + disabled: true + + - name: crowdview + engine: json_engine + shortcut: cv + categories: general + paging: false + search_url: https://crowdview-next-js.onrender.com/api/search-v3?query={query} + results_query: results + url_query: link + title_query: title + content_query: snippet + title_html_to_text: true + content_html_to_text: true + disabled: true + about: + website: https://crowdview.ai/ + + - name: yep + engine: yep + shortcut: yep + categories: general + search_type: web + timeout: 5 + disabled: true + + - name: yep images + engine: yep + shortcut: yepi + categories: images + search_type: images + disabled: true + + - name: yep news + engine: yep + shortcut: yepn + categories: news + search_type: news + disabled: true + + - name: currency + engine: currency_convert + shortcut: cc + + - name: deezer + engine: deezer + shortcut: dz + disabled: true + + - name: destatis + engine: destatis + shortcut: destat + disabled: true + + - name: deviantart + engine: deviantart + shortcut: da + timeout: 3.0 + + - name: ddg definitions + engine: duckduckgo_definitions + shortcut: ddd + weight: 2 + disabled: true + tests: *tests_infobox + + # cloudflare protected + # - name: digbt + # engine: digbt + # shortcut: dbt + # timeout: 6.0 + # disabled: true + + - name: docker hub + engine: docker_hub + shortcut: dh + categories: [it, packages] + + - name: encyclosearch + engine: json_engine + shortcut: es + categories: general + paging: true + search_url: https://encyclosearch.org/encyclosphere/search?q={query}&page={pageno}&resultsPerPage=15 + results_query: Results + url_query: SourceURL + title_query: Title + content_query: Description + disabled: true + about: + website: https://encyclosearch.org + official_api_documentation: https://encyclosearch.org/docs/#/rest-api + use_official_api: true + require_api_key: false + results: JSON + + - name: erowid + engine: xpath + paging: true + first_page_num: 0 + page_size: 30 + search_url: https://www.erowid.org/search.php?q={query}&s={pageno} + url_xpath: //dl[@class="results-list"]/dt[@class="result-title"]/a/@href + title_xpath: //dl[@class="results-list"]/dt[@class="result-title"]/a/text() + content_xpath: //dl[@class="results-list"]/dd[@class="result-details"] + categories: [] + shortcut: ew + disabled: true + about: + website: https://www.erowid.org/ + wikidata_id: Q1430691 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + # - name: elasticsearch + # shortcut: els + # engine: elasticsearch + # base_url: http://localhost:9200 + # username: elastic + # password: changeme + # index: my-index + # enable_http: true + # # available options: match, simple_query_string, term, terms, custom + # query_type: match + # # if query_type is set to custom, provide your query here + # # custom_query_json: {"query":{"match_all": {}}} + # # show_metadata: false + # disabled: true + + - name: wikidata + engine: wikidata + shortcut: wd + timeout: 3.0 + weight: 2 + # add "list" to the array to get results in the results list + display_type: ["infobox"] + tests: *tests_infobox + categories: [general] + + - name: duckduckgo + engine: duckduckgo + shortcut: ddg + + - name: duckduckgo images + engine: duckduckgo_extra + categories: [images, web] + ddg_category: images + shortcut: ddi + disabled: true + + - name: duckduckgo videos + engine: duckduckgo_extra + categories: [videos, web] + ddg_category: videos + shortcut: ddv + disabled: true + + - name: duckduckgo news + engine: duckduckgo_extra + categories: [news, web] + ddg_category: news + shortcut: ddn + disabled: true + + - name: duckduckgo weather + engine: duckduckgo_weather + shortcut: ddw + disabled: true + + - name: apple maps + engine: apple_maps + shortcut: apm + disabled: true + timeout: 5.0 + + - name: emojipedia + engine: emojipedia + timeout: 4.0 + shortcut: em + disabled: true + + - name: tineye + engine: tineye + shortcut: tin + timeout: 9.0 + disabled: true + + - name: etymonline + engine: xpath + paging: true + search_url: https://etymonline.com/search?page={pageno}&q={query} + url_xpath: //a[contains(@class, "word__name--")]/@href + title_xpath: //a[contains(@class, "word__name--")] + content_xpath: //section[contains(@class, "word__defination")] + first_page_num: 1 + shortcut: et + categories: [dictionaries] + about: + website: https://www.etymonline.com/ + wikidata_id: Q1188617 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + # - name: ebay + # engine: ebay + # shortcut: eb + # base_url: 'https://www.ebay.com' + # disabled: true + # timeout: 5 + + - name: 1x + engine: www1x + shortcut: 1x + timeout: 3.0 + disabled: true + + - name: fdroid + engine: fdroid + shortcut: fd + disabled: true + + - name: findthatmeme + engine: findthatmeme + shortcut: ftm + disabled: true + + - name: flickr + categories: images + shortcut: fl + # You can use the engine using the official stable API, but you need an API + # key, see: https://www.flickr.com/services/apps/create/ + # engine: flickr + # api_key: 'apikey' # required! + # Or you can use the html non-stable engine, activated by default + engine: flickr_noapi + + - name: free software directory + engine: mediawiki + shortcut: fsd + categories: [it, software wikis] + base_url: https://directory.fsf.org/ + search_type: title + timeout: 5.0 + disabled: true + about: + website: https://directory.fsf.org/ + wikidata_id: Q2470288 + + # - name: freesound + # engine: freesound + # shortcut: fnd + # disabled: true + # timeout: 15.0 + # API key required, see: https://freesound.org/docs/api/overview.html + # api_key: MyAPIkey + + - name: frinkiac + engine: frinkiac + shortcut: frk + disabled: true + + - name: fyyd + engine: fyyd + shortcut: fy + timeout: 8.0 + disabled: true + + - name: geizhals + engine: geizhals + shortcut: geiz + disabled: true + + - name: genius + engine: genius + shortcut: gen + + - name: gentoo + engine: mediawiki + shortcut: ge + categories: ["it", "software wikis"] + base_url: "https://wiki.gentoo.org/" + api_path: "api.php" + search_type: text + timeout: 10 + + - name: gitlab + engine: gitlab + base_url: https://gitlab.com + shortcut: gl + disabled: true + about: + website: https://gitlab.com/ + wikidata_id: Q16639197 + + # - name: gnome + # engine: gitlab + # base_url: https://gitlab.gnome.org + # shortcut: gn + # about: + # website: https://gitlab.gnome.org + # wikidata_id: Q44316 + + - name: github + engine: github + shortcut: gh + + - name: github code + engine: github_code + shortcut: ghc + disabled: true + ghc_auth: + # type is one of: + # * none + # * personal_access_token + # * bearer + # When none is passed, the token is not requried. + type: "none" + token: "token" + # specify whether to highlight the matching lines to the query + ghc_highlight_matching_lines: true + ghc_strip_new_lines: true + ghc_strip_whitespace: false + timeout: 10.0 + + - name: codeberg + # https://docs.searxng.org/dev/engines/online/gitea.html + engine: gitea + base_url: https://codeberg.org + shortcut: cb + disabled: true + + - name: gitea.com + engine: gitea + base_url: https://gitea.com + shortcut: gitea + disabled: true + + - name: goodreads + engine: goodreads + shortcut: good + timeout: 4.0 + disabled: true + + - name: google + engine: google + shortcut: go + # additional_tests: + # android: *test_android + + - name: google images + engine: google_images + shortcut: goi + # additional_tests: + # android: *test_android + # dali: + # matrix: + # query: ['Dali Christ'] + # lang: ['en', 'de', 'fr', 'zh-CN'] + # result_container: + # - ['one_title_contains', 'Salvador'] + + - name: google news + engine: google_news + shortcut: gon + # additional_tests: + # android: *test_android + + - name: google videos + engine: google_videos + shortcut: gov + # additional_tests: + # android: *test_android + + - name: google scholar + engine: google_scholar + shortcut: gos + + - name: google play apps + engine: google_play + categories: [files, apps] + shortcut: gpa + play_categ: apps + disabled: true + + - name: google play movies + engine: google_play + categories: videos + shortcut: gpm + play_categ: movies + disabled: true + + - name: material icons + engine: material_icons + shortcut: mi + disabled: true + + - name: habrahabr + engine: xpath + paging: true + search_url: https://habr.com/en/search/page{pageno}/?q={query} + results_xpath: //article[contains(@class, "tm-articles-list__item")] + url_xpath: .//a[@class="tm-title__link"]/@href + title_xpath: .//a[@class="tm-title__link"] + content_xpath: .//div[contains(@class, "article-formatted-body")] + categories: it + timeout: 4.0 + disabled: true + shortcut: habr + about: + website: https://habr.com/ + wikidata_id: Q4494434 + official_api_documentation: https://habr.com/en/docs/help/api/ + use_official_api: false + require_api_key: false + results: HTML + + - name: hackernews + engine: hackernews + shortcut: hn + disabled: true + + - name: hex + engine: hex + shortcut: hex + disabled: true + # Valid values: name inserted_at updated_at total_downloads recent_downloads + sort_criteria: "recent_downloads" + page_size: 10 + + - name: crates.io + engine: crates + shortcut: crates + disabled: true + timeout: 6.0 + + - name: hoogle + engine: xpath + search_url: https://hoogle.haskell.org/?hoogle={query} + results_xpath: '//div[@class="result"]' + title_xpath: './/div[@class="ans"]//a' + url_xpath: './/div[@class="ans"]//a/@href' + content_xpath: './/div[@class="from"]' + page_size: 20 + categories: [it, packages] + shortcut: ho + about: + website: https://hoogle.haskell.org/ + wikidata_id: Q34010 + official_api_documentation: https://hackage.haskell.org/api + use_official_api: false + require_api_key: false + results: JSON + + - name: il post + engine: il_post + shortcut: pst + disabled: true + + - name: huggingface + engine: huggingface + shortcut: hf + disabled: true + + - name: huggingface datasets + huggingface_endpoint: datasets + engine: huggingface + shortcut: hfd + disabled: true + + - name: huggingface spaces + huggingface_endpoint: spaces + engine: huggingface + shortcut: hfs + disabled: true + + - name: imdb + engine: imdb + shortcut: imdb + timeout: 6.0 + disabled: true + + - name: imgur + engine: imgur + shortcut: img + disabled: true + + - name: ina + engine: ina + shortcut: in + timeout: 6.0 + disabled: true + + # - name: invidious + # engine: invidious + # # if you want to use invidious with SearXNG you should setup one locally + # # https://github.com/searxng/searxng/issues/2722#issuecomment-2884993248 + # base_url: + # - https://invidious.example1.com + # - https://invidious.example2.com + # shortcut: iv + # timeout: 3.0 + + - name: ipernity + engine: ipernity + shortcut: ip + disabled: true + + - name: iqiyi + engine: iqiyi + shortcut: iq + disabled: true + + - name: jisho + engine: jisho + shortcut: js + timeout: 3.0 + disabled: true + + - name: kickass + engine: kickass + base_url: + - https://kickasstorrents.to + - https://kickasstorrents.cr + - https://kickasstorrent.cr + - https://kickass.sx + - https://kat.am + shortcut: kc + timeout: 4.0 + + - name: lemmy communities + engine: lemmy + lemmy_type: Communities + shortcut: leco + + - name: lemmy users + engine: lemmy + network: lemmy communities + lemmy_type: Users + shortcut: leus + + - name: lemmy posts + engine: lemmy + network: lemmy communities + lemmy_type: Posts + shortcut: lepo + + - name: lemmy comments + engine: lemmy + network: lemmy communities + lemmy_type: Comments + shortcut: lecom + + - name: library genesis + engine: xpath + # search_url: https://libgen.is/search.php?req={query} + search_url: https://libgen.rs/search.php?req={query} + url_xpath: //a[contains(@href,"book/index.php?md5")]/@href + title_xpath: //a[contains(@href,"book/")]/text()[1] + content_xpath: //td/a[1][contains(@href,"=author")]/text() + categories: files + timeout: 7.0 + disabled: true + shortcut: lg + about: + website: https://libgen.fun/ + wikidata_id: Q22017206 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: z-library + engine: zlibrary + shortcut: zlib + timeout: 7.0 + disabled: true + # https://github.com/searxng/searxng/issues/3610 + inactive: true + + - name: library of congress + engine: loc + shortcut: loc + categories: images + disabled: true + + - name: libretranslate + engine: libretranslate + # https://github.com/LibreTranslate/LibreTranslate?tab=readme-ov-file#mirrors + base_url: + - https://libretranslate.com/translate + # api_key: abc123 + shortcut: lt + disabled: true + + - name: lingva + engine: lingva + shortcut: lv + # set lingva instance in url, by default it will use the official instance + # url: https://lingva.thedaviddelta.com + + - name: lobste.rs + engine: xpath + search_url: https://lobste.rs/search?q={query}&what=stories&order=relevance + results_xpath: //li[contains(@class, "story")] + url_xpath: .//a[@class="u-url"]/@href + title_xpath: .//a[@class="u-url"] + content_xpath: .//a[@class="domain"] + categories: it + shortcut: lo + timeout: 5.0 + disabled: true + about: + website: https://lobste.rs/ + wikidata_id: Q60762874 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: marginalia + engine: marginalia + shortcut: mar + # To get an API key, please follow the instructions at + # - https://about.marginalia-search.com/article/api/ + # api_key: ... + disabled: true + inactive: true + + - name: mastodon users + engine: mastodon + mastodon_type: accounts + base_url: https://mastodon.social + shortcut: mau + + - name: mastodon hashtags + engine: mastodon + mastodon_type: hashtags + base_url: https://mastodon.social + shortcut: mah + + # - name: matrixrooms + # engine: mrs + # # https://docs.searxng.org/dev/engines/online/mrs.html + # # base_url: https://mrs-api-host + # shortcut: mtrx + # disabled: true + + - name: mdn + shortcut: mdn + engine: json_engine + categories: [it] + paging: true + search_url: https://developer.mozilla.org/api/v1/search?q={query}&page={pageno} + results_query: documents + url_query: mdn_url + url_prefix: https://developer.mozilla.org + title_query: title + content_query: summary + about: + website: https://developer.mozilla.org + wikidata_id: Q3273508 + official_api_documentation: null + use_official_api: false + require_api_key: false + results: JSON + + - name: metacpan + engine: metacpan + shortcut: cpan + disabled: true + number_of_results: 20 + + # https://docs.searxng.org/dev/engines/offline/search-indexer-engines.html#module-searx.engines.meilisearch + # - name: meilisearch + # engine: meilisearch + # shortcut: mes + # enable_http: true + # base_url: http://localhost:7700 + # index: my-index + # auth_key: Bearer XXXX + + - name: microsoft learn + engine: microsoft_learn + shortcut: msl + disabled: true + + - name: mixcloud + engine: mixcloud + shortcut: mc + + # MongoDB engine + # Required dependency: pymongo + # - name: mymongo + # engine: mongodb + # shortcut: md + # exact_match_only: false + # host: '127.0.0.1' + # port: 27017 + # enable_http: true + # results_per_page: 20 + # database: 'business' + # collection: 'reviews' # name of the db collection + # key: 'name' # key in the collection to search for + + - name: mozhi + engine: mozhi + base_url: + - https://mozhi.aryak.me + - https://translate.bus-hit.me + - https://nyc1.mz.ggtyler.dev + # mozhi_engine: google - see https://mozhi.aryak.me for supported engines + timeout: 4.0 + shortcut: mz + disabled: true + + - name: mwmbl + engine: mwmbl + # api_url: https://api.mwmbl.org + shortcut: mwm + disabled: true + + - name: niconico + engine: niconico + shortcut: nico + disabled: true + + - name: npm + engine: npm + shortcut: npm + timeout: 5.0 + disabled: true + + - name: nyaa + engine: nyaa + shortcut: nt + disabled: true + + - name: mankier + engine: json_engine + search_url: https://www.mankier.com/api/v2/mans/?q={query} + results_query: results + url_query: url + title_query: name + content_query: description + categories: it + shortcut: man + about: + website: https://www.mankier.com/ + official_api_documentation: https://www.mankier.com/api + use_official_api: true + require_api_key: false + results: JSON + + # https://docs.searxng.org/dev/engines/online/mullvad_leta.html + - name: mullvadleta + engine: mullvad_leta + disabled: true + leta_engine: google + categories: [general, web] + shortcut: ml + + - name: mullvadleta brave + engine: mullvad_leta + network: mullvadleta + disabled: true + leta_engine: brave + categories: [general, web] + shortcut: mlb + + - name: odysee + engine: odysee + shortcut: od + disabled: true + + - name: ollama + engine: ollama + shortcut: ollama + disabled: true + + - name: openairedatasets + engine: json_engine + paging: true + search_url: https://api.openaire.eu/search/datasets?format=json&page={pageno}&size=10&title={query} + results_query: response/results/result + url_query: metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ + title_query: metadata/oaf:entity/oaf:result/title/$ + content_query: metadata/oaf:entity/oaf:result/description/$ + content_html_to_text: true + categories: "science" + shortcut: oad + timeout: 5.0 + about: + website: https://www.openaire.eu/ + wikidata_id: Q25106053 + official_api_documentation: https://api.openaire.eu/ + use_official_api: false + require_api_key: false + results: JSON + + - name: openairepublications + engine: json_engine + paging: true + search_url: https://api.openaire.eu/search/publications?format=json&page={pageno}&size=10&title={query} + results_query: response/results/result + url_query: metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ + title_query: metadata/oaf:entity/oaf:result/title/$ + content_query: metadata/oaf:entity/oaf:result/description/$ + content_html_to_text: true + categories: science + shortcut: oap + timeout: 5.0 + about: + website: https://www.openaire.eu/ + wikidata_id: Q25106053 + official_api_documentation: https://api.openaire.eu/ + use_official_api: false + require_api_key: false + results: JSON + + - name: openalex + engine: openalex + shortcut: oa + # https://docs.searxng.org/dev/engines/online/openalex.html + # Recommended by OpenAlex: join the polite pool with an email address + # mailto: "[email protected]" + timeout: 5.0 + disabled: true + + - name: openclipart + engine: openclipart + shortcut: ocl + inactive: true + disabled: true + timeout: 30 + + - name: openlibrary + engine: openlibrary + shortcut: ol + timeout: 10 + disabled: true + + - name: openmeteo + engine: open_meteo + shortcut: om + disabled: true + + # - name: opensemanticsearch + # engine: opensemantic + # shortcut: oss + # base_url: 'http://localhost:8983/solr/opensemanticsearch/' + + - name: openstreetmap + engine: openstreetmap + shortcut: osm + + - name: openrepos + engine: xpath + paging: true + search_url: https://openrepos.net/search/node/{query}?page={pageno} + url_xpath: //li[@class="search-result"]//h3[@class="title"]/a/@href + title_xpath: //li[@class="search-result"]//h3[@class="title"]/a + content_xpath: //li[@class="search-result"]//div[@class="search-snippet-info"]//p[@class="search-snippet"] + categories: files + timeout: 4.0 + disabled: true + shortcut: or + about: + website: https://openrepos.net/ + wikidata_id: + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: packagist + engine: json_engine + paging: true + search_url: https://packagist.org/search.json?q={query}&page={pageno} + results_query: results + url_query: url + title_query: name + content_query: description + categories: [it, packages] + disabled: true + timeout: 5.0 + shortcut: pack + about: + website: https://packagist.org + wikidata_id: Q108311377 + official_api_documentation: https://packagist.org/apidoc + use_official_api: true + require_api_key: false + results: JSON + + - name: pdbe + engine: pdbe + shortcut: pdb + # Hide obsolete PDB entries. Default is not to hide obsolete structures + # hide_obsolete: false + + - name: photon + engine: photon + shortcut: ph + + - name: pinterest + engine: pinterest + shortcut: pin + + - name: piped + engine: piped + shortcut: ppd + categories: videos + piped_filter: videos + timeout: 3.0 + inactive: true + + # URL to use as link and for embeds + frontend_url: https://srv.piped.video + # Instance will be selected randomly, for more see https://piped-instances.kavin.rocks/ + backend_url: + - https://pipedapi.ducks.party + - https://api.piped.private.coffee + + - name: piped.music + engine: piped + network: piped + shortcut: ppdm + categories: music + piped_filter: music_songs + timeout: 3.0 + inactive: true + + - name: piratebay + engine: piratebay + shortcut: tpb + # You may need to change this URL to a proxy if piratebay is blocked in your + # country + url: https://thepiratebay.org/ + timeout: 3.0 + + - name: pixabay images + engine: pixabay + pixabay_type: images + categories: images + shortcut: pixi + disabled: true + + - name: pixabay videos + engine: pixabay + pixabay_type: videos + categories: videos + shortcut: pixv + disabled: true + + - name: pixiv + shortcut: pv + engine: pixiv + disabled: true + inactive: true + pixiv_image_proxies: + - https://pximg.example.org + # A proxy is required to load the images. Hosting an image proxy server + # for Pixiv: + # --> https://pixivfe.pages.dev/hosting-image-proxy-server/ + # Proxies from public instances. Ask the public instances owners if they + # agree to receive traffic from SearXNG! + # --> https://codeberg.org/VnPower/PixivFE#instances + # --> https://github.com/searxng/searxng/pull/3192#issuecomment-1941095047 + # image proxy of https://pixiv.cat + # - https://i.pixiv.cat + # image proxy of https://www.pixiv.pics + # - https://pximg.cocomi.eu.org + # image proxy of https://pixivfe.exozy.me + # - https://pximg.exozy.me + # image proxy of https://pixivfe.ducks.party + # - https://pixiv.ducks.party + # image proxy of https://pixiv.perennialte.ch + # - https://pximg.perennialte.ch + + - name: podcastindex + engine: podcastindex + shortcut: podcast + + # Required dependency: psychopg2 + # - name: postgresql + # engine: postgresql + # database: postgres + # username: postgres + # password: postgres + # limit: 10 + # query_str: 'SELECT * from my_table WHERE my_column = %(query)s' + # shortcut : psql + + - name: presearch + engine: presearch + search_type: search + categories: [general, web] + shortcut: ps + timeout: 4.0 + disabled: true + + - name: presearch images + engine: presearch + network: presearch + search_type: images + categories: [images, web] + timeout: 4.0 + shortcut: psimg + disabled: true + + - name: presearch videos + engine: presearch + network: presearch + search_type: videos + categories: [general, web] + timeout: 4.0 + shortcut: psvid + disabled: true + + - name: presearch news + engine: presearch + network: presearch + search_type: news + categories: [news, web] + timeout: 4.0 + shortcut: psnews + disabled: true + + - name: pub.dev + engine: xpath + shortcut: pd + search_url: https://pub.dev/packages?q={query}&page={pageno} + paging: true + results_xpath: //div[contains(@class,"packages-item")] + url_xpath: ./div/h3/a/@href + title_xpath: ./div/h3/a + content_xpath: ./div/div/div[contains(@class,"packages-description")]/span + categories: [packages, it] + timeout: 3.0 + disabled: true + first_page_num: 1 + about: + website: https://pub.dev/ + official_api_documentation: https://pub.dev/help/api + use_official_api: false + require_api_key: false + results: HTML + + - name: public domain image archive + engine: public_domain_image_archive + shortcut: pdia + disabled: true + + - name: pubmed + engine: pubmed + shortcut: pub + + - name: pypi + shortcut: pypi + engine: pypi + + - name: quark + quark_category: general + categories: [general] + engine: quark + shortcut: qk + disabled: true + + - name: quark images + quark_category: images + categories: [images] + engine: quark + shortcut: qki + disabled: true + + - name: qwant + qwant_categ: web + engine: qwant + shortcut: qw + categories: [general, web] + disabled: true + additional_tests: + rosebud: *test_rosebud + + - name: qwant news + qwant_categ: news + engine: qwant + shortcut: qwn + categories: news + network: qwant + + - name: qwant images + qwant_categ: images + engine: qwant + shortcut: qwi + categories: [images, web] + network: qwant + + - name: qwant videos + qwant_categ: videos + engine: qwant + shortcut: qwv + categories: [videos, web] + network: qwant + + # - name: library + # engine: recoll + # shortcut: lib + # base_url: 'https://recoll.example.org/' + # search_dir: '' + # mount_prefix: /export + # dl_prefix: 'https://download.example.org' + # timeout: 30.0 + # categories: files + # disabled: true + + # - name: recoll library reference + # engine: recoll + # base_url: 'https://recoll.example.org/' + # search_dir: reference + # mount_prefix: /export + # dl_prefix: 'https://download.example.org' + # shortcut: libr + # timeout: 30.0 + # categories: files + # disabled: true + + - name: radio browser + engine: radio_browser + shortcut: rb + + - name: reddit + engine: reddit + shortcut: re + page_size: 25 + disabled: true + + - name: reuters + engine: reuters + shortcut: reu + # https://docs.searxng.org/dev/engines/online/reuters.html + # sort_order = "relevance" + + - name: right dao + engine: xpath + paging: true + page_size: 12 + search_url: https://rightdao.com/search?q={query}&start={pageno} + results_xpath: //div[contains(@class, "description")] + url_xpath: ../div[contains(@class, "title")]/a/@href + title_xpath: ../div[contains(@class, "title")] + content_xpath: . + categories: general + shortcut: rd + disabled: true + about: + website: https://rightdao.com/ + use_official_api: false + require_api_key: false + results: HTML + + - name: rottentomatoes + engine: rottentomatoes + shortcut: rt + disabled: true + + # Required dependency: valkey + # - name: myvalkey + # shortcut : rds + # engine: valkey_server + # exact_match_only: false + # host: '127.0.0.1' + # port: 6379 + # enable_http: true + # password: '' + # db: 0 + + # tmp suspended: bad certificate + # - name: scanr structures + # shortcut: scs + # engine: scanr_structures + # disabled: true + + - name: searchmysite + engine: xpath + shortcut: sms + categories: general + paging: true + search_url: https://searchmysite.net/search/?q={query}&page={pageno} + results_xpath: //div[contains(@class,'search-result')] + url_xpath: .//a[contains(@class,'result-link')]/@href + title_xpath: .//span[contains(@class,'result-title-txt')]/text() + content_xpath: ./p[@id='result-hightlight'] + disabled: true + about: + website: https://searchmysite.net + + - name: selfhst icons + engine: selfhst + shortcut: si + disabled: true + + - name: sepiasearch + engine: sepiasearch + shortcut: sep + + - name: sogou + engine: sogou + shortcut: sogou + disabled: true + + - name: sogou images + engine: sogou_images + shortcut: sogoui + disabled: true + + - name: sogou videos + engine: sogou_videos + shortcut: sogouv + disabled: true + + - name: sogou wechat + engine: sogou_wechat + shortcut: sogouw + disabled: true + + - name: soundcloud + engine: soundcloud + shortcut: sc + + - name: stackoverflow + engine: stackexchange + shortcut: st + api_site: 'stackoverflow' + categories: [it, q&a] + + - name: askubuntu + engine: stackexchange + shortcut: ubuntu + api_site: 'askubuntu' + categories: [it, q&a] + + - name: superuser + engine: stackexchange + shortcut: su + api_site: 'superuser' + categories: [it, q&a] + + - name: discuss.python + engine: discourse + shortcut: dpy + base_url: 'https://discuss.python.org' + categories: [it, q&a] + disabled: true + + - name: caddy.community + engine: discourse + shortcut: caddy + base_url: 'https://caddy.community' + categories: [it, q&a] + disabled: true + + - name: pi-hole.community + engine: discourse + shortcut: pi + categories: [it, q&a] + base_url: 'https://discourse.pi-hole.net' + disabled: true + + - name: searchcode code + engine: searchcode_code + shortcut: scc + disabled: true + inactive: true + + # - name: searx + # engine: searx_engine + # shortcut: se + # instance_urls : + # - http://127.0.0.1:8888/ + # - ... + # disabled: true + + - name: semantic scholar + engine: semantic_scholar + shortcut: se + + # Spotify needs API credentials + # - name: spotify + # engine: spotify + # shortcut: stf + # api_client_id: ******* + # api_client_secret: ******* + + # - name: solr + # engine: solr + # shortcut: slr + # base_url: http://localhost:8983 + # collection: collection_name + # sort: '' # sorting: asc or desc + # field_list: '' # comma separated list of field names to display on the UI + # default_fields: '' # default field to query + # query_fields: '' # query fields + # enable_http: true + + - name: springer nature + engine: springer + shortcut: springer + timeout: 5 + # read https://docs.searxng.org/dev/engines/online/springer.html + api_key: "" + inactive: true + + - name: startpage + engine: startpage + shortcut: sp + startpage_categ: web + categories: [general, web] + additional_tests: + rosebud: *test_rosebud + + - name: startpage news + engine: startpage + startpage_categ: news + categories: [news, web] + shortcut: spn + + - name: startpage images + engine: startpage + startpage_categ: images + categories: [images, web] + shortcut: spi + + - name: steam + engine: steam + shortcut: stm + disabled: true + + - name: tokyotoshokan + engine: tokyotoshokan + shortcut: tt + timeout: 6.0 + disabled: true + + - name: solidtorrents + engine: solidtorrents + shortcut: solid + timeout: 4.0 + base_url: + - https://solidtorrents.to + - https://bitsearch.to + + # For this demo of the sqlite engine download: + # https://liste.mediathekview.de/filmliste-v2.db.bz2 + # and unpack into searx/data/filmliste-v2.db + # Query to test: "!mediathekview concert" + # + # - name: mediathekview + # engine: sqlite + # shortcut: mediathekview + # categories: [general, videos] + # result_type: MainResult + # database: searx/data/filmliste-v2.db + # query_str: >- + # SELECT title || ' (' || time(duration, 'unixepoch') || ')' AS title, + # COALESCE( NULLIF(url_video_hd,''), NULLIF(url_video_sd,''), url_video) AS url, + # description AS content + # FROM film + # WHERE title LIKE :wildcard OR description LIKE :wildcard + # ORDER BY duration DESC + + - name: tagesschau + engine: tagesschau + # when set to false, display URLs from Tagesschau, and not the actual source + # (e.g. NDR, WDR, SWR, HR, ...) + use_source_url: true + shortcut: ts + disabled: true + + - name: tmdb + engine: xpath + paging: true + categories: movies + search_url: https://www.themoviedb.org/search?page={pageno}&query={query} + results_xpath: //div[contains(@class,"movie") or contains(@class,"tv")]//div[contains(@class,"card")] + url_xpath: .//div[contains(@class,"poster")]/a/@href + thumbnail_xpath: .//img/@src + title_xpath: .//div[contains(@class,"title")]//h2 + content_xpath: .//div[contains(@class,"overview")] + shortcut: tm + disabled: true + + # Requires Tor + - name: torch + engine: xpath + paging: true + search_url: + http://xmh57jrknzkhv6y3ls3ubitzfqnkrwxhopf5aygthi7d6rplyvk3noyd.onion/cgi-bin/omega/omega?P={query}&DEFAULTOP=and + results_xpath: //table//tr + url_xpath: ./td[2]/a + title_xpath: ./td[2]/b + content_xpath: ./td[2]/small + categories: onions + enable_http: true + shortcut: tch + + # TubeArchivist is a self-hosted Youtube archivist software. + # https://docs.searxng.org/dev/engines/online/tubearchivist.html + # + # - name: tubearchivist + # engine: tubearchivist + # shortcut: tuba + # base_url: + # ta_token: + # ta_link_to_mp4: false + + # torznab engine lets you query any torznab compatible indexer. Using this + # engine in combination with Jackett opens the possibility to query a lot of + # public and private indexers directly from SearXNG. More details at: + # https://docs.searxng.org/dev/engines/online/torznab.html + # + # - name: Torznab EZTV + # engine: torznab + # shortcut: eztv + # base_url: http://localhost:9117/api/v2.0/indexers/eztv/results/torznab + # enable_http: true # if using localhost + # api_key: xxxxxxxxxxxxxxx + # show_magnet_links: true + # show_torrent_files: false + # # https://github.com/Jackett/Jackett/wiki/Jackett-Categories + # torznab_categories: # optional + # - 2000 + # - 5000 + + # tmp suspended - too slow, too many errors + # - name: urbandictionary + # engine : xpath + # search_url : https://www.urbandictionary.com/define.php?term={query} + # url_xpath : //*[@class="word"]/@href + # title_xpath : //*[@class="def-header"] + # content_xpath: //*[@class="meaning"] + # shortcut: ud + + - name: unsplash + engine: unsplash + shortcut: us + + - name: yandex + engine: yandex + categories: general + search_type: web + shortcut: yd + disabled: true + inactive: true + + - name: yandex images + engine: yandex + categories: images + search_type: images + shortcut: ydi + disabled: true + inactive: true + + - name: yandex music + engine: yandex_music + shortcut: ydm + disabled: true + # https://yandex.com/support/music/access.html + inactive: true + + - name: yahoo + engine: yahoo + shortcut: yh + disabled: true + + - name: yahoo news + engine: yahoo_news + shortcut: yhn + + - name: youtube + shortcut: yt + # You can use the engine using the official stable API, but you need an API + # key See: https://console.developers.google.com/project + # + # engine: youtube_api + # api_key: 'apikey' # required! + # + # Or you can use the html non-stable engine, activated by default + engine: youtube_noapi + + - name: dailymotion + engine: dailymotion + shortcut: dm + + - name: vimeo + engine: vimeo + shortcut: vm + + - name: wiby + engine: json_engine + paging: true + search_url: https://wiby.me/json/?q={query}&p={pageno} + url_query: URL + title_query: Title + content_query: Snippet + categories: [general, web] + shortcut: wib + disabled: true + about: + website: https://wiby.me/ + + - name: wikibooks + engine: mediawiki + weight: 0.5 + shortcut: wb + categories: [general, wikimedia] + base_url: "https://{language}.wikibooks.org/" + search_type: text + disabled: true + about: + website: https://www.wikibooks.org/ + wikidata_id: Q367 + + - name: wikinews + engine: mediawiki + shortcut: wn + categories: [news, wikimedia] + base_url: "https://{language}.wikinews.org/" + search_type: text + srsort: create_timestamp_desc + about: + website: https://www.wikinews.org/ + wikidata_id: Q964 + + - name: wikiquote + engine: mediawiki + weight: 0.5 + shortcut: wq + categories: [general, wikimedia] + base_url: "https://{language}.wikiquote.org/" + search_type: text + disabled: true + additional_tests: + rosebud: *test_rosebud + about: + website: https://www.wikiquote.org/ + wikidata_id: Q369 + + - name: wikisource + engine: mediawiki + weight: 0.5 + shortcut: ws + categories: [general, wikimedia] + base_url: "https://{language}.wikisource.org/" + search_type: text + disabled: true + about: + website: https://www.wikisource.org/ + wikidata_id: Q263 + + - name: wikispecies + engine: mediawiki + shortcut: wsp + categories: [general, science, wikimedia] + base_url: "https://species.wikimedia.org/" + search_type: text + disabled: true + about: + website: https://species.wikimedia.org/ + wikidata_id: Q13679 + tests: + wikispecies: + matrix: + query: "Campbell, L.I. et al. 2011: MicroRNAs" + lang: en + result_container: + - not_empty + - ['one_title_contains', 'Tardigrada'] + test: + - unique_results + + - name: wiktionary + engine: mediawiki + shortcut: wt + categories: [dictionaries, wikimedia] + base_url: "https://{language}.wiktionary.org/" + search_type: text + about: + website: https://www.wiktionary.org/ + wikidata_id: Q151 + + - name: wikiversity + engine: mediawiki + weight: 0.5 + shortcut: wv + categories: [general, wikimedia] + base_url: "https://{language}.wikiversity.org/" + search_type: text + disabled: true + about: + website: https://www.wikiversity.org/ + wikidata_id: Q370 + + - name: wikivoyage + engine: mediawiki + weight: 0.5 + shortcut: wy + categories: [general, wikimedia] + base_url: "https://{language}.wikivoyage.org/" + search_type: text + disabled: true + about: + website: https://www.wikivoyage.org/ + wikidata_id: Q373 + + - name: wikicommons.images + engine: wikicommons + shortcut: wci + categories: images + wc_search_type: image + + - name: wikicommons.videos + engine: wikicommons + shortcut: wcv + categories: videos + wc_search_type: video + + - name: wikicommons.audio + engine: wikicommons + shortcut: wca + categories: music + wc_search_type: audio + + - name: wikicommons.files + engine: wikicommons + shortcut: wcf + categories: files + wc_search_type: file + + - name: wolframalpha + shortcut: wa + # You can use the engine using the official stable API, but you need an API + # key. See: https://products.wolframalpha.com/api/ + # + # engine: wolframalpha_api + # api_key: '' + # + # Or you can use the html non-stable engine, activated by default + engine: wolframalpha_noapi + timeout: 6.0 + categories: general + disabled: true + + - name: dictzone + engine: dictzone + shortcut: dc + + - name: mymemory translated + engine: translated + shortcut: tl + timeout: 5.0 + # You can use without an API key, but you are limited to 1000 words/day + # See: https://mymemory.translated.net/doc/usagelimits.php + # api_key: '' + + # Required dependency: mysql-connector-python + # - name: mysql + # engine: mysql_server + # database: mydatabase + # username: user + # password: pass + # limit: 10 + # query_str: 'SELECT * from mytable WHERE fieldname=%(query)s' + # shortcut: mysql + + # Required dependency: mariadb + # - name: mariadb + # engine: mariadb_server + # database: mydatabase + # username: user + # password: pass + # limit: 10 + # query_str: 'SELECT * from mytable WHERE fieldname=%(query)s' + # shortcut: mdb + + - name: 1337x + engine: 1337x + shortcut: 1337x + disabled: true + + - name: duden + engine: duden + shortcut: du + disabled: true + + - name: seznam + shortcut: szn + engine: seznam + disabled: true + + # - name: deepl + # engine: deepl + # shortcut: dpl + # # You can use the engine using the official stable API, but you need an API key + # # See: https://www.deepl.com/pro-api?cta=header-pro-api + # api_key: '' # required! + # timeout: 5.0 + # disabled: true + + - name: mojeek + shortcut: mjk + engine: mojeek + categories: [general, web] + disabled: true + + - name: mojeek images + shortcut: mjkimg + engine: mojeek + categories: [images, web] + search_type: images + paging: false + disabled: true + + - name: mojeek news + shortcut: mjknews + engine: mojeek + categories: [news, web] + search_type: news + paging: false + disabled: true + + - name: moviepilot + engine: moviepilot + shortcut: mp + disabled: true + + - name: naver + categories: [general, web] + engine: naver + shortcut: nvr + disabled: true + + - name: naver images + naver_category: images + categories: [images] + engine: naver + shortcut: nvri + disabled: true + + - name: naver news + naver_category: news + categories: [news] + engine: naver + shortcut: nvrn + disabled: true + + - name: naver videos + naver_category: videos + categories: [videos] + engine: naver + shortcut: nvrv + disabled: true + + - name: rubygems + shortcut: rbg + engine: xpath + paging: true + search_url: https://rubygems.org/search?page={pageno}&query={query} + results_xpath: /html/body/main/div/a[@class="gems__gem"] + url_xpath: ./@href + title_xpath: ./span/h2 + content_xpath: ./span/p + suggestion_xpath: /html/body/main/div/div[@class="search__suggestions"]/p/a + first_page_num: 1 + categories: [it, packages] + disabled: true + about: + website: https://rubygems.org/ + wikidata_id: Q1853420 + official_api_documentation: https://guides.rubygems.org/rubygems-org-api/ + use_official_api: false + require_api_key: false + results: HTML + + - name: peertube + engine: peertube + shortcut: ptb + paging: true + # alternatives see: https://instances.joinpeertube.org/instances + # base_url: https://tube.4aem.com + categories: videos + disabled: true + timeout: 6.0 + + - name: mediathekviewweb + engine: mediathekviewweb + shortcut: mvw + disabled: true + + - name: yacy + # https://docs.searxng.org/dev/engines/online/yacy.html + engine: yacy + categories: general + search_type: text + # see https://github.com/searxng/searxng/pull/3631#issuecomment-2240903027 + base_url: + - https://yacy.searchlab.eu + shortcut: ya + disabled: true + # if you aren't using HTTPS for your local yacy instance disable https + # enable_http: false + search_mode: 'global' + # timeout can be reduced in 'local' search mode + timeout: 5.0 + + - name: yacy images + engine: yacy + network: yacy + categories: images + search_type: image + shortcut: yai + disabled: true + # timeout can be reduced in 'local' search mode + timeout: 5.0 + + - name: rumble + engine: rumble + shortcut: ru + base_url: https://rumble.com/ + paging: true + categories: videos + disabled: true + + - name: repology + engine: repology + shortcut: rep + disabled: true + inactive: true + + - name: livespace + engine: livespace + shortcut: ls + categories: videos + disabled: true + timeout: 5.0 + + - name: wordnik + engine: wordnik + shortcut: wnik + timeout: 5.0 + + - name: woxikon.de synonyme + engine: xpath + shortcut: woxi + categories: [dictionaries] + timeout: 5.0 + disabled: true + search_url: https://synonyme.woxikon.de/synonyme/{query}.php + url_xpath: //div[@class="upper-synonyms"]/a/@href + content_xpath: //div[@class="synonyms-list-group"] + title_xpath: //div[@class="upper-synonyms"]/a + no_result_for_http_status: [404] + about: + website: https://www.woxikon.de/ + wikidata_id: # No Wikidata ID + use_official_api: false + require_api_key: false + results: HTML + language: de + + - name: seekr news + engine: seekr + shortcut: senews + categories: news + seekr_category: news + disabled: true + + - name: seekr images + engine: seekr + network: seekr news + shortcut: seimg + categories: images + seekr_category: images + disabled: true + + - name: seekr videos + engine: seekr + network: seekr news + shortcut: sevid + categories: videos + seekr_category: videos + disabled: true + + - name: stract + engine: stract + shortcut: str + disabled: true + + - name: svgrepo + engine: svgrepo + shortcut: svg + timeout: 10.0 + disabled: true + + - name: tootfinder + engine: tootfinder + shortcut: toot + + - name: uxwing + engine: uxwing + shortcut: ux + disabled: true + + - name: voidlinux + engine: voidlinux + shortcut: void + disabled: true + + - name: wallhaven + engine: wallhaven + # api_key: abcdefghijklmnopqrstuvwxyz + shortcut: wh + disabled: true + + # wikimini: online encyclopedia for children + # The fulltext and title parameter is necessary for Wikimini because + # sometimes it will not show the results and redirect instead + - name: wikimini + engine: xpath + shortcut: wkmn + search_url: https://fr.wikimini.org/w/index.php?search={query}&title=Sp%C3%A9cial%3ASearch&fulltext=Search + url_xpath: //li/div[@class="mw-search-result-heading"]/a/@href + title_xpath: //li//div[@class="mw-search-result-heading"]/a + content_xpath: //li/div[@class="searchresult"] + categories: general + disabled: true + about: + website: https://wikimini.org/ + wikidata_id: Q3568032 + use_official_api: false + require_api_key: false + results: HTML + language: fr + + - name: wttr.in + engine: wttr + shortcut: wttr + timeout: 9.0 + + - name: brave + engine: brave + shortcut: br + time_range_support: true + paging: true + categories: [general, web] + brave_category: search + # brave_spellcheck: true + + - name: brave.images + engine: brave + network: brave + shortcut: brimg + categories: [images, web] + brave_category: images + + - name: brave.videos + engine: brave + network: brave + shortcut: brvid + categories: [videos, web] + brave_category: videos + + - name: brave.news + engine: brave + network: brave + shortcut: brnews + categories: news + brave_category: news + + # - name: brave.goggles + # engine: brave + # network: brave + # shortcut: brgog + # time_range_support: true + # paging: true + # categories: [general, web] + # brave_category: goggles + # Goggles: # required! This should be a URL ending in .goggle + + - name: lib.rs + shortcut: lrs + engine: lib_rs + disabled: true + + - name: sourcehut + shortcut: srht + engine: xpath + paging: true + search_url: https://sr.ht/projects?page={pageno}&search={query} + results_xpath: (//div[@class="event-list"])[1]/div[@class="event"] + url_xpath: ./h4/a[2]/@href + title_xpath: ./h4/a[2] + content_xpath: ./p + first_page_num: 1 + categories: [it, repos] + disabled: true + about: + website: https://sr.ht + wikidata_id: Q78514485 + official_api_documentation: https://man.sr.ht/ + use_official_api: false + require_api_key: false + results: HTML + + - name: goo + shortcut: goo + engine: xpath + paging: true + search_url: https://search.goo.ne.jp/web.jsp?MT={query}&FR={pageno}0 + url_xpath: //div[@class="result"]/p[@class='title fsL1']/a/@href + title_xpath: //div[@class="result"]/p[@class='title fsL1']/a + content_xpath: //p[contains(@class,'url fsM')]/following-sibling::p + first_page_num: 0 + categories: [general, web] + disabled: true + timeout: 4.0 + about: + website: https://search.goo.ne.jp + wikidata_id: Q249044 + use_official_api: false + require_api_key: false + results: HTML + language: ja + + - name: bt4g + engine: bt4g + shortcut: bt4g + + - name: pkg.go.dev + engine: pkg_go_dev + shortcut: pgo + disabled: true + + - name: senscritique + engine: senscritique + shortcut: scr + timeout: 4.0 + disabled: true + + - name: minecraft wiki + engine: mediawiki + shortcut: mcw + categories: ["software wikis"] + base_url: https://minecraft.wiki/ + api_path: "api.php" + search_type: text + disabled: true + about: + website: https://minecraft.wiki/ + wikidata_id: Q105533483 + +# Doku engine lets you access to any Doku wiki instance: +# A public one or a privete/corporate one. +# - name: ubuntuwiki +# engine: doku +# shortcut: uw +# base_url: 'https://doc.ubuntu-fr.org' + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: git grep +# engine: command +# command: ['git', 'grep', '{{QUERY}}'] +# shortcut: gg +# tokens: [] +# disabled: true +# delimiter: +# chars: ':' +# keys: ['filepath', 'code'] + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: locate +# engine: command +# command: ['locate', '{{QUERY}}'] +# shortcut: loc +# tokens: [] +# disabled: true +# delimiter: +# chars: ' ' +# keys: ['line'] + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: find +# engine: command +# command: ['find', '.', '-name', '{{QUERY}}'] +# query_type: path +# shortcut: fnd +# tokens: [] +# disabled: true +# delimiter: +# chars: ' ' +# keys: ['line'] + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: pattern search in files +# engine: command +# command: ['fgrep', '{{QUERY}}'] +# shortcut: fgr +# tokens: [] +# disabled: true +# delimiter: +# chars: ' ' +# keys: ['line'] + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: regex search in files +# engine: command +# command: ['grep', '{{QUERY}}'] +# shortcut: gr +# tokens: [] +# disabled: true +# delimiter: +# chars: ' ' +# keys: ['line'] + +doi_resolvers: + oadoi.org: 'https://oadoi.org/' + doi.org: 'https://doi.org/' + sci-hub.se: 'https://sci-hub.se/' + sci-hub.st: 'https://sci-hub.st/' + sci-hub.ru: 'https://sci-hub.ru/' + +default_doi_resolver: 'oadoi.org' diff --git a/mcp_host/shell/Dockerfile b/mcp_host/shell/Dockerfile index b9d8469..5f0538f 100644 --- a/mcp_host/shell/Dockerfile +++ b/mcp_host/shell/Dockerfile @@ -2,15 +2,17 @@ ARG BASE_IMAGE=ubuntu:24.04 FROM ${BASE_IMAGE} RUN apt-get update && apt-get install -y \ - python3 \ - r-base \ + python3.12 \ + python3.12-dev \ + python3.12-venv \ python3-pip \ + r-base \ sudo \ - && rm -rf /var/lib/apt/lists/* + && rm -rf /var/lib/apt/lists/* \ + && python3 --version -# Install Python packages globally as root -COPY requirements.txt /tmp/ -RUN pip3 install --break-system-packages --no-cache-dir -r /tmp/requirements.txt +# install fastmcp and its dependencies +RUN pip3 install --break-system-packages --no-cache-dir fastmcp # Grant ubuntu passwordless sudo access RUN echo "ubuntu ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers diff --git a/mcp_host/skills/scientific-skills/scientific-skills/geospatial-pure-python/SKILL.md b/mcp_host/skills/scientific-skills/scientific-skills/geospatial-pure-python/SKILL.md new file mode 100644 index 0000000..3fdd320 --- /dev/null +++ b/mcp_host/skills/scientific-skills/scientific-skills/geospatial-pure-python/SKILL.md @@ -0,0 +1,476 @@ +--- +name: geospatial-pure-python +description: Pure Python geospatial computation without geopandas or shapely. Provides algorithms for point-in-polygon, distance calculations, GeoJSON reading, spatial joins, and buffer analysis using only numpy, pandas, math, and built-in modules. +license: MIT +metadata: + skill-author: K-Dense Inc. +--- + +# Geospatial Pure Python + +## Overview + +This skill provides pure Python implementations of common geospatial operations when geopandas and shapely are not available due to import restrictions. It uses only whitelisted libraries: numpy, pandas, math, json, and built-in modules. + +## Core Capabilities + +### 1. Point-in-Polygon Algorithm + +Ray casting algorithm for determining if a point is inside a polygon. + +```python +import numpy as np + +def point_in_polygon(point, polygon): + """ + Determine if a point is inside a polygon using ray casting algorithm. + + Args: + point: tuple (x, y) or list [x, y] + polygon: list of tuples [(x1, y1), (x2, y2), ...] forming a closed polygon + (first and last points should be the same) + + Returns: + bool: True if point is inside polygon, False otherwise + """ + x, y = point + n = len(polygon) + inside = False + + p1x, p1y = polygon[0] + for i in range(1, n + 1): + p2x, p2y = polygon[i % n] + if y > min(p1y, p2y): + if y <= max(p1y, p2y): + if x <= max(p1x, p2x): + if p1y != p2y: + xinters = (y - p1y) * (p2x - p1x) / (p2y - p1y) + p1x + if p1x == p2x or x <= xinters: + inside = not inside + p1x, p1y = p2x, p2y + + return inside + +# For multiple points +def points_in_polygon(points, polygon): + """ + Vectorized version for multiple points using numpy. + + Args: + points: numpy array of shape (n, 2) or list of points + polygon: list of vertices as above + + Returns: + numpy array of bools + """ + import numpy as np + points = np.asarray(points) + x, y = points[:, 0], points[:, 1] + n = len(polygon) + inside = np.zeros(len(points), dtype=bool) + + p1x, p1y = polygon[0] + for i in range(1, n + 1): + p2x, p2y = polygon[i % n] + # Find points where y is between p1y and p2y + idx = (y > min(p1y, p2y)) & (y <= max(p1y, p2y)) & (x <= max(p1x, p2x)) + if idx.any(): + if p1y != p2y: + xinters = (y[idx] - p1y) * (p2x - p1x) / (p2y - p1y) + p1x + if p1x == p2x: + inside[idx] = ~inside[idx] + else: + mask = x[idx] <= xinters + inside[idx] = np.where(mask, ~inside[idx], inside[idx]) + p1x, p1y = p2x, p2y + + return inside +``` + +### 2. Distance Between Coordinates (Haversine Formula) + +Calculate great-circle distance between two latitude/longitude points in meters. + +```python +import math + +def haversine_distance(lon1, lat1, lon2, lat2, radius=6371000): + """ + Calculate great-circle distance between two points on Earth. + + Args: + lon1, lat1: longitude and latitude of first point in degrees + lon2, lat2: longitude and latitude of second point in degrees + radius: Earth radius in meters (default 6371000) + + Returns: + float: distance in meters + """ + # Convert degrees to radians + phi1 = math.radians(lat1) + phi2 = math.radians(lat2) + delta_phi = math.radians(lat2 - lat1) + delta_lambda = math.radians(lon2 - lon1) + + # Haversine formula + a = math.sin(delta_phi/2)**2 + math.cos(phi1) * math.cos(phi2) * math.sin(delta_lambda/2)**2 + c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a)) + + return radius * c + +# Vectorized version for many point pairs +def haversine_distance_vectorized(lons1, lats1, lons2, lats2, radius=6371000): + """ + Vectorized haversine distance using numpy. + + Args: + lons1, lats1: arrays of longitudes and latitudes for first points + lons2, lats2: arrays for second points + radius: Earth radius in meters + + Returns: + numpy array of distances in meters + """ + import numpy as np + + phi1 = np.radians(lats1) + phi2 = np.radians(lats2) + delta_phi = np.radians(lats2 - lats1) + delta_lambda = np.radians(lons2 - lons1) + + a = np.sin(delta_phi/2)**2 + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lambda/2)**2 + c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a)) + + return radius * c +``` + +### 3. Reading GeoJSON Files + +Parse GeoJSON files using json module and extract geometries and properties. + +```python +import json +import pandas as pd + +def read_geojson(filepath): + """ + Read GeoJSON file and extract features into a pandas DataFrame. + + Args: + filepath: path to GeoJSON file + + Returns: + tuple: (geometries, properties_df) + geometries: list of geometry objects (dict with type and coordinates) + properties_df: pandas DataFrame of feature properties + """ + with open(filepath, 'r') as f: + data = json.load(f) + + if data['type'] == 'FeatureCollection': + features = data['features'] + elif data['type'] == 'Feature': + features = [data] + else: + raise ValueError(f"Unsupported GeoJSON type: {data['type']}") + + geometries = [] + properties = [] + + for feature in features: + geometries.append(feature['geometry']) + properties.append(feature.get('properties', {})) + + properties_df = pd.DataFrame(properties) + return geometries, properties_df + +def extract_coordinates_from_geojson(filepath, geometry_type='Point'): + """ + Extract coordinates from GeoJSON features of specific geometry type. + + Args: + filepath: path to GeoJSON file + geometry_type: 'Point', 'LineString', 'Polygon', etc. + + Returns: + list of coordinate arrays + """ + geometries, _ = read_geojson(filepath) + coords = [] + for geom in geometries: + if geom['type'] == geometry_type: + coords.append(geom['coordinates']) + return coords +``` + +### 4. Spatial Joins Without Geopandas + +Perform spatial joins using pure Python algorithms. + +```python +import numpy as np +import pandas as pd + +def spatial_join_points_to_polygons(points_df, polygons_df, + point_lon_col='lon', point_lat_col='lat', + polygon_coords_col='coordinates'): + """ + Spatial join of points to polygons using point-in-polygon test. + + Args: + points_df: DataFrame with point coordinates + polygons_df: DataFrame with polygon coordinates (list of vertices) + point_lon_col, point_lat_col: column names for point coordinates + polygon_coords_col: column name containing polygon coordinates + + Returns: + DataFrame: points_df with added polygon index column + """ + results = [] + + for _, point_row in points_df.iterrows(): + point = (point_row[point_lon_col], point_row[point_lat_col]) + matched = False + + for poly_idx, poly_row in polygons_df.iterrows(): + polygon = poly_row[polygon_coords_col] + if point_in_polygon(point, polygon): + results.append({**point_row.to_dict(), 'polygon_index': poly_idx}) + matched = True + break + + if not matched: + results.append({**point_row.to_dict(), 'polygon_index': None}) + + return pd.DataFrame(results) + +# For many points, use vectorized approach +def spatial_join_points_to_polygons_vectorized(points_df, polygons_df, + point_lon_col='lon', point_lat_col='lat', + polygon_coords_col='coordinates'): + """ + Vectorized spatial join using numpy broadcasting (faster for many points). + """ + import numpy as np + + points = points_df[[point_lon_col, point_lat_col]].values + polygons = polygons_df[polygon_coords_col].values + + # Precompute bounding boxes for polygons for quick filtering + bboxes = [] + for poly in polygons: + poly_arr = np.array(poly) + bboxes.append([poly_arr[:, 0].min(), poly_arr[:, 1].min(), + poly_arr[:, 0].max(), poly_arr[:, 1].max()]) + bboxes = np.array(bboxes) + + results = [] + for i, point in enumerate(points): + # Filter polygons by bounding box first + mask = (point[0] >= bboxes[:, 0]) & (point[0] <= bboxes[:, 2]) & (point[1] >= bboxes[:, 1]) & (point[1] <= bboxes[:, 3]) + + candidate_indices = np.where(mask)[0] + matched = False + + for poly_idx in candidate_indices: + if point_in_polygon(point, polygons[poly_idx]): + results.append({**points_df.iloc[i].to_dict(), + 'polygon_index': polygons_df.index[poly_idx]}) + matched = True + break + + if not matched: + results.append({**points_df.iloc[i].to_dict(), 'polygon_index': None}) + + return pd.DataFrame(results) +``` + +### 5. Buffer Analysis on Latitude/Longitude Points + +Approximate buffer zones using distance thresholds and bounding boxes. + +```python +import numpy as np +import pandas as pd + +def create_point_buffer(lon, lat, distance_meters, n_points=36): + """ + Create a circular buffer around a point (approximated as polygon). + + Args: + lon, lat: center point coordinates in degrees + distance_meters: buffer radius in meters + n_points: number of points to approximate circle + + Returns: + list of (lon, lat) coordinates forming the buffer polygon + """ + import math + + # Earth radius in meters + R = 6371000 + + # Convert distance to angular distance in radians + angular_distance = distance_meters / R + + # Generate circle points + buffer_points = [] + for i in range(n_points): + angle = 2 * math.pi * i / n_points + + # Calculate new latitude + lat_rad = math.radians(lat) + new_lat = math.asin(math.sin(lat_rad) * math.cos(angular_distance) + + math.cos(lat_rad) * math.sin(angular_distance) * math.cos(angle)) + + # Calculate new longitude + new_lon = math.radians(lon) + math.atan2( + math.sin(angle) * math.sin(angular_distance) * math.cos(lat_rad), + math.cos(angular_distance) - math.sin(lat_rad) * math.sin(new_lat) + ) + + buffer_points.append((math.degrees(new_lon), math.degrees(new_lat))) + + # Close the polygon + buffer_points.append(buffer_points[0]) + return buffer_points + +def points_within_distance(lon_center, lat_center, points_df, + distance_meters, lon_col='lon', lat_col='lat'): + """ + Find points within a given distance of a center point. + + Args: + lon_center, lat_center: center coordinates + points_df: DataFrame with points to test + distance_meters: search radius in meters + lon_col, lat_col: column names for coordinates + + Returns: + DataFrame: subset of points within distance + """ + distances = haversine_distance_vectorized( + np.full(len(points_df), lon_center), + np.full(len(points_df), lat_center), + points_df[lon_col].values, + points_df[lat_col].values + ) + + within_mask = distances <= distance_meters + return points_df[within_mask].copy() +``` + +### 6. Working with MultiPolygons and Complex Geometries + +Handle complex GeoJSON geometries. + +```python +def flatten_geojson_geometry(geometry): + """ + Flatten GeoJSON geometry to list of coordinate arrays. + + Args: + geometry: GeoJSON geometry dict + + Returns: + list of coordinate arrays + """ + geom_type = geometry['type'] + coords = geometry['coordinates'] + + if geom_type == 'Point': + return [coords] + elif geom_type == 'LineString': + return [coords] + elif geom_type == 'Polygon': + # Polygon can have holes, return exterior ring only + return [coords[0]] + elif geom_type == 'MultiPolygon': + # Extract exterior rings from all polygons + rings = [] + for polygon in coords: + rings.append(polygon[0]) + return rings + elif geom_type == 'MultiPoint': + return coords + elif geom_type == 'MultiLineString': + return coords + else: + raise ValueError(f"Unsupported geometry type: {geom_type}") +``` + +## Common Workflows + +### 1. Fire Station Coverage Analysis + +```python +import json +import pandas as pd +import numpy as np + +# Load fire stations (points) and district boundary (polygon) +with open('fire_stations.geojson', 'r') as f: + stations_data = json.load(f) + +with open('etobicoke_boundary.geojson', 'r') as f: + boundary_data = json.load(f) + +# Extract coordinates +stations_coords = [] +stations_props = [] +for feature in stations_data['features']: + if feature['geometry']['type'] == 'Point': + stations_coords.append(feature['geometry']['coordinates']) + stations_props.append(feature.get('properties', {})) + +# Extract boundary polygon (assuming single polygon) +boundary_polygon = boundary_data['features'][0]['geometry']['coordinates'][0] + +# Check which stations are inside boundary +inside_mask = points_in_polygon(np.array(stations_coords), boundary_polygon) +stations_inside = [stations_coords[i] for i in range(len(stations_coords)) if inside_mask[i]] + +print(f"Total stations: {len(stations_coords)}") +print(f"Stations inside boundary: {len(stations_inside)}") +``` + +### 2. Distance-Based Analysis + +```python +# Calculate distances between all station pairs +n = len(stations_coords) +distances = np.zeros((n, n)) +for i in range(n): + for j in range(i+1, n): + lon1, lat1 = stations_coords[i] + lon2, lat2 = stations_coords[j] + dist = haversine_distance(lon1, lat1, lon2, lat2) + distances[i, j] = dist + distances[j, i] = dist + +# Find stations within 5km of each other +threshold = 5000 # meters +within_5km = distances <= threshold +np.fill_diagonal(within_5km, False) # exclude self +``` + +## Performance Tips + +1. **Use numpy vectorization** for operations on many points +2. **Pre-filter with bounding boxes** before expensive point-in-polygon tests +3. **Cache distance calculations** when repeated +4. **Use approximate algorithms** when exact precision isn't critical +5. **Consider using spatial indexing** (grid-based or quadtree) for large datasets + +## Limitations + +- Not as performant as compiled libraries (geopandas, shapely) +- Limited to 2D operations +- Earth curvature approximations may have small errors over large distances +- Complex geometry operations (union, intersection, difference) not implemented + +## Additional Resources + +- GeoJSON specification: https://geojson.org/ +- Haversine formula: https://en.wikipedia.org/wiki/Haversine_formula +- Point-in-polygon algorithms: https://wrf.ecse.rpi.edu/Research/Short_Notes/pnpoly.html