File tree Expand file tree Collapse file tree
src/crawlee/crawlers/_abstract_http Expand file tree Collapse file tree Original file line number Diff line number Diff line change 22
33import asyncio
44import logging
5+ import warnings
56from abc import ABC
67from datetime import timedelta
78from typing import TYPE_CHECKING , Generic
@@ -47,6 +48,12 @@ class HttpCrawlerOptions(
4748 navigation_timeout : NotRequired [timedelta | None ]
4849 """Timeout for the HTTP request."""
4950
51+ impersonate : NotRequired [bool ]
52+ """Whether the default HTTP client should impersonate a browser by sending browser-like headers. This applies only
53+ to the default client. If you pass your own `http_client`, this flag is ignored; configure impersonation on that
54+ client directly instead.
55+ """
56+
5057
5158@docs_group ('Crawlers' )
5259class AbstractHttpCrawler (
@@ -101,6 +108,15 @@ def __init__(
101108
102109 if impersonate is False and 'http_client' not in kwargs :
103110 kwargs ['http_client' ] = ImpitHttpClient (browser = None )
111+ elif impersonate is False and 'http_client' in kwargs :
112+ warnings .warn (
113+ (
114+ '`impersonate` option is ignored when custom `http_client` is provided. '
115+ 'Please configure impersonation directly on the `http_client` instance.'
116+ ),
117+ category = UserWarning ,
118+ stacklevel = 2 ,
119+ )
104120
105121 kwargs .setdefault ('_logger' , logging .getLogger (self .__class__ .__name__ ))
106122 super ().__init__ (** kwargs )
You can’t perform that action at this time.
0 commit comments