Skip to content

Commit 681bc28

Browse files
committed
add warning
1 parent 97e3c75 commit 681bc28

1 file changed

Lines changed: 16 additions & 0 deletions

File tree

src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import asyncio
44
import logging
5+
import warnings
56
from abc import ABC
67
from datetime import timedelta
78
from typing import TYPE_CHECKING, Generic
@@ -47,6 +48,12 @@ class HttpCrawlerOptions(
4748
navigation_timeout: NotRequired[timedelta | None]
4849
"""Timeout for the HTTP request."""
4950

51+
impersonate: NotRequired[bool]
52+
"""Whether the default HTTP client should impersonate a browser by sending browser-like headers. This applies only
53+
to the default client. If you pass your own `http_client`, this flag is ignored and you configure impersonation
54+
on that client directly.
55+
"""
56+
5057

5158
@docs_group('Crawlers')
5259
class AbstractHttpCrawler(
@@ -101,6 +108,15 @@ def __init__(
101108

102109
if impersonate is False and 'http_client' not in kwargs:
103110
kwargs['http_client'] = ImpitHttpClient(browser=None)
111+
elif impersonate is False and 'http_client' in kwargs:
112+
warnings.warn(
113+
(
114+
'`impersonate` option is ignored when custom `http_client` is provided. '
115+
'Please configure impersonation directly on the `http_client` instance.'
116+
),
117+
category=UserWarning,
118+
stacklevel=2,
119+
)
104120

105121
kwargs.setdefault('_logger', logging.getLogger(self.__class__.__name__))
106122
super().__init__(**kwargs)

0 commit comments

Comments
 (0)