forked from CloakHQ/CloakBrowser
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscrapling_example.py
More file actions
42 lines (29 loc) · 1.11 KB
/
scrapling_example.py
File metadata and controls
42 lines (29 loc) · 1.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
"""Scrapling + CloakBrowser: adaptive web scraping with stealth fingerprints.

Scrapling handles parsing and element tracking, while
CloakBrowser handles bot detection.

Requires: pip install "scrapling[all]" cloakbrowser
"""
import asyncio
import json
from urllib.request import urlopen
from scrapling.fetchers import StealthyFetcher
from cloakbrowser import launch_async
def _read_ws_url(port, timeout=10.0):
    """Return the WebSocket debugger URL advertised on the local debug port.

    Args:
        port: Remote-debugging port the browser was launched with.
        timeout: Seconds to wait for the HTTP endpoint before giving up.

    Returns:
        The value of ``webSocketDebuggerUrl`` from ``/json/version``.

    Raises:
        urllib.error.URLError: If the endpoint cannot be reached in time.
        KeyError: If the response carries no ``webSocketDebuggerUrl`` entry.
    """
    # The context manager closes the HTTP response even if parsing fails.
    with urlopen(f"http://127.0.0.1:{port}/json/version", timeout=timeout) as response:
        info = json.loads(response.read())
    return info["webSocketDebuggerUrl"]


async def main(url="https://example.com", debug_port=9245):
    """Fetch *url* with Scrapling through a CloakBrowser instance over CDP.

    Launches a headless CloakBrowser with remote debugging enabled, looks up
    its WebSocket debugger URL, hands that URL to Scrapling's
    ``StealthyFetcher``, and prints the page title and paragraph texts.

    Args:
        url: Page to fetch. Defaults to ``https://example.com``.
        debug_port: Local port for the browser's remote-debugging endpoint.
    """
    # Launch CloakBrowser with remote debugging bound to localhost only
    cb_browser = await launch_async(
        headless=True,
        args=[
            f"--remote-debugging-port={debug_port}",
            "--remote-debugging-address=127.0.0.1",
        ],
    )
    try:
        # Get the WebSocket URL from Chrome (Scrapling requires ws:// scheme).
        # urlopen blocks, so run it in the default executor to keep the event
        # loop free while waiting on the HTTP round trip.
        loop = asyncio.get_running_loop()
        ws_url = await loop.run_in_executor(None, _read_ws_url, debug_port)

        # Connect Scrapling to the stealth browser via CDP
        page = await StealthyFetcher.async_fetch(
            url,
            cdp_url=ws_url,
        )

        print(f"Title: {page.css('title::text').get()}")
        print(f"Text: {page.css('p::text').getall()}")
    finally:
        # Always shut the browser down, even when the lookup or fetch fails,
        # so no browser process is left behind.
        await cb_browser.close()
# Script entry point: run the async example only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    example_coroutine = main()
    asyncio.run(example_coroutine)