-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsettings.py
More file actions
101 lines (80 loc) · 3.42 KB
/
settings.py
File metadata and controls
101 lines (80 loc) · 3.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# -*- coding: utf-8 -*-
import logging
import os
from datetime import datetime, timedelta
from distutils.util import strtobool
from typing import Dict
from dotenv import load_dotenv
from scrapy.utils.log import configure_logging
# Load variables from a .env file (when python-dotenv finds one) into the
# process environment before any os.getenv() lookup below runs.
load_dotenv()
# Project identity and the packages that hold spiders and custom commands.
BOT_NAME = "Bronya"

# New spiders are generated into the same package that existing ones load from.
NEWSPIDER_MODULE = "spiders"
SPIDER_MODULES = [NEWSPIDER_MODULE]

COMMANDS_MODULE = "commands"
def _env_flag(name: str, default: str) -> bool:
    """Return environment variable *name* parsed as a boolean.

    Recognises, case-insensitively, the spellings that
    ``distutils.util.strtobool`` accepted: y/yes/t/true/on/1 and
    n/no/f/false/off/0.  Parsed locally because distutils was removed from
    the standard library in Python 3.12.

    Args:
        name: Environment variable to read.
        default: Text used when the variable is unset.

    Returns:
        True for a truthy spelling, False for a falsy one.

    Raises:
        ValueError: If the text is not one of the recognised spellings.
    """
    text = os.getenv(name, default).lower()
    if text in ("y", "yes", "t", "true", "on", "1"):
        return True
    if text in ("n", "no", "f", "false", "off", "0"):
        return False
    # Fail fast: a typo must not silently switch the proxy on or off.
    raise ValueError(f"invalid truth value {text!r} for {name}")


# Outbound proxy configuration, read from the environment.
PROXY = os.getenv("PROXY", "")
PROXY_AUTH = os.getenv("PROXY_AUTH", "")
PROXY_ENABLED = _env_flag("PROXY_ENABLED", "False")
# Browser identity sent with requests.  USER_AGENT_RELEASE_DATE has to be
# bumped together with USER_AGENT: this module logs a warning once that date
# is more than 180 days in the past.  The date below is the stable-channel
# release of Chrome 139, matching the version in the string.
USER_AGENT_RELEASE_DATE = '2025-08-05'
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36"
# Crawl throughput limits; each can be overridden from the environment.
CONCURRENT_REQUESTS = int(os.getenv("CONCURRENT_REQUESTS", "16"))
CONCURRENT_REQUESTS_PER_DOMAIN = int(os.getenv("CONCURRENT_REQUESTS_PER_DOMAIN", "8"))
# Parsed as float: Scrapy supports fractional delays (e.g. "0.25"), and
# int() would raise ValueError on such a value while importing settings.
DOWNLOAD_DELAY = float(os.getenv("DOWNLOAD_DELAY", "0"))
DOWNLOAD_TIMEOUT = int(os.getenv("DOWNLOAD_TIMEOUT", "180"))

# The telnet console is switched off, so the password below is only a
# placeholder; change it before ever enabling the console.
TELNETCONSOLE_ENABLED = False
TELNETCONSOLE_PASSWORD = "password"
# Request headers used in place of Scrapy's defaults.
DEFAULT_REQUEST_HEADERS = {
    "Accept-Language": "en-US,en;q=0.5",
    "Cache-Control": "max-age=0",
}
# NOTE(review): not a built-in Scrapy setting; presumably consumed by the
# project's own proxy/download-handler code - confirm against its reader.
ROTATING_PROXIES_DOWNLOADER_HANDLER_AUTO_CLOSE_CACHED_CONNECTIONS_ENABLED: bool = True

# Scrapy's stock proxy middleware is disabled (None) and the project's own
# HttpProxyMiddleware is registered at order 543.
DOWNLOADER_MIDDLEWARES = {
    "scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware": None,
    "middlewares.HttpProxyMiddleware": 543,
}
# Logging verbosity and destination, both taken from the environment.
LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO")
# An unset or empty LOG_FILE both collapse to None.
LOG_FILE = os.environ.get("LOG_FILE") or None
# Item pipelines keyed by import path; the numbers order them (300 runs
# before 400), so items are validated before they are published.
ITEM_PIPELINES = {
    "pipelines.validation_pipeline.ValidationPipeline": 300,
    # The rmq-provided producer pipeline manages the pika connection lifecycle.
    "rmq.pipelines.item_producer_pipeline.ItemProducerPipeline": 400,
}
# Database connection parameters, each overridable from the environment.
# The default port 3306 suggests MySQL - NOTE(review): confirm.
DB_HOST = os.environ.get("DB_HOST", "127.0.0.1")
DB_PORT = int(os.environ.get("DB_PORT", "3306"))
DB_USERNAME = os.environ.get("DB_USERNAME", "root")
DB_PASSWORD = os.environ.get("DB_PASSWORD", "")
DB_DATABASE = os.environ.get("DB_DATABASE", "db_name")
# Verbosity of the "pika" logger.  logging matches level names
# case-sensitively, so the value is upper-cased first; otherwise a setting
# such as PIKA_LOG_LEVEL=debug would raise ValueError while importing settings.
PIKA_LOG_LEVEL = os.getenv("PIKA_LOG_LEVEL", "WARN").upper()
logging.getLogger("pika").setLevel(PIKA_LOG_LEVEL)
# RabbitMQ broker connection parameters, each overridable from the environment.
RABBITMQ_HOST = os.environ.get("RABBITMQ_HOST", "localhost")
RABBITMQ_PORT = int(os.environ.get("RABBITMQ_PORT", "5672"))
RABBITMQ_USERNAME = os.environ.get("RABBITMQ_USERNAME", "guest")
RABBITMQ_PASSWORD = os.environ.get("RABBITMQ_PASSWORD", "guest")
RABBITMQ_VIRTUAL_HOST = os.environ.get("RABBITMQ_VIRTUAL_HOST", "/")
# HTTP cache switch.  Enabled only for a recognised truthy spelling
# (case-insensitive); any other text - falsy or unrecognised - leaves the
# cache off, as the former strtobool/ValueError fallback did.  Checked with a
# plain membership test so this setting does not depend on distutils, which
# was removed from the standard library in Python 3.12.
HTTPCACHE_ENABLED = os.getenv("HTTPCACHE_ENABLED", "False").lower() in (
    "y",
    "yes",
    "t",
    "true",
    "on",
    "1",
)
# Comma-separated status codes from the environment, converted to ints;
# empty fragments (including an unset/empty variable) are skipped.
HTTPCACHE_IGNORE_HTTP_CODES = [
    int(code)
    for code in os.environ.get("HTTPCACHE_IGNORE_HTTP_CODES", "").split(",")
    if code
]
# Extensions that are always loaded; the Sentry ones are added below on demand.
EXTENSIONS = {
    'extensions.logging_extension.LoggingExtension': 100,
}

# Send exceptions to Sentry.  Only the literal text "true" (any letter case)
# in IS_SENTRY_ENABLED switches this on.
IS_SENTRY_ENABLED = os.getenv("IS_SENTRY_ENABLED", "false").lower() == "true"
if IS_SENTRY_ENABLED:
    # SENTRY_DSN and SENTRY_CLIENT_OPTIONS are defined only when Sentry is on.
    SENTRY_DSN = os.getenv("SENTRY_DSN", None)
    # Optionally, additional configuration options can be provided
    SENTRY_CLIENT_OPTIONS = {
        # these correspond to the sentry_sdk.init kwargs
        "release": os.getenv("RELEASE", "0.0.0")
    }
    # Load SentryLogging extension before others
    EXTENSIONS["scrapy_sentry_sdk.extensions.SentryLogging"] = 1
    # Load SentryExtension
    EXTENSIONS["extensions.sentry_extension.SentryExtension"] = 1
# Set up Scrapy's logging first so the warning below goes through it.
configure_logging()

# Warn when the date recorded in USER_AGENT_RELEASE_DATE ("YYYY-MM-DD") is
# more than 180 days in the past, as a reminder to refresh USER_AGENT.
if (
    datetime(*map(int, USER_AGENT_RELEASE_DATE.split('-'))) + timedelta(days=180)
    < datetime.now()
):
    logging.warning('USER_AGENT is outdated')

REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.7"