DedSecInside · KingAkeem · Jan 18, 2023 · Jan 16, 2023 · Jan 17, 2023
diff --git a/torbot/modules/collect_data.py b/torbot/modules/collect_data.py
@@ -11,7 +11,7 @@
 
 from .utils import join_local_path
 from .validators import validate_link
-
+from .log import debug
 
 def parse_links(html: str):
     """Parses HTML page to extract links.
@@ -58,16 +58,20 @@ def collect_data(user_url: str):
         writer = SafeDictWriter(outcsv, fieldnames=fieldnames)
         bar = Bar('Processing...', max=len(links))
         for link in links:
-            resp = requests.get(link)
-            soup = BeautifulSoup(resp.text, 'html.parser')
-            meta_tags = parse_meta_tags(soup)
-            entry = {
-                "ID": uuid.uuid4(),
-                "Title": soup.title.string,
-                "Metadata": meta_tags,
-                "Content": soup.find('body')
-            }
-            writer.writerow(entry)
+            try:
+                resp = requests.get(link)
+                soup = BeautifulSoup(resp.text, 'html.parser')
+                meta_tags = parse_meta_tags(soup)
+                entry = {
+                    "ID": uuid.uuid4(),
+                    "Title": soup.title.string if soup.title else "",
+                    "Metadata": meta_tags,
+                    "Content": soup.find('body')
+                }
+                writer.writerow(entry)
+            except requests.exceptions.RequestException as e:
+                debug(e)
+                debug(f"Failed to connect to [{link}].")
             bar.next()
     bar.finish()
     print(f'Data has been saved to {file_path}.')