Merge pull request #185 from lesander/fix/fb-group-links-and-cookie-banner

Fix #172 (group links not working) and #181 (cookie banner not detected)
2024-11-10 06:04:17 +00:00 · 2020-12-07 02:46:40 +05:00 · 2020-12-07 02:46:40 +05:00 · b1de95ebdb
commit b1de95ebdb
parent 8b08881f6d a683e1ee24
2 changed files with 22 additions and 8 deletions
--- a/scraper/scraper.py
+++ b/scraper/scraper.py
@ -7,7 +7,7 @@ import utils
 import argparse

 from selenium import webdriver
-from selenium.common.exceptions import NoSuchElementException
+from selenium.common.exceptions import NoSuchElementException, InvalidSessionIdException
 from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support import expected_conditions as EC
@ -747,6 +747,14 @@ def login(email, password):
        driver.get(fb_path)
        driver.maximize_window()

+        try:
+            # New Facebook design has an annoying cookie banner.
+            driver.find_element_by_css_selector(
+                "button[data-cookiebanner=accept_button]"
+            ).click()
+        except NoSuchElementException:
+            pass
+
        # filling the form
        driver.find_element_by_name("email").send_keys(email)
        driver.find_element_by_name("pass").send_keys(password)
@ -794,11 +802,12 @@ def scraper(**kwargs):
    if ("password" not in cfg) or ("email" not in cfg):
        print("Your email or password is missing. Kindly write them in credentials.yaml")
        exit(1)
-    urls = [
-        facebook_https_prefix + facebook_link_body + get_item_id(line)
-        for line in open("input.txt", newline="\r\n")
-        if not line.lstrip().startswith("#") and not line.strip() == ""
-    ]
+
+    urls = []
+    for line in open("input.txt", newline="\r\n"):
+        if line.lstrip().startswith("#") or line.strip() == "":
+            continue
+        urls.append(line.strip())

    if len(urls) > 0:
        print("\nStarting Scraping...")
@ -822,7 +831,10 @@ def scraper(**kwargs):
                add_group_post_to_file(f, file_name, item_id)
                f.close()
                os.chdir("../..")
-        driver.close()
+        try:
+            driver.close()
+        except InvalidSessionIdException as e:
+            print("InvalidSessionIdException while closing driver.")
    else:
        print("Input file is empty.")

--- a/scraper/utils.py
+++ b/scraper/utils.py
@ -175,7 +175,9 @@ def get_time(x):
    try:
        time = x.find_element_by_tag_name("abbr").get_attribute("title")
        time = (
-            str("%02d" % int(time.split(", ")[1].split()[1]),)
+            str(
+                "%02d" % int(time.split(", ")[1].split()[1]),
+            )
            + "-"
            + str(
                (