Merge pull request #185 from lesander/fix/fb-group-links-and-cookie-banner

Fix #172 (groups not working) and fix #181 (cookie banner not detected)
This commit is contained in:
Haris Muneer ⚡️ 2020-12-07 02:46:40 +05:00 committed by GitHub
commit b1de95ebdb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 22 additions and 8 deletions

View file

@ -7,7 +7,7 @@ import utils
import argparse
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoSuchElementException, InvalidSessionIdException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
@ -747,6 +747,14 @@ def login(email, password):
driver.get(fb_path)
driver.maximize_window()
try:
# New Facebook design has an annoying cookie banner.
driver.find_element_by_css_selector(
"button[data-cookiebanner=accept_button]"
).click()
except NoSuchElementException:
pass
# filling the form
driver.find_element_by_name("email").send_keys(email)
driver.find_element_by_name("pass").send_keys(password)
@ -794,11 +802,12 @@ def scraper(**kwargs):
if ("password" not in cfg) or ("email" not in cfg):
print("Your email or password is missing. Kindly write them in credentials.yaml")
exit(1)
urls = [
facebook_https_prefix + facebook_link_body + get_item_id(line)
for line in open("input.txt", newline="\r\n")
if not line.lstrip().startswith("#") and not line.strip() == ""
]
urls = []
for line in open("input.txt", newline="\r\n"):
if line.lstrip().startswith("#") or line.strip() == "":
continue
urls.append(line.strip())
if len(urls) > 0:
print("\nStarting Scraping...")
@ -822,7 +831,10 @@ def scraper(**kwargs):
add_group_post_to_file(f, file_name, item_id)
f.close()
os.chdir("../..")
driver.close()
try:
driver.close()
except InvalidSessionIdException as e:
print("InvalidSessionIdException while closing driver.")
else:
print("Input file is empty.")

View file

@ -175,7 +175,9 @@ def get_time(x):
try:
time = x.find_element_by_tag_name("abbr").get_attribute("title")
time = (
str("%02d" % int(time.split(", ")[1].split()[1]),)
str(
"%02d" % int(time.split(", ")[1].split()[1]),
)
+ "-"
+ str(
(