From 7b136e97963855b9fa51cba5ec5c00de763eeaaa Mon Sep 17 00:00:00 2001 From: sickcodes Date: Sun, 4 Oct 2020 11:41:43 +0000 Subject: [PATCH] Add --chromium & --headless optional flags --- README.md | 16 +++++++++++++++- scraper/scraper.py | 28 ++++++++++++++++++++++++---- 2 files changed, 39 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index fba9538..4fde4cc 100644 --- a/README.md +++ b/README.md @@ -179,7 +179,7 @@ python scraper/scraper.py You can personalize your scrapping needs using the command line arguments: -```python +```bash python scraper/scraper.py \ --uploaded_photos True \ --friends_photos True \ @@ -192,6 +192,20 @@ python scraper/scraper.py \ Note that those are the default values so no need to write them down if you're just testing or are okay with them. +## Chromium + +Chromium users can add `--chromium True` to run using the Chromium browser. + +```bash +python scraper/scraper.py \ + --uploaded_photos True \ + --photos_small_size True \ + --total_scrolls 2500 \ + --scroll_time 8 \ + --chromium True +``` + + --- ## Citation 📚 diff --git a/scraper/scraper.py b/scraper/scraper.py index 24bf76d..e34efbc 100644 --- a/scraper/scraper.py +++ b/scraper/scraper.py @@ -733,12 +733,20 @@ def login(email, password): options.add_argument("--disable-notifications") options.add_argument("--disable-infobars") options.add_argument("--mute-audio") - # options.add_argument("headless") + + if headless: + options.add_argument('--headless') try: - driver = webdriver.Chrome( - executable_path=ChromeDriverManager().install(), options=options - ) + if chromium: + from selenium.webdriver import Chrome + driver = webdriver.Chrome( + options=options + ) + else: + driver = webdriver.Chrome( + executable_path=ChromeDriverManager().install(), options=options + ) except Exception: print("Error loading chrome webdriver " + sys.exc_info()[0]) exit(1) @@ -864,6 +872,16 @@ if __name__ == "__main__": ap.add_argument( "-st", "--scroll_time", help="How much time should I take to scroll?", default=8 ) + ap.add_argument( + "--chromium", + help="Should I use Chromium instead?", + default=False + ) + ap.add_argument( + "--headless", + help="Should I run in a headless browser?", + default=False + ) args = vars(ap.parse_args()) print(args) @@ -884,6 +902,8 @@ if __name__ == "__main__": total_scrolls = int(args["total_scrolls"]) scroll_time = int(args["scroll_time"]) + chromium = utils.to_bool(args["chromium"]) + headless = utils.to_bool(args["headless"]) current_scrolls = 0 old_height = 0