Add --chromium & --headless optional flags

This commit is contained in:
sickcodes 2020-10-04 11:41:43 +00:00
parent 4f54d8473d
commit 7b136e9796
2 changed files with 39 additions and 5 deletions

View file

@ -179,7 +179,7 @@ python scraper/scraper.py
You can personalize your scraping needs using command-line arguments:
```python
```bash
python scraper/scraper.py \
--uploaded_photos True \
--friends_photos True \
@ -192,6 +192,20 @@ python scraper/scraper.py \
Note that these are the default values, so there is no need to specify them if you're just testing or are okay with them.
## Chromium
Chromium users can add `--chromium True` to run using the Chromium browser.
```bash
python scraper/scraper.py \
--uploaded_photos True \
--photos_small_size True \
--total_scrolls 2500 \
--scroll_time 8 \
--chromium True
```
---
## Citation 📚

View file

@ -733,12 +733,20 @@ def login(email, password):
options.add_argument("--disable-notifications")
options.add_argument("--disable-infobars")
options.add_argument("--mute-audio")
# options.add_argument("headless")
if headless:
options.add_argument('--headless')
try:
driver = webdriver.Chrome(
executable_path=ChromeDriverManager().install(), options=options
)
if chromium:
from selenium.webdriver import Chrome
driver = webdriver.Chrome(
options=options
)
else:
driver = webdriver.Chrome(
executable_path=ChromeDriverManager().install(), options=options
)
except Exception:
print("Error loading chrome webdriver " + sys.exc_info()[0])
exit(1)
@ -864,6 +872,16 @@ if __name__ == "__main__":
ap.add_argument(
"-st", "--scroll_time", help="How much time should I take to scroll?", default=8
)
ap.add_argument(
"--chromium",
help="Should I use Chromium instead?",
default=False
)
ap.add_argument(
"--headless",
help="Should I run in a headless browser?",
default=False
)
args = vars(ap.parse_args())
print(args)
@ -884,6 +902,8 @@ if __name__ == "__main__":
total_scrolls = int(args["total_scrolls"])
scroll_time = int(args["scroll_time"])
chromium = utils.to_bool(args["chromium"])
headless = utils.to_bool(args["headless"])
current_scrolls = 0
old_height = 0