mirror of
https://github.com/harismuneer/Ultimate-Facebook-Scraper
synced 2024-11-10 06:04:17 +00:00
Initial commit
commit a03c786286
7 changed files with 675 additions and 0 deletions
2  .gitattributes  vendored  Normal file
@@ -0,0 +1,2 @@
# Auto detect text files and perform LF normalization
* text=auto
21  LICENSE  Normal file
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2018 harismuneer

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
1  README.md  Normal file
@@ -0,0 +1 @@
# Facebook Scraper
BIN  chromedriver  Normal file
Binary file not shown.
BIN  chromedriver.exe  Normal file
Binary file not shown.
0  input.txt  Normal file
651  scraper.py  Normal file
@@ -0,0 +1,651 @@
import calendar
import os
import platform
import sys
import urllib.request

from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# -------------------------------------------------------------
# -------------------------------------------------------------


# Global Variables

driver = None

# whether to download photos or not
download_uploaded_photos = True
download_friends_photos = True

# whether to download the full image or only its thumbnail (small size);
# with the small size the download is very quick, otherwise each photo is
# opened individually, which takes much more time
friends_small_size = True
photos_small_size = True

total_scrolls = 5000
current_scrolls = 0
scroll_time = 5

old_height = 0


# -------------------------------------------------------------
# -------------------------------------------------------------
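
# Note on the scroll settings above: total_scrolls caps how many times a
# listing page is scrolled, and each scroll waits at most scroll_time seconds
# for new content before scroll() (defined below) gives up.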
def get_facebook_images_url(img_links):
    urls = []

    for link in img_links:

        if link != "None":
            valid_url_found = False
            driver.get(link)

            try:
                while not valid_url_found:
                    WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "spotlight")))
                    element = driver.find_element_by_class_name("spotlight")
                    img_url = element.get_attribute('src')

                    if img_url.find('.gif') == -1:
                        valid_url_found = True
                        urls.append(img_url)

            except StaleElementReferenceException:
                urls.append(driver.find_element_by_class_name("spotlight").get_attribute('src'))

            except:
                print("Exception (get_facebook_images_url):", sys.exc_info()[0])

        else:
            urls.append("None")

    return urls


# -------------------------------------------------------------
# -------------------------------------------------------------

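# A note on get_facebook_images_url above (inferred from the class name, not
# documented in this commit): the "spotlight" element appears to be the <img>
# of Facebook's photo viewer; the loop keeps re-reading its src until it stops
# being a .gif loading placeholder.
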
# takes a list of urls and downloads the image from each url into folder_name
def image_downloader(img_links, folder_name):
    img_names = []

    try:
        parent = os.getcwd()
        try:
            folder = os.path.join(os.getcwd(), folder_name)
            if not os.path.exists(folder):
                os.mkdir(folder)

            os.chdir(folder)
        except:
            print("Error in changing directory")

        for link in img_links:
            img_name = "None"

            if link != "None":
                img_name = (link.split('.jpg')[0]).split('/')[-1] + '.jpg'

                # skip this particular file (presumably Facebook's stock/default
                # profile picture)
                if img_name == "10354686_10150004552801856_220367501106153455_n.jpg":
                    img_name = "None"
                else:
                    try:
                        urllib.request.urlretrieve(link, img_name)
                    except:
                        img_name = "None"

            img_names.append(img_name)

        os.chdir(parent)
    except:
        print("Exception (image_downloader):", sys.exc_info()[0])

    return img_names
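
# Usage sketch (hypothetical values, not from the source): calling
#   image_downloader(["https://scontent.example/12345_n.jpg", "None"], "Uploaded Photos")
# would return ["12345_n.jpg", "None"] and leave the downloaded file inside an
# "Uploaded Photos" folder under the current working directory.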


# -------------------------------------------------------------
# -------------------------------------------------------------

def check_height():
    new_height = driver.execute_script("return document.body.scrollHeight")
    return new_height != old_height


# -------------------------------------------------------------
# -------------------------------------------------------------

# helper function: used to scroll the page
def scroll():
    global old_height
    current_scrolls = 0

    while True:
        try:
            if current_scrolls == total_scrolls:
                return

            old_height = driver.execute_script("return document.body.scrollHeight")
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            WebDriverWait(driver, scroll_time, 0.05).until(lambda driver: check_height())
            current_scrolls += 1
        except TimeoutException:
            break

    return
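
# A note on the wait above (standard Selenium behaviour): WebDriverWait(driver,
# scroll_time, 0.05) re-evaluates check_height() every 0.05 seconds and raises
# TimeoutException once scroll_time seconds pass without the document height
# changing; scroll() treats that timeout as "no more content to load" and stops.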


# -------------------------------------------------------------
# -------------------------------------------------------------

# --Helper Functions for Posts

def get_status(x):
    status = ""
    try:
        status = x.find_element_by_xpath(".//div[@class='_5pbx userContent']").text
    except:
        try:
            status = x.find_element_by_xpath(".//div[@class='userContent']").text
        except:
            pass
    return status


def get_div_links(x, tag):
    try:
        temp = x.find_element_by_xpath(".//div[@class='_3x-2']")
        return temp.find_element_by_tag_name(tag)
    except:
        return ""


def get_title_links(title):
    l = title.find_elements_by_tag_name('a')
    return l[-1].text, l[-1].get_attribute('href')


def get_title(x):
    title = ""
    try:
        title = x.find_element_by_xpath(".//span[@class='fwb fcg']")
    except:
        try:
            title = x.find_element_by_xpath(".//span[@class='fcg']")
        except:
            try:
                title = x.find_element_by_xpath(".//span[@class='fwn fcg']")
            except:
                pass
    finally:
        return title

def get_time(x):
    time = ""
    try:
        time = x.find_element_by_tag_name('abbr').get_attribute('title')
        # rebuild the abbr title into a "DD-MM-YYYY HH:MM" string
        time = str("%02d" % int(time.split(", ")[1].split()[1]), ) + "-" + str(
            ("%02d" % (int((list(calendar.month_abbr).index(time.split(", ")[1].split()[0][:3]))),))) + "-" + \
               time.split()[3] + " " + str("%02d" % int(time.split()[5].split(":")[0])) + ":" + str(
            time.split()[5].split(":")[1])
    except:
        pass

    finally:
        return time

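# For illustration (hypothetical timestamp, not from the source): an abbr title
# such as "Wednesday, February 14, 2018 at 6:06 PM" comes out of get_time() as
# "14-02-2018 06:06"; note the AM/PM marker is dropped rather than converted.
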
def extract_and_write_posts(elements, filename):
    try:
        # utf-8 here matches save_to_file() and avoids most character-mapping errors
        f = open(filename, "w", encoding='utf-8', newline='\r\n')
        f.writelines(' TIME || TYPE || TITLE || STATUS || LINKS(Shared Posts/Shared Links etc) ' + '\n' + '\n')

        for x in elements:
            try:
                video_link = " "
                title = " "
                status = " "
                link = ""
                img = " "
                time = " "
                post_type = "others"

                # time
                time = get_time(x)

                # title
                title = get_title(x)
                if title.text.find("shared a memory") != -1:
                    x = x.find_element_by_xpath(".//div[@class='_1dwg _1w_m']")
                    title = get_title(x)

                status = get_status(x)
                if title.text == driver.find_element_by_id("fb-timeline-cover-name").text:
                    if status == '':
                        temp = get_div_links(x, "img")
                        if temp == '':  # no image tag, which means it is not a life event
                            link = get_div_links(x, "a").get_attribute('href')
                            post_type = "status update without text"
                        else:
                            post_type = 'life event'
                            link = get_div_links(x, "a").get_attribute('href')
                            status = get_div_links(x, "a").text
                    else:
                        post_type = "status update"
                        if get_div_links(x, "a") != '':
                            link = get_div_links(x, "a").get_attribute('href')

                elif title.text.find(" shared ") != -1:

                    x1, link = get_title_links(title)
                    post_type = "shared " + x1

                elif title.text.find(" at ") != -1 or title.text.find(" in ") != -1:
                    post_type = "check in"
                    if title.text.find(" at ") != -1:
                        x1, link = get_title_links(title)
                    elif title.text.find(" in ") != -1:
                        status = get_div_links(x, "a").text

                elif title.text.find(" added ") != -1 and title.text.find("photo") != -1:
                    post_type = "added photo"
                    link = get_div_links(x, "a").get_attribute('href')

                elif title.text.find(" added ") != -1 and title.text.find("video") != -1:
                    post_type = "added video"
                    link = get_div_links(x, "a").get_attribute('href')

                else:
                    post_type = "others"

                if not isinstance(title, str):
                    title = title.text

                status = status.replace("\n", " ")
                title = title.replace("\n", " ")

                line = str(time) + " || " + str(post_type) + ' || ' + str(title) + ' || ' + str(status) + ' || ' + str(
                    link) + "\n"

                try:
                    f.writelines(line)
                except:
                    print('Posts: Could not map encoded characters')
            except:
                pass
        f.close()
    except:
        print("Exception (extract_and_write_posts)", "Status =", sys.exc_info()[0])

    return
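
# For illustration (hypothetical values, not from the source), a line written
# to Posts.txt by the function above looks like:
#   14-02-2018 06:06 || status update || John Doe || Hello world! || https://en-gb.facebook.com/...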
# -------------------------------------------------------------
# -------------------------------------------------------------


def save_to_file(name, elements, status, current_section):
    """helper function used to save links to files"""

    # status 0 = dealing with friends list
    # status 1 = dealing with photos
    # status 2 = dealing with videos
    # status 3 = dealing with about section
    # status 4 = dealing with posts

    try:

        f = None  # file pointer

        if status != 4:
            f = open(name, 'w', encoding='utf-8', newline='\r\n')

        results = []
        img_names = []

        # dealing with Friends
        if status == 0:

            results = [x.get_attribute('href') for x in elements]
            results = [create_original_link(x) for x in results]

            try:
                if download_friends_photos:

                    if friends_small_size:
                        img_links = [x.find_element_by_css_selector('img').get_attribute('src') for x in elements]
                    else:
                        links = []
                        for friend in results:
                            driver.get(friend)
                            WebDriverWait(driver, 10).until(
                                EC.presence_of_element_located((By.CLASS_NAME, "profilePicThumb")))
                            l = driver.find_element_by_class_name("profilePicThumb").get_attribute('href')
                            links.append(l)

                        for i in range(len(links)):
                            if links[i].find('picture/view') != -1:
                                links[i] = "None"

                        img_links = get_facebook_images_url(links)

                    folder_names = ["Friend's Photos", "Following's Photos", "Follower's Photos", "Work Friends Photos",
                                    "College Friends Photos", "Current City Friends Photos", "Hometown Friends Photos"]
                    print("Downloading " + folder_names[current_section])

                    img_names = image_downloader(img_links, folder_names[current_section])
            except:
                print("Exception (Images)", str(status), "Status =", current_section, sys.exc_info()[0])

        # dealing with Photos
        elif status == 1:
            results = [x.get_attribute('href') for x in elements]
            results.pop(0)

            try:
                if download_uploaded_photos:
                    if photos_small_size:
                        background_img_links = driver.find_elements_by_xpath("//*[contains(@id, 'pic_')]/div/i")
                        background_img_links = [x.get_attribute('style') for x in background_img_links]
                        background_img_links = [((x.split('(')[1]).split(')')[0]).strip('"') for x in
                                                background_img_links]
                    else:
                        background_img_links = get_facebook_images_url(results)

                    folder_names = ["Uploaded Photos", "Tagged Photos"]
                    print("Downloading " + folder_names[current_section])

                    img_names = image_downloader(background_img_links, folder_names[current_section])
            except:
                print("Exception (Images)", str(status), "Status =", current_section, sys.exc_info()[0])

        # dealing with Videos
        elif status == 2:
            results = elements[0].find_elements_by_css_selector('li')
            results = [x.find_element_by_css_selector('a').get_attribute('href') for x in results]

            try:
                if results[0][0] == '/':
                    # the hrefs are relative: strip the leading '/' and prepend the domain
                    results = [r[1:] for r in results]
                    results = [("https://en-gb.facebook.com/" + x) for x in results]
            except:
                pass

        # dealing with About Section
        elif status == 3:
            results = elements[0].text
            f.writelines(results)

        # dealing with Posts
        elif status == 4:
            extract_and_write_posts(elements, name)
            return

        if (status == 0) or (status == 1):
            for i in range(len(results)):
                f.writelines(results[i])
                f.write(',')
                try:
                    f.writelines(img_names[i])
                except:
                    f.writelines("None")
                f.write('\n')

        elif status == 2:
            for x in results:
                f.writelines(x + "\n")

        f.close()

    except:
        print("Exception (save_to_file)", "Status =", str(status), sys.exc_info()[0])

    return
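
# For friends and photos (status 0/1), each line of the output file has the
# form "<link>,<image file name>", e.g. (hypothetical values):
#   https://en-gb.facebook.com/some.profile,12345_67890_n.jpg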


# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------

def scrap_data(id, scan_list, section, elements_path, save_status, file_names):
    """Given some parameters, this function can scrap friends/photos/videos/about/posts(statuses) of a profile"""

    page = []

    if save_status == 4:
        page.append(id)

    for i in range(len(section)):
        page.append(id + section[i])

    for i in range(len(scan_list)):
        try:
            driver.get(page[i])

            if (save_status == 0) or (save_status == 1) or (
                    save_status == 2):  # Only run this for friends, photos and videos

                # the bar which contains all the sections
                sections_bar = driver.find_element_by_xpath("//*[@class='_3cz'][1]/div[2]/div[1]")

                if sections_bar.text.find(scan_list[i]) == -1:
                    continue

            if save_status != 3:
                scroll()

            data = driver.find_elements_by_xpath(elements_path[i])

            save_to_file(file_names[i], data, save_status, i)

        except:
            print("Exception (scrap_data)", str(i), "Status =", str(save_status), sys.exc_info()[0])


# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------

def create_original_link(url):
    if url.find(".php") != -1:
        original_link = "https://en-gb.facebook.com/" + ((url.split("="))[1])

        if original_link.find("&") != -1:
            original_link = original_link.split("&")[0]

    elif url.find("fnr_t") != -1:
        original_link = "https://en-gb.facebook.com/" + ((url.split("/"))[-1].split("?")[0])
    elif url.find("_tab") != -1:
        original_link = "https://en-gb.facebook.com/" + (url.split("?")[0]).split("/")[-1]
    else:
        original_link = url

    return original_link
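
# For illustration (hypothetical URLs, not from the source):
#   create_original_link("https://en-gb.facebook.com/profile.php?id=1234&fref=pb")
#       -> "https://en-gb.facebook.com/1234"
#   create_original_link("https://en-gb.facebook.com/some.user?fnr_t=1")
#       -> "https://en-gb.facebook.com/some.user"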


# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------

def scrap_profile(ids):
    folder = os.path.join(os.getcwd(), "Data")

    if not os.path.exists(folder):
        os.mkdir(folder)

    os.chdir(folder)

    # execute for all profiles given in the input.txt file
    for id in ids:

        driver.get(id)
        url = driver.current_url
        id = create_original_link(url)

        print("\nScraping:", id)

        try:
            if not os.path.exists(os.path.join(folder, id.split('/')[-1])):
                os.mkdir(os.path.join(folder, id.split('/')[-1]))
            else:
                print("A folder with the same profile name already exists."
                      " Kindly remove that folder first and then run this code.")
                continue
            os.chdir(os.path.join(folder, id.split('/')[-1]))
        except:
            print("Some error occurred while creating the profile directory.")
            continue

        # ----------------------------------------------------------------------------
        print("----------------------------------------")
        print("Friends..")
        # setting parameters for scrap_data() to scrape friends
        scan_list = ["All", "Following", "Followers", "Work", "College", "Current City", "Hometown"]
        section = ["/friends", "/following", "/followers", "/friends_work", "/friends_college", "/friends_current_city",
                   "/friends_hometown"]
        elements_path = ["//*[contains(@id,'pagelet_timeline_medley_friends')][1]/div[2]/div/ul/li/div/a",
                         "//*[contains(@class,'_3i9')][1]/div/div/ul/li[1]/div[2]/div/div/div/div/div[2]/ul/li/div/a",
                         "//*[contains(@class,'fbProfileBrowserListItem')]/div/a",
                         "//*[contains(@id,'pagelet_timeline_medley_friends')][1]/div[2]/div/ul/li/div/a",
                         "//*[contains(@id,'pagelet_timeline_medley_friends')][1]/div[2]/div/ul/li/div/a",
                         "//*[contains(@id,'pagelet_timeline_medley_friends')][1]/div[2]/div/ul/li/div/a",
                         "//*[contains(@id,'pagelet_timeline_medley_friends')][1]/div[2]/div/ul/li/div/a"]
        file_names = ["All Friends.txt", "Following.txt", "Followers.txt", "Work Friends.txt", "College Friends.txt",
                      "Current City Friends.txt", "Hometown Friends.txt"]
        save_status = 0

        scrap_data(id, scan_list, section, elements_path, save_status, file_names)
        print("Friends Done")
        # ----------------------------------------------------------------------------

        print("----------------------------------------")
        print("Photos..")
        print("Scraping Links..")
        # setting parameters for scrap_data() to scrape photos
        scan_list = ["'s Photos", "Photos of"]
        section = ["/photos_all", "/photos_of"]
        elements_path = ["//*[contains(@id, 'pic_')]"] * 2
        file_names = ["Uploaded Photos.txt", "Tagged Photos.txt"]
        save_status = 1

        scrap_data(id, scan_list, section, elements_path, save_status, file_names)
        print("Photos Done")

        # ----------------------------------------------------------------------------

        print("----------------------------------------")
        print("Videos:")
        # setting parameters for scrap_data() to scrape videos
        scan_list = ["'s Videos", "Videos of"]
        section = ["/videos_by", "/videos_of"]
        elements_path = ["//*[contains(@id, 'pagelet_timeline_app_collection_')]/ul"] * 2
        file_names = ["Uploaded Videos.txt", "Tagged Videos.txt"]
        save_status = 2

        scrap_data(id, scan_list, section, elements_path, save_status, file_names)
        print("Videos Done")
        # ----------------------------------------------------------------------------

        print("----------------------------------------")
        print("About:")
        # setting parameters for scrap_data() to scrape the about section
        scan_list = [None] * 7
        section = ["/about?section=overview", "/about?section=education", "/about?section=living",
                   "/about?section=contact-info", "/about?section=relationship", "/about?section=bio",
                   "/about?section=year-overviews"]
        elements_path = ["//*[contains(@id, 'pagelet_timeline_app_collection_')]/ul/li/div/div[2]/div/div"] * 7
        file_names = ["Overview.txt", "Work and Education.txt", "Places Lived.txt", "Contact and Basic Info.txt",
                      "Family and Relationships.txt", "Details About.txt", "Life Events.txt"]
        save_status = 3

        scrap_data(id, scan_list, section, elements_path, save_status, file_names)
        print("About Section Done")

        # ----------------------------------------------------------------------------
        print("----------------------------------------")
        print("Posts:")
        # setting parameters for scrap_data() to scrape posts
        scan_list = [None]
        section = []
        elements_path = ["//div[@class='_4-u2 mbm _4mrt _5jmm _5pat _5v3q _4-u8']"]

        file_names = ["Posts.txt"]
        save_status = 4

        scrap_data(id, scan_list, section, elements_path, save_status, file_names)
        print("Posts(Statuses) Done")
        print("----------------------------------------")
    # ----------------------------------------------------------------------------

    print("\nProcess Completed.")

    return


# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------

def login(email, password):
    """Logging into our own profile"""

    try:
        global driver

        options = Options()

        # disable the notifications pop-up of the Chrome browser
        options.add_argument("--disable-notifications")
        # options.add_argument("headless")

        # initialization: pick the bundled chromedriver for the current OS
        if platform.system() != "Windows":
            driver = webdriver.Chrome(executable_path=os.getcwd() + "/chromedriver", chrome_options=options)
        else:
            driver = webdriver.Chrome(executable_path=os.getcwd() + "/chromedriver.exe", chrome_options=options)

        driver.get("https://en-gb.facebook.com")
        driver.maximize_window()

        # filling the login form
        driver.find_element_by_name('email').send_keys(email)
        driver.find_element_by_name('pass').send_keys(password)

        # clicking on the login button
        driver.find_element_by_id('loginbutton').click()

    except:
        print("There was some error during log in.")
        exit()
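
# Note (a general Selenium requirement, not stated in this commit): the bundled
# chromedriver binary must match the major version of the locally installed
# Chrome browser, otherwise webdriver.Chrome() fails to start a session.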


# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------

def main():
    ids = ["https://en-gb.facebook.com/" + line.split("/")[-1] for line in open("input.txt", newline='\n')]

    if len(ids) > 0:
        # getting the email and password from the user to log into their own profile
        email = input('\nEnter your Facebook Email: ')
        password = input('Enter your Facebook Password: ')

        print("\nStarting Scraping...")

        login(email, password)
        scrap_profile(ids)
        driver.close()
    else:
        print("Input file is empty..")
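
# For illustration (hypothetical entries, not from the source), input.txt holds
# one profile link or username per line, e.g.:
#   https://www.facebook.com/some.profile
#   another.username
# main() keeps only the last path segment of each line and prefixes it with
# "https://en-gb.facebook.com/".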


# -------------------------------------------------------------
# -------------------------------------------------------------
# -------------------------------------------------------------


# get things rolling
main()