import requests
from bs4 import BeautifulSoup
import re
# Host patterns for each supported platform, compiled once at import time.
# Dots are escaped so they only match a literal '.', and matching is
# case-insensitive because host names are not case-sensitive.
_SOCIAL_MEDIA_PATTERNS = {
    "Facebook": re.compile(r'(facebook\.com|fb\.com)', re.IGNORECASE),
    "Instagram": re.compile(r'(instagram\.com|instagr\.am)', re.IGNORECASE),
    "LinkedIn": re.compile(r'(linkedin\.com)', re.IGNORECASE),
    "YouTube": re.compile(r'(youtube\.com)', re.IGNORECASE),
    "Twitter": re.compile(r'(twitter\.com)', re.IGNORECASE),
}

# Seconds to wait on the server; without a timeout a stalled host would
# block the script forever.
_REQUEST_TIMEOUT_SECONDS = 10


def extract_social_media_profiles(url):
    """Fetch *url* and collect the first link found for each social platform.

    Args:
        url: Address of the page to download and scan.

    Returns:
        A dict mapping platform name ("Facebook", "Instagram", "LinkedIn",
        "YouTube", "Twitter") to '@' followed by the ``href`` of the first
        anchor tag whose ``href`` matches that platform. Platforms with no
        matching anchor are omitted, so the dict may be empty.
        Returns ``None`` if the page could not be fetched; the error is
        printed to stdout in that case.
    """
    try:
        response = requests.get(url, timeout=_REQUEST_TIMEOUT_SECONDS)
        # Raises requests.HTTPError for 4xx/5xx responses.
        response.raise_for_status()
        html = response.content
    except requests.RequestException as e:
        print(f"Error fetching content from {url}: {e}")
        return None

    soup = BeautifulSoup(html, 'html.parser')

    social_media_urls = {}
    for platform, regex in _SOCIAL_MEDIA_PATTERNS.items():
        # find() returns the first matching anchor in document order, or
        # None; the regex is searched against each anchor's href value.
        link = soup.find('a', href=regex)
        if link is not None:
            # Callers expect the stored value to be prefixed with '@'.
            social_media_urls[platform] = '@' + link['href']
    return social_media_urls
def main():
    """Prompt for URLs, then report the social media links found on each.

    URLs are read from stdin until the user types 'exit' (any case,
    surrounding whitespace ignored) or stdin reaches end-of-file. Blank
    entries are skipped. Each collected URL is then scanned with
    extract_social_media_profiles and the result is printed per platform,
    with 'N/A' for platforms that had no matching link.
    """
    urls = []
    while True:
        try:
            target_url = input("Please enter a URL to search for social media profiles (or 'exit' to quit): ")
        except EOFError:
            # Treat a closed stdin (Ctrl-D / piped input ending) like 'exit'.
            break
        target_url = target_url.strip()
        if target_url.lower() == 'exit':
            break
        if not target_url:
            # Nothing typed; do not queue an empty URL.
            continue
        urls.append(target_url)

    for url in urls:
        print(f"Searching social media profiles for {url}")
        social_media_profiles = extract_social_media_profiles(url)
        if social_media_profiles is None:
            # Fetch failed; extract_social_media_profiles already printed
            # the error, so do not also claim that nothing was found.
            continue
        if not social_media_profiles:
            print("No social media profiles found on the provided URL.")
            continue
        print("Social media profiles found:")
        for platform in ("Facebook", "Instagram", "LinkedIn", "YouTube", "Twitter"):
            print(f"{platform}: {social_media_profiles.get(platform, 'N/A')}")
# Run the interactive prompt only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()