Anna Syme

Click name ↑ to return to homepage

Beautiful Soup

# conda create --name beautifulsoup
# conda activate beautifulsoup 
# conda install -c anaconda beautifulsoup4

# create a python script (e.g. called scrape.py)
# and put this stuff in it (below)
# run script with: python scrape.py

import requests 
import urllib.request
import time
from bs4 import BeautifulSoup

# Set the URL you want to webscrape from
url = ''  # TODO: fill in the target URL before running

# Connect to the URL
result = requests.get(url)

# Report whether the page could be fetched.
# BUG FIX: the original put both prints inside the `if` branch, so a
# successful fetch printed both messages and a failure printed nothing;
# the failure message belongs in an `else:` branch.
if result.status_code == 200:
        print("page could be accessed - ok")
else:
        print("could not access page!")

src = result.content  # store contents (raw response bytes)
soup = BeautifulSoup(src, "html.parser")  # make soup object
soup  # look at the soup object (only useful in an interactive session)

# find all the a tags
# save them as "links"

links = soup.find_all('a')  # find all a's

# collect each link's href attribute into a list
# (list comprehension replaces the original for/append loop)
urls = [link.get('href') for link in links]

more to add

Modified from

Note: I think there's a code typo in the original (the failure message is printed inside the success branch) - so use the corrected code at the end of the text.