Click name ↑ to return to homepage
# conda create --name beautifulsoup
# conda activate beautifulsoup
# conda install -c anaconda beautifulsoup4
# create a Python script called beautifulsoup.py
# and paste the code below into it
# run script with: python beautifulsoup.py
import requests
import urllib.request
import time
from bs4 import BeautifulSoup
# Scrape every hyperlink target from the MTA turnstile data page.
# After this section runs, `links` holds all <a> tags found on the page and
# `urls` holds the corresponding href values, in the same order.

# Set the URL you want to webscrape from
url = 'http://web.mta.info/developers/turnstile.html'

# Connect to the URL. requests applies no timeout by default, so an explicit
# one keeps the script from hanging forever if the server never answers.
result = requests.get(url, timeout=30)
if result.status_code == 200:
    print("page could be accessed - ok")
else:
    # The script only reports the failure and carries on; whatever body the
    # server returned is still parsed below.
    print("could not access page!")

src = result.content  # raw bytes of the response body
soup = BeautifulSoup(src, "html.parser")  # make soup object
# To look at the soup object, evaluate `soup` in an interactive session;
# a bare expression has no visible effect when run as a script.

# Find all the <a> tags and save them as "links"
links = soup.find_all('a')

# Collect the href of every link. An <a> tag without an href yields None;
# those entries are kept so that urls[i] always corresponds to links[i].
urls = [link.get('href') for link in links]
more to add
Modified from
https://towardsdatascience.com/how-to-web-scrape-with-python-in-4-minutes-bc49186a8460
Note: I think there is a typo in the code shown in the article, so use the complete code listing at the end of the article instead.