Use Shifter with Beautiful Soup
Pair Shifter's residential and ISP proxies with Beautiful Soup for clean, expressive Python scraping. Beautiful Soup handles HTML parsing, Shifter handles the residential IPs — no headless browser required.
Quick Start
Install
pip install beautifulsoup4 requests lxml

Basic Usage
import requests
from bs4 import BeautifulSoup
# requests requires proxy URLs to include a scheme. "http://" is assumed
# here -- confirm the scheme for your plan in the Shifter dashboard.
proxy_url = "http://customer-USERNAME-country-us-sid-123ABC:PASSWORD@p.shifter.io:443"
proxies = {"http": proxy_url, "https": proxy_url}

response = requests.get("https://example.com", proxies=proxies, timeout=30)
# Fail loudly on 4xx/5xx instead of parsing an error or block page.
response.raise_for_status()
soup = BeautifulSoup(response.text, "lxml")
# soup.title is None when the document has no <title> element.
print(soup.title.string if soup.title else None)
for article in soup.select("article.post"):
    print(article.h2.text.strip(), "->", article.a["href"])

Features
Examples
Sticky Session + Multi-Page Crawl
Pin one residential IP for an entire pagination crawl by adding `sid-XXX` to the proxy username. Add `country-uk` and `city-london` to geo-target.
import requests
import secrets
from bs4 import BeautifulSoup
from urllib.parse import urljoin
# One random session ID for the whole crawl keeps every page on the same IP.
sid = secrets.token_hex(4)
# requests requires proxy URLs to include a scheme. "http://" is assumed
# here -- confirm the scheme for your plan in the Shifter dashboard.
proxy_url = (
    f"http://customer-USERNAME-country-uk-city-london-sid-{sid}-ttl-300:"
    f"PASSWORD@p.shifter.io:443"
)

# Use a session so connection pooling and cookies persist across requests.
session = requests.Session()
session.proxies = {"http": proxy_url, "https": proxy_url}
session.headers.update({
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
    "Accept-Language": "en-GB,en;q=0.9",
})

products = []
visited = set()  # guards against a "next" link that points back to a seen page
url = "https://example.co.uk/products"
while url and url not in visited:
    visited.add(url)
    response = session.get(url, timeout=30)
    # Stop on 4xx/5xx rather than treating an error page as an empty listing.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "lxml")
    for card in soup.select(".product-card"):
        products.append({
            "title": card.select_one("h2").text.strip(),
            "price": card.select_one(".price").text.strip(),
            # Resolve relative links against the page they were found on.
            "url": urljoin(url, card.select_one("a")["href"]),
        })
    # Follow pagination until the page no longer offers a "next" link.
    next_link = soup.select_one("a.next-page")
    url = urljoin(url, next_link["href"]) if next_link else None
print(f"Scraped {len(products)} products")

Parallel Scraping with concurrent.futures
Drop the sid for per-request rotation. ThreadPoolExecutor + requests + Shifter scales to dozens of concurrent fetches without tripping per-IP rate limits.
import requests
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor, as_completed
# No sid -> every request gets a different residential IP.
# requests requires proxy URLs to include a scheme. "http://" is assumed
# here -- confirm the scheme for your plan in the Shifter dashboard.
PROXY_URL = "http://customer-USERNAME-country-us:PASSWORD@p.shifter.io:443"


def scrape(url: str) -> dict:
    """Fetch *url* through the proxy and extract basic page data.

    Returns:
        A dict with the requested ``url``, the stripped ``title`` text (an
        empty string when the page has no title), the text of every ``<h1>``
        under ``h1``, and the ``href`` of the first 20 links under ``links``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    response = requests.get(
        url,
        proxies={"http": PROXY_URL, "https": PROXY_URL},
        headers={"User-Agent": "Mozilla/5.0 AppleWebKit/537.36"},
        timeout=30,
    )
    # Surface blocks and server errors instead of parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "lxml")
    # soup.title is None when the document has no <title> element.
    title = soup.title.string if soup.title else None
    return {
        "url": url,
        "title": (title or "").strip(),
        "h1": [h.text.strip() for h in soup.select("h1")],
        "links": [a["href"] for a in soup.select("a[href]")[:20]],
    }
# Pages to fetch; each one is submitted to the thread pool as its own task.
urls = [
    "https://example.com/category/laptops",
    "https://example.com/category/phones",
    "https://example.com/category/tablets",
    "https://example.com/category/wearables",
    # ... hundreds more
]
with ThreadPoolExecutor(max_workers=16) as pool:
    futures = {pool.submit(scrape, u): u for u in urls}
    for f in as_completed(futures):
        try:
            result = f.result()
            print(result["url"], "->", result["title"])
        except Exception as exc:
            print("error:", futures[f], exc)

Robust Crawl with Retries + Backoff
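Production scraping needs retries on 429/5xx responses and connection errors. Combine urllib3's Retry with Shifter and a fresh sid per `get()` call, so every new request starts on a different IP and transient blocks do not follow you. Retries that urllib3 performs within a single call go through the proxy chosen for that call.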
import requests
import secrets
from bs4 import BeautifulSoup
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
class ShifterClient:
    """Wrapper around ``requests.Session`` that uses a new session ID per call.

    Each :meth:`get` builds one proxy URL with a freshly generated ``sid``.
    Retries are configured on the mounted transport adapter, not in
    :meth:`get`, so ``_proxy()`` is not called again between retry attempts
    of the same call.
    """

    def __init__(self, country="us"):
        """Create the session and mount a retrying adapter for both schemes.

        Args:
            country: Country code inserted into the proxy username.
        """
        self.country = country
        self._session = requests.Session()
        retry = Retry(
            total=5,
            backoff_factor=1.5,
            status_forcelist=[429, 500, 502, 503, 504],
            allowed_methods=["GET", "POST", "HEAD"],
        )
        adapter = HTTPAdapter(max_retries=retry, pool_connections=20)
        self._session.mount("http://", adapter)
        self._session.mount("https://", adapter)

    def _proxy(self) -> str:
        """Return a proxy URL carrying a newly generated random ``sid``."""
        sid = secrets.token_hex(4)
        # requests requires proxy URLs to include a scheme. "http://" is
        # assumed here -- confirm the scheme in the Shifter dashboard.
        return (
            f"http://customer-USERNAME-country-{self.country}-sid-{sid}:"
            f"PASSWORD@p.shifter.io:443"
        )

    def get(self, url: str, **kwargs) -> "requests.Response":
        """Send a GET request through a proxy URL generated for this call.

        Args:
            url: Address to fetch.
            **kwargs: Extra arguments forwarded to ``Session.get``;
                ``timeout`` defaults to 30 seconds when not supplied.

        Returns:
            The response object returned by the underlying session.
        """
        # Build the proxy URL once so the http and https entries share one
        # session ID; a redirect across schemes then keeps the same proxy.
        proxy = self._proxy()
        kwargs.setdefault("timeout", 30)
        return self._session.get(
            url,
            proxies={"http": proxy, "https": proxy},
            **kwargs,
        )
client = ShifterClient(country="de")
response = client.get("https://example.de/products")
# Fail loudly on error statuses instead of parsing an error or block page.
response.raise_for_status()
soup = BeautifulSoup(response.text, "lxml")
for product in soup.select(".product"):
    print(product.h2.text.strip(), product.select_one(".price").text.strip())

httpx (async) + Beautiful Soup
If you need async fanout for thousands of pages, swap requests for httpx. Same Shifter URL, native async/await, full Beautiful Soup compatibility.
# pip install httpx beautifulsoup4 lxml
import asyncio
import httpx
from bs4 import BeautifulSoup
# httpx requires proxy URLs to include a scheme. "http://" is assumed
# here -- confirm the scheme for your plan in the Shifter dashboard.
PROXY = "http://customer-USERNAME-country-fr-sid-789GHI:PASSWORD@p.shifter.io:443"
async def fetch(client: httpx.AsyncClient, url: str) -> dict:
    """Download *url* with the given async client and extract title and headings.

    Returns:
        A dict with the requested ``url``, the stripped ``title`` text (an
        empty string when the page has no title), and the text of every
        ``<h2>`` under ``headings``.

    Raises:
        httpx.HTTPStatusError: If the response status is 4xx or 5xx.
    """
    resp = await client.get(url, timeout=30)
    # Surface blocks and server errors instead of parsing an error page.
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "lxml")
    # soup.title is None when the document has no <title> element.
    title = soup.title.string if soup.title else None
    return {
        "url": url,
        "title": (title or "").strip(),
        "headings": [h.text.strip() for h in soup.select("h2")],
    }
async def main():
    """Fetch 50 listing pages concurrently through one proxied client.

    Prints ``url -> title`` for every page that was fetched and an
    ``error:`` line for every page whose fetch raised.
    """
    urls = [
        f"https://example.fr/products?page={i}" for i in range(1, 51)
    ]
    async with httpx.AsyncClient(proxy=PROXY) as client:
        # return_exceptions=True keeps one failed page from discarding the
        # results of all the others.
        results = await asyncio.gather(
            *(fetch(client, u) for u in urls),
            return_exceptions=True,
        )
    # gather() preserves input order, so results line up with urls.
    for page_url, outcome in zip(urls, results):
        if isinstance(outcome, BaseException):
            print("error:", page_url, outcome)
        else:
            print(outcome["url"], "->", outcome["title"])
asyncio.run(main())

Frequently Asked Questions
Common questions about using Shifter with Beautiful Soup.
No. Beautiful Soup is a parser — it doesn't make HTTP requests. The proxy is configured on whichever HTTP client you pair with bs4 (requests, httpx, aiohttp, urllib). Once the HTML is fetched through Shifter, you pass it to BeautifulSoup() as usual.
Start Using Shifter with Beautiful Soup
Pair Shifter's 205M+ residential and ISP proxies with Beautiful Soup for clean, expressive Python scraping. Per-request rotation, sticky sessions, and full async support via httpx.