Integration

Use Shifter with Beautiful Soup

Pair Shifter's residential and ISP proxies with Beautiful Soup for clean, expressive Python scraping. Beautiful Soup handles HTML parsing, Shifter handles the residential IPs — no headless browser required.

Quick Start

Install

pip install beautifulsoup4 requests lxml

Basic Usage

import requests
from bs4 import BeautifulSoup

# The proxy URL needs an explicit scheme: without one, everything before the
# first ":" (the username) is parsed as the scheme and the proxy is rejected.
# NOTE(review): http:// is assumed here — confirm the gateway scheme for port 443.
proxy_url = "http://customer-USERNAME-country-us-sid-123ABC:PASSWORD@p.shifter.io:443"
proxies   = {"http": proxy_url, "https": proxy_url}

response = requests.get("https://example.com", proxies=proxies, timeout=30)
# Fail on 4xx/5xx instead of parsing a block or error page as if it were content.
response.raise_for_status()
soup = BeautifulSoup(response.text, "lxml")

print(soup.title.string)
for article in soup.select("article.post"):
    print(article.h2.text.strip(), "->", article.a["href"])

Features

Pairs cleanly with requests, httpx, aiohttp, and any Python HTTP client that accepts a proxy URL

Per-request rotation by default, with `sid` for sticky sessions and `ttl-N` for timed pins of N seconds

Compatible with bs4 4.x and Python 3.7+ — works with both lxml and html.parser backends

Geo-targeting in 195+ countries via username parameters — country, region, city, ASN

Order-of-magnitude faster than headless-browser scraping for static or JS-light targets

Drop-in for Scrapy, FastAPI scrapers, Airflow tasks, AWS Lambda, and any Python data pipeline

Examples

Sticky Session + Multi-Page Crawl

Pin one residential IP for an entire pagination crawl by adding `sid-XXX` to the proxy username. Add `country-uk` and `city-london` to geo-target, and `ttl-300` to hold the pin for 300 seconds.

import requests
import secrets
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# One random session id per run; every request below reuses it, so the whole
# crawl goes through the same sticky session.
sid = secrets.token_hex(4)

# The proxy URL needs an explicit scheme: without one, everything before the
# first ":" (the username) is parsed as the scheme and the proxy is rejected.
# NOTE(review): http:// is assumed here — confirm the gateway scheme for port 443.
# ttl-300 holds the pin for 300 seconds.
proxy_url = (
    f"http://customer-USERNAME-country-uk-city-london-sid-{sid}-ttl-300:"
    f"PASSWORD@p.shifter.io:443"
)

# Use a session so connection pooling and cookies persist across requests.
session = requests.Session()
session.proxies = {"http": proxy_url, "https": proxy_url}
session.headers.update({
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
    "Accept-Language": "en-GB,en;q=0.9",
})

products  = []
visited   = set()
url       = "https://example.co.uk/products"

# Stop when there is no "next" link, or when it points at a page that was
# already crawled (guards against a pagination loop that never ends).
while url and url not in visited:
    visited.add(url)

    response = session.get(url, timeout=30)
    # A block or error page has no "next" link, so without this check the
    # crawl would end quietly with partial data.
    response.raise_for_status()
    soup     = BeautifulSoup(response.text, "lxml")

    for card in soup.select(".product-card"):
        products.append({
            "title": card.select_one("h2").text.strip(),
            "price": card.select_one(".price").text.strip(),
            "url":   urljoin(url, card.select_one("a")["href"]),
        })

    # Relative "next" hrefs are resolved against the page they were found on.
    next_link = soup.select_one("a.next-page")
    url       = urljoin(url, next_link["href"]) if next_link else None

print(f"Scraped {len(products)} products")

Parallel Scraping with concurrent.futures

Drop the sid for per-request rotation. ThreadPoolExecutor + requests + Shifter scales to dozens of concurrent fetches without tripping per-IP rate limits.

import requests
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor, as_completed

# No sid -> every request gets a different residential IP.
# The explicit scheme is required: without one, everything before the first
# ":" (the username) is parsed as the scheme and the proxy is rejected.
# NOTE(review): http:// is assumed here — confirm the gateway scheme for port 443.
PROXY_URL = "http://customer-USERNAME-country-us:PASSWORD@p.shifter.io:443"

def scrape(url: str) -> dict:
    """Fetch *url* through the proxy and summarise the page.

    Returns:
        A dict with the requested ``url``, the page ``title`` ("" when the
        document has no <title> element or the title holds no single string),
        the text of every ``h1``, and the ``href`` of the first 20 links.

    Raises:
        requests.RequestException: on a network failure, a timeout, or a
            4xx/5xx response.
    """
    response = requests.get(
        url,
        proxies={"http": PROXY_URL, "https": PROXY_URL},
        headers={"User-Agent": "Mozilla/5.0 AppleWebKit/537.36"},
        timeout=30,
    )
    # Fail loudly on block/error pages instead of returning them as results.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "lxml")

    # soup.title is None for documents without a <title> element.
    title_tag = soup.title
    title = (title_tag.string or "") if title_tag is not None else ""

    return {
        "url":   url,
        "title": title.strip(),
        "h1":    [h.text.strip() for h in soup.select("h1")],
        "links": [a["href"] for a in soup.select("a[href]")[:20]],
    }

urls = [
    "https://example.com/category/laptops",
    "https://example.com/category/phones",
    "https://example.com/category/tablets",
    "https://example.com/category/wearables",
    # ... hundreds more
]

# Fan the URLs out over 16 worker threads and report each page as soon as its
# fetch finishes, whatever order that happens in.
with ThreadPoolExecutor(max_workers=16) as executor:
    # Remember which URL each future belongs to so failures can be attributed.
    url_by_future = {}
    for target in urls:
        url_by_future[executor.submit(scrape, target)] = target

    for done in as_completed(url_by_future):
        try:
            page = done.result()
            print(page["url"], "->", page["title"])
        except Exception as exc:
            # One bad page must not stop the rest of the batch.
            print("error:", url_by_future[done], exc)

Robust Crawl with Retries + Backoff

Production scraping needs retries on 5xx and connection errors. Combine urllib3 Retry with Shifter and a fresh sid per request: urllib3 replays its retries through the proxy chosen for that request, so issue a new request to get a new sid.

import requests
import secrets
from bs4 import BeautifulSoup
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

class ShifterClient:
    """requests.Session wrapper that uses a new Shifter ``sid`` for every call.

    Each ``get()`` builds a proxy URL with a freshly generated ``sid``. Retries
    are performed by urllib3's ``Retry`` inside the mounted adapter, so every
    retry of one call goes through the proxy URL chosen for that call; call
    ``get()`` again to switch to another ``sid``.
    """

    def __init__(self, country="us"):
        """Create the session and mount the retrying adapter.

        Args:
            country: Country code inserted into the proxy username as
                ``country-<code>``.
        """
        self.country = country
        self._session = requests.Session()

        # Up to 5 retries with exponential backoff; the listed statuses are
        # retried in addition to connection-level failures.
        retry = Retry(
            total=5,
            backoff_factor=1.5,
            status_forcelist=[429, 500, 502, 503, 504],
            allowed_methods=["GET", "POST", "HEAD"],
        )
        adapter = HTTPAdapter(max_retries=retry, pool_connections=20)
        self._session.mount("http://",  adapter)
        self._session.mount("https://", adapter)

    def _proxy(self) -> str:
        """Return a proxy URL carrying a new random ``sid``."""
        sid = secrets.token_hex(4)
        # The explicit scheme is required: without one, everything before the
        # first ":" (the username) is parsed as the scheme and the proxy is
        # rejected.
        # NOTE(review): http:// is assumed — confirm the gateway scheme for port 443.
        return (
            f"http://customer-USERNAME-country-{self.country}-sid-{sid}:"
            f"PASSWORD@p.shifter.io:443"
        )

    def get(self, url: str, **kwargs) -> requests.Response:
        """GET *url* through a newly generated proxy URL.

        ``timeout`` defaults to 30 seconds; all other keyword arguments are
        forwarded to ``requests.Session.get``.
        """
        # Build the proxy URL once so both schemes share the same sid; calling
        # _proxy() per key would give http and https two different sessions.
        proxy = self._proxy()
        return self._session.get(
            url,
            proxies={"http": proxy, "https": proxy},
            timeout=kwargs.pop("timeout", 30),
            **kwargs,
        )

# Fetch the catalogue page with country-de targeting and parse it.
client = ShifterClient(country="de")
page = client.get("https://example.de/products")
soup = BeautifulSoup(page.text, "lxml")

# Print "<name> <price>" for every product card on the page.
for card in soup.select(".product"):
    name = card.h2.text.strip()
    price = card.select_one(".price").text.strip()
    print(name, price)

httpx (async) + Beautiful Soup

If you need async fanout for thousands of pages, swap requests for httpx. Same Shifter URL, native async/await, full Beautiful Soup compatibility.

# pip install httpx beautifulsoup4 lxml
import asyncio
import httpx
from bs4 import BeautifulSoup

# httpx needs an explicit scheme on the proxy URL: without one, everything
# before the first ":" (the username) is parsed as the scheme and rejected.
# NOTE(review): http:// is assumed here — confirm the gateway scheme for port 443.
# The fixed sid means every request made with this URL shares one sticky session.
PROXY = "http://customer-USERNAME-country-fr-sid-789GHI:PASSWORD@p.shifter.io:443"

async def fetch(client: httpx.AsyncClient, url: str) -> dict:
    """Download *url* with the shared client and extract its title and headings.

    Returns:
        A dict with the requested ``url``, the page ``title`` ("" when the
        document has no <title> element or the title holds no single string),
        and the text of every ``h2`` under ``headings``.
    """
    resp = await client.get(url, timeout=30)
    soup = BeautifulSoup(resp.text, "lxml")

    # soup.title is None for documents without a <title> element.
    title_tag = soup.title
    title = (title_tag.string or "") if title_tag is not None else ""

    return {
        "url":      url,
        "title":    title.strip(),
        "headings": [h.text.strip() for h in soup.select("h2")],
    }

async def main():
    """Fetch 50 listing pages concurrently and print each page's title.

    A page that fails is reported and skipped; it does not abort the batch.
    """
    urls = [
        f"https://example.fr/products?page={i}" for i in range(1, 51)
    ]

    async with httpx.AsyncClient(proxy=PROXY) as client:
        # return_exceptions=True puts a failure into that page's result slot
        # instead of raising the first error and losing the finished pages.
        results = await asyncio.gather(
            *(fetch(client, u) for u in urls),
            return_exceptions=True,
        )

    # gather() preserves input order, so results line up with urls.
    for url, outcome in zip(urls, results):
        if isinstance(outcome, BaseException):
            print("error:", url, outcome)
        else:
            print(outcome["url"], "->", outcome["title"])

# Script entry point: run the main() coroutine to completion on a new event loop.
asyncio.run(main())

FAQ

Frequently asked questions

Common questions about using Shifter with Beautiful Soup.

Does Beautiful Soup need a special proxy setup?

No. Beautiful Soup is a parser — it doesn't make HTTP requests. The proxy is configured on whichever HTTP client you pair with bs4 (requests, httpx, aiohttp, urllib). Once the HTML is fetched through Shifter, you pass it to BeautifulSoup() as usual.

Get started

Start Using Shifter with Beautiful Soup

Pair Shifter's 205M+ residential and ISP proxies with Beautiful Soup for clean, expressive Python scraping. Per-request rotation, sticky sessions, and full async support via httpx.

Try Shifter for Free. Set up in minutes. Cancel anytime.

Use Shifter with Beautiful Soup

Quick Start

Features

Examples

Sticky Session + Multi-Page Crawl

Parallel Scraping with concurrent.futures

Robust Crawl with Retries + Backoff

httpx (async) + Beautiful Soup

Frequently asked questions

Does Beautiful Soup need a special proxy setup?

How do I use Shifter with requests + Beautiful Soup?

Should I use Beautiful Soup or Scrapy?

How do I keep the same IP across multiple bs4 fetches?

Can I run async scrapes with Beautiful Soup?

Does Shifter work in AWS Lambda or Cloud Functions?

Start Using Shifter with Beautiful Soup