Downloading Files Examples¶
Real-world examples of downloading files during script execution.
Example 1: Download PDF Reports¶
Download multiple PDF reports from a website:
async def main(page):
    """Download every PDF report linked from the reports page."""
    debug_log("Loading reports page...")
    await page.goto('https://example.com/reports')
    await page.wait_for_load_state('networkidle')

    # Collect all anchors whose href ends in .pdf
    pdf_links = await page.query_selector_all('a[href$=".pdf"]')
    total = len(pdf_links)
    debug_log(f"Found {total} PDF files")

    for index, anchor in enumerate(pdf_links, start=1):
        href = await anchor.get_attribute('href')
        title = await anchor.text_content()
        debug_log(f"Downloading {index}/{total}: {title}")

        outcome = await download_file(
            href,
            description=f'Report: {title.strip()}',
            filename=f'report_{index}.pdf'
        )
        if outcome['success']:
            debug_log(f" ✓ {outcome['filename']} ({outcome['file_size']} bytes)")
        else:
            debug_log(f" ✗ Failed: {outcome['error']}")

        # Visual record after each download attempt
        await capture_screenshot(f"After downloading {title}")

    debug_log("All downloads complete!")
Example 2: Download Data Exports¶
Download CSV exports after performing actions:
async def main(page):
    """Log in, configure a 30-day CSV export, and download it."""
    # Authenticate before anything else
    debug_log("Logging in...")
    await page.goto('https://data.example.com/login')
    await page.fill('#username', 'user@example.com')
    await page.fill('#password', 'password123')
    await page.click('button[type="submit"]')
    await page.wait_for_load_state('networkidle')

    # Open the export page and choose the export options
    debug_log("Opening data export page...")
    await page.goto('https://data.example.com/export')
    await page.select_option('#format', 'csv')
    await page.select_option('#date_range', 'last_30_days')
    await capture_screenshot("Export settings configured")

    # The export is triggered by clicking the button selector passed here
    debug_log("Generating export...")
    export = await download_file(
        'button#export',
        description='Last 30 days data export',
        filename='data_export_30d.csv'
    )

    if not export['success']:
        debug_log(f"✗ Export failed: {export['error']}")
        await capture_screenshot("Export failure")
    else:
        debug_log(f"✓ Export downloaded: {export['filename']}")
        debug_log(f" File size: {export['file_size']} bytes")
        debug_log(f" MIME type: {export['mime_type']}")
Example 3: Download with Dynamic URLs¶
Download files from dynamically generated URLs:
import asyncio
async def main(page):
    """Trigger server-side report generation, then fetch it via its dynamic URL."""
    await page.goto('https://dashboard.example.com')

    # Kick off report generation on the server
    debug_log("Requesting report generation...")
    await page.click('button.generate-report')

    # Wait (up to 30s) for the page to signal the report is ready
    debug_log("Waiting for report to be ready...")
    await page.wait_for_selector('.download-ready', timeout=30000)

    # The download href only exists once generation has finished
    report_url = await page.locator('a.download-link').get_attribute('href')
    debug_log(f"Download URL: {report_url}")

    outcome = await download_file(
        report_url,
        description='Generated report',
        filename='generated_report.pdf'
    )
    debug_log(
        f"✓ Downloaded: {outcome['filename']}"
        if outcome['success']
        else f"✗ Failed: {outcome['error']}"
    )
Example 4: Conditional Download with Validation¶
Check conditions before downloading:
async def main(page):
    """Download a PDF only after confirming the link exists, then sanity-check its size."""
    await page.goto('https://example.com/files')

    # Guard clause: nothing to do when no download link is present
    if await page.locator('a.download-pdf').count() == 0:
        debug_log("No download available")
        await capture_screenshot("No download found")
        return

    # Size advertised on the page, e.g. "4.2 MB"
    size_label = await page.locator('.file-size').text_content()
    debug_log(f"File size: {size_label}")

    outcome = await download_file(
        'a.download-pdf',
        description='Validated file download'
    )

    if not outcome['success']:
        debug_log(f"✗ Download failed: {outcome['error']}")
        return

    debug_log(f"✓ Downloaded: {outcome['filename']}")
    debug_log(f" Size: {outcome['file_size']} bytes")
    debug_log(f" Type: {outcome['mime_type']}")

    # Compare the advertised size (in MB) against the actual byte count
    if 'MB' in size_label:
        expected_mb = float(size_label.split('MB')[0].strip())
        actual_mb = outcome['file_size'] / (1024 * 1024)
        debug_log(f" Expected: ~{expected_mb}MB, Actual: {actual_mb:.2f}MB")
Example 5: Download Multiple File Types¶
Download different file types from a page:
async def main(page):
    """Sweep a downloads page for PDF, CSV, JSON and Excel files."""
    await page.goto('https://example.com/downloads')
    await page.wait_for_load_state('networkidle')

    # CSS selector -> human-readable label for each file family
    targets = [
        {'selector': 'a[href$=".pdf"]', 'type': 'PDF'},
        {'selector': 'a[href$=".csv"]', 'type': 'CSV'},
        {'selector': 'a[href$=".json"]', 'type': 'JSON'},
        {'selector': 'a[href$=".xlsx"]', 'type': 'Excel'}
    ]

    for target in targets:
        anchors = await page.query_selector_all(target['selector'])
        debug_log(f"Found {len(anchors)} {target['type']} file(s)")

        for anchor in anchors:
            url = await anchor.get_attribute('href')
            label = await anchor.text_content()
            outcome = await download_file(
                url,
                description=f"{target['type']}: {label.strip()}"
            )
            if outcome['success']:
                debug_log(f" ✓ {outcome['filename']}")
            else:
                debug_log(f" ✗ Failed: {outcome['error']}")

    await capture_screenshot("All downloads complete")
Example 6: Download with Rate Limiting¶
Add delays between downloads to avoid rate limiting:
import asyncio

async def main(page):
    """Download each linked file with a 2-second pause between requests.

    The fixed delay keeps the script under the server's rate limit.
    """
    await page.goto('https://api.example.com/files')

    # Get all download links
    links = await page.query_selector_all('a.download')
    debug_log(f"Found {len(links)} files to download")

    for i, link in enumerate(links):
        href = await link.get_attribute('href')
        # Last path segment serves as a human-readable name for logging.
        # BUG FIX: the original logged a literal placeholder instead of
        # interpolating `filename`, leaving the variable unused.
        filename = href.split('/')[-1]
        debug_log(f"Downloading {i+1}/{len(links)}: {filename}")

        result = await download_file(
            href,
            description=f'File {i+1} of {len(links)}'
        )
        if result['success']:
            debug_log(f" ✓ Success")
        else:
            debug_log(f" ✗ Failed: {result['error']}")

        # Pause between downloads, but not after the last one
        if i < len(links) - 1:
            debug_log(" Waiting 2 seconds...")
            await asyncio.sleep(2)

    debug_log("All downloads complete!")
Example 7: Download with Error Recovery¶
Retry failed downloads with exponential backoff:
import asyncio

async def main(page):
    """Download a fixed list of files, retrying each up to 3 times.

    Failed downloads are retried with exponential backoff (2s, 4s, 8s).
    BUG FIX: the original logged literal "(unknown)" placeholders instead
    of interpolating the `filename` variable computed from the URL.
    """
    await page.goto('https://example.com/files')

    files_to_download = [
        'https://example.com/file1.pdf',
        'https://example.com/file2.csv',
        'https://example.com/file3.json'
    ]

    for file_url in files_to_download:
        filename = file_url.split('/')[-1]
        max_retries = 3
        retry_count = 0
        success = False

        while retry_count < max_retries and not success:
            if retry_count > 0:
                wait_time = 2 ** retry_count  # Exponential backoff: 2s, 4s, 8s
                debug_log(f"Retry {retry_count}/{max_retries} after {wait_time}s...")
                await asyncio.sleep(wait_time)

            debug_log(f"Downloading {filename}...")
            result = await download_file(
                file_url,
                description=f'{filename} (attempt {retry_count + 1})'
            )
            if result['success']:
                debug_log(f" ✓ Success on attempt {retry_count + 1}")
                success = True
            else:
                debug_log(f" ✗ Failed: {result['error']}")
                retry_count += 1

        if not success:
            debug_log(f" ✗✗ All retries failed for {filename}")
            await capture_screenshot(f"Failed to download {filename}")
Example 8: Scrape Data and Download Files¶
Combine data scraping with file downloads:
async def main(page):
    """Scrape product name/price from each card and download its spec sheet PDF.

    BUG FIX: `query_selector_all` returns ElementHandles, which have no
    `.locator()` method in Playwright — the original would raise
    AttributeError. Child elements are now found with `query_selector`
    on the handle, with a None guard for cards lacking a spec link.
    """
    await page.goto('https://example.com/products')

    # Get all product cards
    products = await page.query_selector_all('.product-card')
    debug_log(f"Found {len(products)} products")

    scraped_products = []
    for i, product in enumerate(products):
        # Extract product data from within this card
        name_el = await product.query_selector('.product-name')
        price_el = await product.query_selector('.product-price')
        name = await name_el.text_content()
        price = await price_el.text_content()

        # The spec link may be absent for some products
        spec_el = await product.query_selector('a.download-spec')
        pdf_link = await spec_el.get_attribute('href') if spec_el else None

        debug_log(f"Product {i+1}: {name} - {price}")

        if pdf_link:
            # Download product specification
            result = await download_file(
                pdf_link,
                description=f'Spec sheet: {name}',
                filename=f'spec_{i+1}_{name.replace(" ", "_")}.pdf'
            )
            # Store data with download info
            scraped_products.append({
                'name': name,
                'price': price,
                'spec_downloaded': result['success'],
                'spec_filename': result.get('filename', 'N/A')
            })
        else:
            scraped_products.append({
                'name': name,
                'price': price,
                'spec_downloaded': False,
                'spec_filename': 'No spec available'
            })

    # Persist the combined scrape results
    scrape_data({
        'products': scraped_products,
        'total_products': len(products)
    })
    debug_log(f"Scraped {len(scraped_products)} products")
    await capture_screenshot("Scraping complete")
Best Practices Demonstrated¶
These examples show:
- Error Handling: Check `result['success']` before proceeding
- Logging: Use `debug_log()` to track progress
- Screenshots: Capture screenshots at key moments
- Rate Limiting: Add delays between downloads
- Retry Logic: Implement exponential backoff for failures
- Validation: Check conditions before downloading
- Organization: Use descriptive filenames and descriptions
- Integration: Combine downloads with data scraping
See Also¶
- Downloading Files - Complete guide
- Available Functions - API reference
- Debug Logging - Logging examples
- Screenshots - Screenshot examples