Use the `requests` library for downloading files as it provides more control over requests and responses, including error handling and session management.
Always check the `status_code` of the response object to ensure the HTTP request was successful before proceeding with file operations.
When downloading large files, use the `stream=True` parameter in `requests.get()` to download the content in chunks, preventing large memory usage.
Consider using the `tqdm` library to add a progress bar when downloading files, which improves the user experience by providing visual feedback on the download progress.
import requests  # Third-party HTTP client used throughout these examples

# Download file using requests.
url = "https://sandbox.oxylabs.io/products/sample.pdf"
response = requests.get(url, timeout=10)  # timeout avoids hanging indefinitely
response.raise_for_status()  # fail fast on HTTP errors before touching the disk
with open("sample.pdf", "wb") as file:
    file.write(response.content)

import urllib.request  # Standard-library alternative for simple downloads

# Download file using urllib.
urllib.request.urlretrieve(url, "sample_urllib.pdf")
try:
    from tqdm import tqdm  # Import tqdm for progress bar (optional)

    # Download with a progress bar, streaming in fixed-size chunks so the
    # whole file is never held in memory at once.
    response = requests.get(url, stream=True, timeout=10)
    response.raise_for_status()  # abort before writing if the server errored
    total_size_in_bytes = int(response.headers.get('content-length', 0))
    block_size = 1024  # 1 Kibibyte per chunk
    progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True)
    with open("sample_with_progress.pdf", "wb") as file:
        for data in response.iter_content(block_size):
            progress_bar.update(len(data))
            file.write(data)
    progress_bar.close()
except ImportError:
    print("tqdm library is not installed. Install it to see the progress bar.")
# Ensure you handle exceptions such as `ConnectionError` or `Timeout` when using
# `requests.get()` to maintain robustness in network-related failures.
Check the `content-length` header against the downloaded file size to prevent incomplete or corrupted downloads.
Set a timeout in `requests.get()` to avoid hanging indefinitely if the server does not respond or is too slow.
Use `os.path` to dynamically set the file path and name, ensuring compatibility across different operating systems.
# Good Example: Handling exceptions with requests.get()
try:
    response = requests.get(url, timeout=10)  # Set timeout
    response.raise_for_status()  # Check for HTTP errors
except requests.exceptions.RequestException as e:
    # RequestException is the base class, so this also catches
    # ConnectionError, Timeout, and HTTPError in one place.
    print(f"Error downloading file: {e}")

# Bad Example: No exception handling or timeout
response = requests.get(url)
with open("sample.pdf", "wb") as file:
    file.write(response.content)
# Good Example: Validate 'content-length' before downloading
response = requests.get(url, stream=True)
content_length = response.headers.get('content-length')
total_data = bytearray()
if content_length:
    # Accumulate the streamed chunks, then compare the byte count against
    # the server-declared length before committing the file to disk.
    for chunk in response.iter_content(1024):
        total_data.extend(chunk)
    if len(total_data) == int(content_length):
        with open("validated_file.pdf", "wb") as file:
            file.write(total_data)
    else:
        print("Content length mismatch.")

# Bad Example: Ignoring 'content-length' validation
response = requests.get(url)
with open("unvalidated_file.pdf", "wb") as file:
    file.write(response.content)
# Good Example: Using os.path for file paths
import os

# os.path.join builds a path valid on any operating system.
filename = os.path.join(os.getcwd(), "downloaded_file.pdf")
response = requests.get(url)
with open(filename, "wb") as file:
    file.write(response.content)

# Bad Example: Hardcoding file paths
response = requests.get(url)
with open("/absolute/path/downloaded_file.pdf", "wb") as file:
    file.write(response.content)
# Good Example: Setting a timeout in requests.get()
try:
    response = requests.get(url, timeout=5)  # Timeout after 5 seconds
    with open("timed_file.pdf", "wb") as file:
        file.write(response.content)
except requests.Timeout:
    # Raised when the server does not respond within the allotted time,
    # instead of letting the request hang indefinitely.
    print("The request timed out.")

# Bad Example: No timeout set
response = requests.get(url)
with open("no_timeout_file.pdf", "wb") as file:
    file.write(response.content)


Get the latest news from data gathering world
Scale up your business with Oxylabs®
Proxies
Advanced proxy solutions
Data Collection
Datasets
Resources
Innovation hub