Defeating Captcha With Python
CAPTCHAs (Completely Automated Public Turing test to tell Computers and Humans Apart) are widely used to prevent automated access to websites. However, there are legitimate reasons for automating interactions with web services, such as testing or data collection for research. In this article, we’ll explore various techniques to bypass CAPTCHAs using Python, with practical code examples and use cases.
1. Optical Character Recognition (OCR)
One of the simplest forms of CAPTCHA is distorted text. We can use an OCR engine such as Tesseract (through the pytesseract wrapper) to attempt to read it.
import pytesseract
from PIL import Image
def solve_text_captcha(image_path):
    """Run Tesseract OCR over a CAPTCHA image and return the recognised text.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The string produced by ``pytesseract.image_to_string`` with leading
        and trailing whitespace removed.
    """
    # The context manager releases the image's file handle even if OCR raises.
    with Image.open(image_path) as image:
        text = pytesseract.image_to_string(image)
    return text.strip()
# Usage: OCR a local image file and print the recognised text.
captcha_text = solve_text_captcha('captcha.png')
print(f"Solved CAPTCHA: {captcha_text}")
# Example use case: Automating form submission
from selenium import webdriver
def submit_form_with_captcha(url, form_data, captcha_image_path):
    """Fill in a web form, type the OCR'd CAPTCHA text and submit the form.

    Args:
        url: Address of the page that contains the form.
        form_data: Mapping of input ``name`` attribute -> text to type into
            that input.
        captcha_image_path: Path of a CAPTCHA image on disk; it is passed to
            ``solve_text_captcha``.

    NOTE(review): the CAPTCHA image is read from ``captcha_image_path`` and is
    not captured from the page loaded here - confirm that the file really
    corresponds to the CAPTCHA this browser session is shown.
    """
    # Local import: the file's own ``By`` import appears only further down.
    from selenium.webdriver.common.by import By

    driver = webdriver.Chrome()
    try:
        driver.get(url)
        # Fill form data
        for field, value in form_data.items():
            # find_element_by_* helpers were removed in Selenium 4.3.
            driver.find_element(By.NAME, field).send_keys(value)
        # Solve CAPTCHA
        captcha_solution = solve_text_captcha(captcha_image_path)
        driver.find_element(By.NAME, 'captcha').send_keys(captcha_solution)
        # Submit form
        driver.find_element(By.ID, 'submit-button').click()
    finally:
        # Always close the browser, including when an element is missing.
        driver.quit()
# Usage
# The values are placeholders. The e-mail literal replaces an
# "[email protected]" obfuscation artifact, which is not a valid address.
form_data = {'username': 'testuser', 'email': 'testuser@example.com'}
submit_form_with_captcha('https://example.com/form', form_data, 'captcha.png')
2. Machine Learning Approaches
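For more complex image-based CAPTCHAs, we can use machine learning models trained to recognize the characters. The example below assumes such a model has already been trained and saved as captcha_model.h5; it only loads the model and runs inference.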
import numpy as np
from PIL import Image
from tensorflow.keras.models import load_model
def preprocess_image(image_path):
    """Load a CAPTCHA image and convert it into a one-item model input batch.

    The image is converted to 8-bit greyscale, resized to 100x40 pixels
    (width x height), scaled to the range [0, 1] and reshaped.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A float NumPy array of shape ``(1, 100, 40, 1)``.
    """
    # convert() returns an independent image, so the source file can be
    # closed as soon as the greyscale copy exists.
    with Image.open(image_path) as source:
        img = source.convert('L').resize((100, 40))
    img_array = np.array(img) / 255.0
    # NOTE(review): np.array(img) has shape (height, width) == (40, 100), so
    # this reshape reinterprets the buffer instead of transposing it. It is
    # kept as-is because the saved model may have been trained on exactly this
    # layout - confirm against the training pipeline.
    return img_array.reshape(1, 100, 40, 1)
# Load the saved Keras model once, at import time; solve_captcha() reuses it.
model = load_model('captcha_model.h5')
def solve_captcha(image_path, charset=None):
    """Predict the text of an image CAPTCHA with the module-level ``model``.

    Args:
        image_path: Path of the CAPTCHA image; it is passed to
            ``preprocess_image``.
        charset: Optional sequence mapping a predicted class index to its
            character (for example ``string.digits + string.ascii_lowercase``).
            When omitted, each class index is treated as a Unicode code point,
            which is the original behaviour and is only meaningful if the
            model was trained with code points as labels.

    Returns:
        The predicted characters joined into one string.
    """
    processed_image = preprocess_image(image_path)
    prediction = model.predict(processed_image)
    # assumes prediction is (batch, position, class): argmax runs over axis 2
    # - TODO confirm against the model's output layer.
    class_indices = np.argmax(prediction, axis=2).flatten()
    if charset is None:
        return ''.join(chr(index) for index in class_indices)
    return ''.join(charset[index] for index in class_indices)
# Usage: run the loaded model on one image file and print its prediction.
result = solve_captcha('complex_captcha.png')
print(f"Solved complex CAPTCHA: {result}")
# Example use case: Batch processing of CAPTCHAs
import os
def batch_solve_captchas(directory):
    """Solve every PNG CAPTCHA found directly inside ``directory``.

    Args:
        directory: Folder to scan (sub-folders are not descended into).

    Returns:
        A dict mapping each PNG file name to the string returned by
        ``solve_captcha`` for it, in sorted file-name order.
    """
    results = {}
    # os.listdir() order is arbitrary; sorting makes the output reproducible.
    for filename in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, filename)
        # Match ".PNG" as well, and skip directories that merely end in ".png".
        if not filename.lower().endswith('.png') or not os.path.isfile(file_path):
            continue
        results[filename] = solve_captcha(file_path)
    return results
# Usage: solve every PNG in a folder and print one "name: solution" line each.
batch_results = batch_solve_captchas('captcha_images/')
for filename, solution in batch_results.items():
    # The loop body must be indented; without it the snippet does not parse.
    print(f"{filename}: {solution}")
3. Audio CAPTCHA Solving
Some CAPTCHAs offer audio alternatives. We can use speech recognition to solve these.
import speech_recognition as sr
from pydub import AudioSegment
def solve_audio_captcha(audio_file):
    """Transcribe an MP3 audio CAPTCHA via ``Recognizer.recognize_google``.

    Args:
        audio_file: Path of the MP3 file to transcribe.

    Returns:
        The recognised text. On failure one of the literal strings
        ``"Could not understand audio"`` or ``"Could not request results"``
        is returned instead, so callers must compare against those values to
        detect an error.
    """
    import os
    import tempfile

    recognizer = sr.Recognizer()
    # Use a private scratch directory instead of a fixed "temp.wav" in the
    # working directory, so nothing is left behind or overwritten.
    with tempfile.TemporaryDirectory() as work_dir:
        wav_path = os.path.join(work_dir, "captcha.wav")
        # Convert mp3 to wav. export() returns the open output file, so it is
        # closed explicitly before the file is read back.
        sound = AudioSegment.from_mp3(audio_file)
        sound.export(wav_path, format="wav").close()
        # Read the audio into memory while the WAV file still exists.
        with sr.AudioFile(wav_path) as source:
            audio = recognizer.record(source)
    # Recognize speech
    try:
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError:
        return "Could not request results"
# Usage: transcribe a local MP3 file and print the result (or error string).
audio_captcha_text = solve_audio_captcha("audio_captcha.mp3")
print(f"Solved audio CAPTCHA: {audio_captcha_text}")
# Example use case: Accessibility testing
def test_audio_captcha_accessibility(url):
    """Check that a page's audio CAPTCHA can be completed end to end.

    The audio option is selected, the audio file is downloaded and transcribed
    with ``solve_audio_captcha``, the transcript is typed in and the form is
    submitted.

    Args:
        url: Address of the page under test.

    Returns:
        True when the text "Success" appears in the page source after the
        form is submitted, otherwise False.
    """
    import os
    import tempfile
    # urllib.request was used here without ever being imported.
    import urllib.request

    # Local import: the file's own ``By`` import appears only further down.
    from selenium.webdriver.common.by import By

    driver = webdriver.Chrome()
    try:
        driver.get(url)
        # Find and click audio CAPTCHA option
        driver.find_element(By.ID, 'audio-captcha-option').click()
        # Download audio file into a scratch directory that is removed
        # afterwards, rather than leaving temp_audio.mp3 behind.
        audio_src = driver.find_element(By.ID, 'audio-captcha').get_attribute('src')
        with tempfile.TemporaryDirectory() as work_dir:
            audio_path = os.path.join(work_dir, "temp_audio.mp3")
            urllib.request.urlretrieve(audio_src, audio_path)
            # Solve audio CAPTCHA
            solution = solve_audio_captcha(audio_path)
        # Enter solution
        driver.find_element(By.ID, 'captcha-input').send_keys(solution)
        # Submit and check result
        driver.find_element(By.ID, 'submit-button').click()
        return "Success" in driver.page_source
    finally:
        # Always close the browser, including when a step above raises.
        driver.quit()
# Usage: run the check against one page and report pass/fail from its boolean result.
accessibility_result = test_audio_captcha_accessibility('https://example.com/audio-captcha')
print(f"Audio CAPTCHA accessibility test {'passed' if accessibility_result else 'failed'}")
4. Using CAPTCHA Solving Services
For high-volume needs, there are services that offer CAPTCHA solving through their APIs.
import requests
def solve_captcha_with_service(api_key, captcha_image, timeout=60):
    """Upload a CAPTCHA image to a solving service and return its answer.

    Args:
        api_key: Key sent to the service in the ``api_key`` form field.
        captcha_image: Path of the image file to upload.
        timeout: Seconds to wait for the service before ``requests`` raises a
            timeout error; without it a stalled connection would block
            indefinitely.

    Returns:
        The ``solution`` field of the JSON response when the service answers
        with HTTP 200, otherwise the string ``"Error: <status code>"``.
    """
    url = "https://api.captcha-solving-service.com/solve"
    data = {'api_key': api_key}
    # Keep the upload inside ``with`` so the file handle is always closed.
    with open(captcha_image, 'rb') as image_file:
        response = requests.post(
            url, files={'image': image_file}, data=data, timeout=timeout
        )
    if response.status_code == 200:
        return response.json()['solution']
    return f"Error: {response.status_code}"
# Usage
# "your_api_key_here" is a placeholder; substitute the key issued by the service.
api_key = "your_api_key_here"
solution = solve_captcha_with_service(api_key, "captcha.png")
print(f"Service solved CAPTCHA: {solution}")
# Example use case: Large-scale web scraping
import csv
def scrape_website_with_captcha(url, num_pages, api_key):
    """Scrape item titles and prices from paginated listings into a CSV file.

    Pages ``?page=0`` through ``?page=num_pages - 1`` are visited. Whenever
    the page source contains the word "captcha", the CAPTCHA image is
    screenshotted, sent to ``solve_captcha_with_service`` and the answer is
    submitted before the items are read.

    Args:
        url: Base address of the listing; the page number is appended as a
            ``page`` query parameter.
        num_pages: Number of pages to visit.
        api_key: Key forwarded to ``solve_captcha_with_service``.

    Side effects:
        Writes ``temp_captcha.png`` (when a CAPTCHA is met) and
        ``scraped_data.csv`` into the current working directory.
    """
    # Local import: the file's own ``By`` import appears only further down.
    from selenium.webdriver.common.by import By

    data = []
    driver = webdriver.Chrome()
    try:
        for page in range(num_pages):
            driver.get(f"{url}?page={page}")
            # Check if CAPTCHA is present
            if "captcha" in driver.page_source.lower():
                # Capture CAPTCHA image
                captcha_element = driver.find_element(By.ID, 'captcha-image')
                captcha_element.screenshot('temp_captcha.png')
                # Solve CAPTCHA
                solution = solve_captcha_with_service(api_key, 'temp_captcha.png')
                # Enter solution and submit
                driver.find_element(By.ID, 'captcha-input').send_keys(solution)
                driver.find_element(By.ID, 'submit-captcha').click()
            # Extract data from page
            for item in driver.find_elements(By.CLASS_NAME, 'item'):
                data.append({
                    'title': item.find_element(By.CLASS_NAME, 'title').text,
                    'price': item.find_element(By.CLASS_NAME, 'price').text,
                })
    finally:
        # Always close the browser, including when a page is missing elements.
        driver.quit()
    # Save data to CSV; an explicit encoding keeps non-ASCII titles from
    # failing on platforms whose default encoding cannot represent them.
    with open('scraped_data.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=['title', 'price'])
        writer.writeheader()
        writer.writerows(data)
# Usage: visit 10 listing pages (page=0..9) and write the rows to scraped_data.csv.
scrape_website_with_captcha('https://example.com/products', 10, 'your_api_key_here')
5. Browser Automation with Selenium
Sometimes, the best approach is to mimic human behavior using browser automation.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def solve_recaptcha():
    """Open the reCAPTCHA demo page and click its checkbox.

    Prints "reCAPTCHA solved successfully!" once an element matching
    ``.recaptcha-success`` is present in the main document.

    Raises:
        selenium.common.exceptions.TimeoutException: when any of the waited-for
            conditions is not met within 10 seconds.
    """
    driver = webdriver.Chrome()
    try:
        driver.get("https://www.google.com/recaptcha/api2/demo")
        # Wait for the reCAPTCHA iframe to be available
        WebDriverWait(driver, 10).until(
            EC.frame_to_be_available_and_switch_to_it(
                (By.CSS_SELECTOR, "iframe[src^='https://www.google.com/recaptcha/api2/anchor']")
            )
        )
        # Click the reCAPTCHA checkbox
        WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.ID, "recaptcha-anchor"))
        ).click()
        # Switch back to the main content
        driver.switch_to.default_content()
        # Check if the CAPTCHA was solved
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, ".recaptcha-success"))
        )
        print("reCAPTCHA solved successfully!")
    finally:
        # A timed-out wait raises; close the browser in that case too.
        driver.quit()
# Usage: launches Chrome against the reCAPTCHA demo page.
solve_recaptcha()
# Example use case: Automated login with reCAPTCHA
def login_with_recaptcha(url, username, password):
    """Log in through a form that is protected by a reCAPTCHA checkbox.

    Args:
        url: Address of the login page.
        username: Text typed into the element with id ``username``.
        password: Text typed into the element with id ``password``.

    Returns:
        True when the text "Welcome" appears in the page source after the
        login button is clicked, otherwise False.

    Raises:
        selenium.common.exceptions.TimeoutException: when the reCAPTCHA frame
            or checkbox is not ready within 10 seconds, or no element matching
            ``.recaptcha-success`` appears within 60 seconds.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        # Fill in login details (find_element_by_id was removed in Selenium 4.3)
        driver.find_element(By.ID, 'username').send_keys(username)
        driver.find_element(By.ID, 'password').send_keys(password)
        # Solve reCAPTCHA
        WebDriverWait(driver, 10).until(
            EC.frame_to_be_available_and_switch_to_it(
                (By.CSS_SELECTOR, "iframe[src^='https://www.google.com/recaptcha/api2/anchor']")
            )
        )
        WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.ID, "recaptcha-anchor"))
        ).click()
        driver.switch_to.default_content()
        # Wait for reCAPTCHA to be solved
        WebDriverWait(driver, 60).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, ".recaptcha-success"))
        )
        # Submit login form
        driver.find_element(By.ID, 'login-button').click()
        # Check if login was successful
        return "Welcome" in driver.page_source
    finally:
        # A timed-out wait raises; close the browser in that case too.
        driver.quit()
# Usage: the URL and credentials below are placeholders for a test account.
login_success = login_with_recaptcha('https://example.com/login', 'testuser', 'password123')
print(f"Login {'successful' if login_success else 'failed'}")
Conclusion
CAPTCHA-solving techniques are constantly evolving, as are the CAPTCHAs themselves. It’s crucial to stay updated with the latest methods and always use these techniques responsibly. Here’s a simplified flow of a CAPTCHA-solving process:
graph TD
    A[Encounter CAPTCHA] --> B{Type of CAPTCHA}
    B -->|Text-based| C[Use OCR]
    B -->|Image-based| D[Use ML Model]
    B -->|Audio| E[Use Speech Recognition]
    B -->|reCAPTCHA| F[Use Browser Automation]
    C --> G[Submit Solution]
    D --> G
    E --> G
    F --> G
    G --> H{Successful?}
    H -->|Yes| I[Continue with Task]
    H -->|No| J[Try Alternative Method]
    J --> B
Remember, while these techniques can be powerful, it’s essential to use them ethically and in compliance with website terms of service. The examples provided here are for educational purposes and should be used responsibly in scenarios where automated interaction is permitted.
For those interested in the mathematical aspects of CAPTCHA solving, particularly in machine learning approaches, here’s a brief explanation of the neural network output layer for character recognition: