Defeating Captcha With Python

⚠️
This article is for educational purposes only. Always respect website terms of service and applicable laws when interacting with CAPTCHAs; unauthorized CAPTCHA solving may violate both.

CAPTCHAs (Completely Automated Public Turing test to tell Computers and Humans Apart) are widely used to prevent automated access to websites. However, there are legitimate reasons for automating interactions with web services, such as testing or data collection for research. In this article, we’ll explore various techniques to bypass CAPTCHAs using Python, with practical code examples and use cases.

1. Optical Character Recognition (OCR)

One of the simplest forms of CAPTCHAs involves distorted text. We can use OCR libraries like Tesseract to attempt to read these.

ocr_captcha.py
import pytesseract
from PIL import Image

def solve_text_captcha(image_path):
    """Run Tesseract OCR over an image and return the recognized text.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The string produced by ``pytesseract.image_to_string`` with leading
        and trailing whitespace stripped.
    """
    # Image.open keeps the underlying file open; the context manager closes
    # it as soon as OCR has finished instead of waiting for garbage collection.
    with Image.open(image_path) as image:
        text = pytesseract.image_to_string(image)
    return text.strip()

# Usage: OCR the image stored at 'captcha.png' and print the recognized text.
captcha_text = solve_text_captcha('captcha.png')
print(f"Solved CAPTCHA: {captcha_text}")

# Example use case: Automating form submission
from selenium import webdriver

def submit_form_with_captcha(url, form_data, captcha_image_path):
    """Fill in a web form, type the OCR'd CAPTCHA text, and submit it.

    Args:
        url: Address of the page that contains the form.
        form_data: Mapping of each input's ``name`` attribute to the value
            that should be typed into it.
        captcha_image_path: Path of the CAPTCHA image handed to
            ``solve_text_captcha``.
    """
    # Imported here because this snippet only imports ``webdriver`` at module
    # level; the ``find_element_by_*`` helpers were removed in Selenium 4.3,
    # so lookups go through ``find_element(By.<strategy>, value)``.
    from selenium.webdriver.common.by import By

    driver = webdriver.Chrome()
    try:
        driver.get(url)

        # Fill form data
        for field, value in form_data.items():
            driver.find_element(By.NAME, field).send_keys(value)

        # Solve CAPTCHA
        captcha_solution = solve_text_captcha(captcha_image_path)
        driver.find_element(By.NAME, 'captcha').send_keys(captcha_solution)

        # Submit form
        driver.find_element(By.ID, 'submit-button').click()
    finally:
        # Always shut the browser down, even when a lookup or OCR step raises.
        driver.quit()

# Usage: sample field values for the form at the example URL. The email is a
# plain placeholder address (the previous value was an obfuscation artifact,
# not an address).
form_data = {'username': 'testuser', 'email': 'testuser@example.com'}
submit_form_with_captcha('https://example.com/form', form_data, 'captcha.png')

2. Machine Learning Approaches

For more complex image-based CAPTCHAs, we can train machine learning models to recognize patterns.

ml_captcha_solver.py
import numpy as np
from PIL import Image
from tensorflow.keras.models import load_model

def preprocess_image(image_path):
    """Load an image as a scaled grayscale array shaped for the model.

    Args:
        image_path: Path of the image file to load.

    Returns:
        A float array of shape ``(1, 100, 40, 1)`` holding the grayscale
        pixel values divided by 255.
    """
    # Close the file handle promptly; convert() returns an independent image,
    # so the result remains usable after the source file is closed.
    with Image.open(image_path) as source:
        img = source.convert('L').resize((100, 40))
    img_array = np.array(img) / 255.0
    # NOTE(review): resize() takes (width, height), so the array above is
    # (40, 100); this reshape reinterprets the buffer rather than transposing
    # it. Confirm it matches the layout the model was trained on.
    return img_array.reshape(1, 100, 40, 1)

# Loaded once at module level; solve_captcha() reads this global for every prediction.
model = load_model('captcha_model.h5')

def solve_captcha(image_path):
    """Predict the text of a CAPTCHA image with the module-level model.

    Args:
        image_path: Path of the image file to classify.

    Returns:
        A string with one character per predicted position.
    """
    scores = model.predict(preprocess_image(image_path))
    # Pick the highest-scoring class along axis 2, then flatten to one sequence.
    class_ids = np.argmax(scores, axis=2).flatten()
    # NOTE(review): chr() maps each class index straight to the code point of
    # the same number; presumably the model's classes are meant to be code
    # points -- confirm against the training label encoding.
    return ''.join(map(chr, class_ids))

# Usage: run the model-based solver on 'complex_captcha.png' and print its output.
result = solve_captcha('complex_captcha.png')
print(f"Solved complex CAPTCHA: {result}")

# Example use case: Batch processing of CAPTCHAs
import os

def batch_solve_captchas(directory):
    """Solve every ``.png`` file found directly inside a directory.

    Args:
        directory: Path of the directory to scan (not recursive).

    Returns:
        A dict mapping each matching file name to the string returned by
        ``solve_captcha`` for that file. Entries follow ``os.listdir`` order.
    """
    return {
        entry: solve_captcha(os.path.join(directory, entry))
        for entry in os.listdir(directory)
        if entry.endswith('.png')
    }

# Usage: solve every .png file in 'captcha_images/' and print one
# "filename: solution" line per file.
batch_results = batch_solve_captchas('captcha_images/')
for filename, solution in batch_results.items():
    print(f"{filename}: {solution}")

3. Audio CAPTCHA Solving

Some CAPTCHAs offer audio alternatives. We can use speech recognition to solve these.

audio_captcha_solver.py
import speech_recognition as sr
from pydub import AudioSegment

def solve_audio_captcha(audio_file):
    """Transcribe an MP3 audio clip with the Google speech recognizer.

    Args:
        audio_file: Path of the MP3 file to transcribe.

    Returns:
        The recognized text. When recognition fails, the string
        "Could not understand audio" (speech unintelligible) or
        "Could not request results" (recognition request failed) is returned
        instead, so callers should compare against those values.
    """
    import os
    import tempfile

    recognizer = sr.Recognizer()

    # Convert inside a private temporary directory so the intermediate WAV
    # cannot collide with another run and is removed afterwards, rather than
    # leaving a fixed "temp.wav" in the working directory.
    with tempfile.TemporaryDirectory() as workdir:
        wav_path = os.path.join(workdir, "captcha.wav")

        # Convert mp3 to wav; export() hands back the open output file, which
        # must be closed before the directory can be cleaned up.
        exported = AudioSegment.from_mp3(audio_file).export(wav_path, format="wav")
        exported.close()

        # Read the whole clip into memory while the WAV file still exists.
        with sr.AudioFile(wav_path) as source:
            audio = recognizer.record(source)

    # Recognize speech
    try:
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError:
        return "Could not request results"

# Usage: transcribe 'audio_captcha.mp3' and print the result (either the
# recognized text or one of the function's failure messages).
audio_captcha_text = solve_audio_captcha("audio_captcha.mp3")
print(f"Solved audio CAPTCHA: {audio_captcha_text}")

# Example use case: Accessibility testing
def test_audio_captcha_accessibility(url):
    """Check that a page's audio CAPTCHA option can be completed end to end.

    Args:
        url: Address of the page that offers the audio CAPTCHA.

    Returns:
        True when the text "Success" appears in the page source after the
        answer is submitted, otherwise False.
    """
    # urllib.request was used without being imported anywhere in this snippet,
    # and the ``find_element_by_*`` helpers were removed in Selenium 4.3, so
    # both names are brought into scope here.
    import urllib.request

    from selenium.webdriver.common.by import By

    driver = webdriver.Chrome()
    try:
        driver.get(url)

        # Find and click audio CAPTCHA option
        driver.find_element(By.ID, 'audio-captcha-option').click()

        # Download audio file
        audio_src = driver.find_element(By.ID, 'audio-captcha').get_attribute('src')
        urllib.request.urlretrieve(audio_src, "temp_audio.mp3")

        # Solve audio CAPTCHA
        solution = solve_audio_captcha("temp_audio.mp3")

        # Enter solution
        driver.find_element(By.ID, 'captcha-input').send_keys(solution)

        # Submit and check result
        driver.find_element(By.ID, 'submit-button').click()
        return "Success" in driver.page_source
    finally:
        # Always shut the browser down, even when a step above raises.
        driver.quit()

# Usage: run the check against the example page and report "passed" when the
# function returned True, "failed" otherwise.
accessibility_result = test_audio_captcha_accessibility('https://example.com/audio-captcha')
print(f"Audio CAPTCHA accessibility test {'passed' if accessibility_result else 'failed'}")

4. Using CAPTCHA Solving Services

For high-volume needs, there are services that offer CAPTCHA solving through their APIs.

captcha_service.py
import requests

def solve_captcha_with_service(api_key, captcha_image):
    """Upload an image to the solving service and return its answer.

    Args:
        api_key: Key sent to the service in the ``api_key`` form field.
        captcha_image: Path of the image file uploaded as the ``image`` part.

    Returns:
        The ``solution`` field of the JSON response when the service answers
        with HTTP 200; otherwise the string ``"Error: <status code>"``.
    """
    url = "https://api.captcha-solving-service.com/solve"
    data = {'api_key': api_key}

    # Keep the upload inside a ``with`` block so the image file is closed once
    # the request has been sent, instead of leaking the handle.
    with open(captcha_image, 'rb') as image_file:
        response = requests.post(url, files={'image': image_file}, data=data)

    if response.status_code == 200:
        return response.json()['solution']
    return f"Error: {response.status_code}"

# Usage: send 'captcha.png' to the service with a placeholder API key and
# print whatever the function returns (the solution or an "Error: ..." string).
api_key = "your_api_key_here"
solution = solve_captcha_with_service(api_key, "captcha.png")
print(f"Service solved CAPTCHA: {solution}")

# Example use case: Large-scale web scraping
import csv

def scrape_website_with_captcha(url, num_pages, api_key):
    """Collect title/price pairs from paginated listings into a CSV file.

    Pages ``url?page=0`` through ``url?page=num_pages - 1`` are visited in
    order. Whenever a page's source contains the word "captcha", the CAPTCHA
    image is screenshotted, sent to ``solve_captcha_with_service``, and the
    answer is submitted before the page's items are read.

    Args:
        url: Base address of the listing; ``?page=<n>`` is appended.
        num_pages: Number of pages to visit.
        api_key: Key passed through to ``solve_captcha_with_service``.

    Side effects:
        Writes ``temp_captcha.png`` when a CAPTCHA is encountered and writes
        the collected rows to ``scraped_data.csv``.
    """
    # Imported here because this point in the file only has ``webdriver`` in
    # scope; the ``find_element(s)_by_*`` helpers were removed in Selenium 4.3.
    from selenium.webdriver.common.by import By

    data = []
    driver = webdriver.Chrome()
    try:
        for page in range(num_pages):
            driver.get(f"{url}?page={page}")

            # Check if CAPTCHA is present
            if "captcha" in driver.page_source.lower():
                # Capture CAPTCHA image
                captcha_element = driver.find_element(By.ID, 'captcha-image')
                captcha_element.screenshot('temp_captcha.png')

                # Solve CAPTCHA
                solution = solve_captcha_with_service(api_key, 'temp_captcha.png')

                # Enter solution and submit
                driver.find_element(By.ID, 'captcha-input').send_keys(solution)
                driver.find_element(By.ID, 'submit-captcha').click()

            # Extract data from page
            for item in driver.find_elements(By.CLASS_NAME, 'item'):
                data.append({
                    'title': item.find_element(By.CLASS_NAME, 'title').text,
                    'price': item.find_element(By.CLASS_NAME, 'price').text
                })
    finally:
        # Always shut the browser down, even when a page fails part-way.
        driver.quit()

    # Save data to CSV
    with open('scraped_data.csv', 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=['title', 'price'])
        writer.writeheader()
        writer.writerows(data)

# Usage: visit pages 0-9 of the example product listing with a placeholder
# API key; the rows end up in scraped_data.csv.
scrape_website_with_captcha('https://example.com/products', 10, 'your_api_key_here')

5. Browser Automation with Selenium

Sometimes, the best approach is to mimic human behavior using browser automation.

selenium_captcha.py
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def solve_recaptcha():
    """Click the checkbox on Google's reCAPTCHA demo page and wait for success.

    Prints a confirmation message once an element matching
    ``.recaptcha-success`` is present in the main document.

    Raises:
        selenium.common.exceptions.TimeoutException: If any of the 10-second
            waits expires before its condition is met.
    """
    driver = webdriver.Chrome()
    try:
        driver.get("https://www.google.com/recaptcha/api2/demo")

        # Wait for the reCAPTCHA iframe to be available
        WebDriverWait(driver, 10).until(
            EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR, "iframe[src^='https://www.google.com/recaptcha/api2/anchor']"))
        )

        # Click the reCAPTCHA checkbox
        WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.ID, "recaptcha-anchor"))
        ).click()

        # Switch back to the main content
        driver.switch_to.default_content()

        # Check if the CAPTCHA was solved
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, ".recaptcha-success"))
        )

        print("reCAPTCHA solved successfully!")
    finally:
        # A timed-out wait raises; make sure the browser is still closed.
        driver.quit()

# Usage: opens a Chrome window on the reCAPTCHA demo page and runs the flow once.
solve_recaptcha()

# Example use case: Automated login with reCAPTCHA
def login_with_recaptcha(url, username, password):
    """Log in through a form that is protected by a reCAPTCHA checkbox.

    Args:
        url: Address of the login page.
        username: Value typed into the element with id ``username``.
        password: Value typed into the element with id ``password``.

    Returns:
        True when the text "Welcome" appears in the page source after the
        login button is clicked, otherwise False.

    Raises:
        selenium.common.exceptions.TimeoutException: If the reCAPTCHA frame or
            checkbox is not ready within 10 seconds, or no
            ``.recaptcha-success`` element appears within 60 seconds.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)

        # Fill in login details. The ``find_element_by_*`` helpers were removed
        # in Selenium 4.3, so lookups use ``find_element(By.ID, ...)``.
        driver.find_element(By.ID, 'username').send_keys(username)
        driver.find_element(By.ID, 'password').send_keys(password)

        # Solve reCAPTCHA
        WebDriverWait(driver, 10).until(
            EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR, "iframe[src^='https://www.google.com/recaptcha/api2/anchor']"))
        )
        WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.ID, "recaptcha-anchor"))
        ).click()
        driver.switch_to.default_content()

        # Wait for reCAPTCHA to be solved
        WebDriverWait(driver, 60).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, ".recaptcha-success"))
        )

        # Submit login form
        driver.find_element(By.ID, 'login-button').click()

        # Check if login was successful
        return "Welcome" in driver.page_source
    finally:
        # Always shut the browser down, even when a wait times out.
        driver.quit()

# Usage: attempt a login on the example page with sample credentials and
# report "successful" when the function returned True, "failed" otherwise.
login_success = login_with_recaptcha('https://example.com/login', 'testuser', 'password123')
print(f"Login {'successful' if login_success else 'failed'}")

Conclusion

CAPTCHA-solving techniques are constantly evolving, as are the CAPTCHAs themselves. It’s crucial to stay updated with the latest methods and always use these techniques responsibly. Here’s a simplified flow of a CAPTCHA-solving process:

graph TD
    A[Encounter CAPTCHA] --> B{Type of CAPTCHA}
    B -->|Text-based| C[Use OCR]
    B -->|Image-based| D[Use ML Model]
    B -->|Audio| E[Use Speech Recognition]
    B -->|reCAPTCHA| F[Use Browser Automation]
    C --> G[Submit Solution]
    D --> G
    E --> G
    F --> G
    G --> H{Successful?}
    H -->|Yes| I[Continue with Task]
    H -->|No| J[Try Alternative Method]
    J --> B

Remember, while these techniques can be powerful, it’s essential to use them ethically and in compliance with website terms of service. The examples provided here are for educational purposes and should be used responsibly in scenarios where automated interaction is permitted.

For those interested in the mathematical aspects of CAPTCHA solving, particularly in machine learning approaches, here’s a brief explanation of the neural network output layer for character recognition: