使用 OpenCV 破解滑块验证教程

滑块验证码是一种常见的反自动化机制,通常用于防止机器人操作。本教程将指导你如何使用 OpenCV 和 Playwright 破解滑块验证码。我们将通过以下步骤实现:

  1. 获取滑块和背景图片
  2. 计算滑块缺口位置
  3. 生成滑动轨迹
  4. 模拟滑块滑动
  5. 整合主流程,检查验证结果并在失败时重试

1. 环境准备

首先,确保你已经安装了以下 Python 库:

pip install opencv-python-headless numpy playwright

安装完成后,还需要下载 Playwright 使用的 Chromium 浏览器:

playwright install chromium

2. 获取滑块和背景图片

使用 Playwright 打开目标网页,并获取滑块和背景图片的 Base64 编码。

import cv2
import numpy as np
from playwright.sync_api import Playwright, sync_playwright

def decode_base64_image(base64_string):
    """Decode a Base64-encoded image into an OpenCV colour array.

    Args:
        base64_string: Raw Base64 text, or a ``data:image...`` data URI whose
            payload follows the first comma.

    Returns:
        The image produced by ``cv2.imdecode(..., cv2.IMREAD_COLOR)``.

    Raises:
        ValueError: If the input is ``None``/empty, carries no payload, is
            malformed Base64 (``binascii.Error`` is a ``ValueError``
            subclass), or cannot be decoded as an image by OpenCV.
    """
    import base64

    if not base64_string:
        raise ValueError("base64_string is empty; no image data to decode")
    if base64_string.startswith("data:image"):
        # Keep only the payload after the "data:image/...;base64," header.
        base64_string = base64_string.partition(",")[2]
    img_data = base64.b64decode(base64_string)
    if not img_data:
        raise ValueError("base64_string contains no image payload")
    img_array = np.frombuffer(img_data, dtype=np.uint8)
    img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imdecode signals an unreadable buffer by returning None.
        raise ValueError("decoded bytes are not a readable image")
    return img

def get_images(page):
    """Fetch and decode the captcha background and slider-piece images.

    Reads the ``src`` attribute of two elements located by XPath and passes
    each value to ``decode_base64_image``.

    Args:
        page: Page object exposing ``locator()``.

    Returns:
        A ``(background_image, slider_image)`` tuple of decoded images.
    """
    # Fill in the XPath of the background image after "xpath=".
    background_locator = page.locator('xpath=')
    # Fill in the XPath of the slider-piece image after "xpath=".
    piece_locator = page.locator('xpath=')

    # Read both attributes first, then decode, in background/slider order.
    sources = [
        locator.get_attribute("src")
        for locator in (background_locator, piece_locator)
    ]
    background_image, piece_image = map(decode_base64_image, sources)
    return background_image, piece_image

3. 计算滑块缺口位置

使用 OpenCV 的模板匹配功能计算滑块缺口的位置。

def get_notch_location(slide_img, bg_img):
    """Locate the notch by matching Canny edge maps of slider and background.

    The background is resized to 310x155 and the slider to 47x155, the
    background is binarised at 220, and both are converted to equalised
    grayscale edge maps before template matching.

    Args:
        slide_img: Slider-piece image, converted with ``cv2.COLOR_BGR2GRAY``.
        bg_img: Background image, converted with ``cv2.COLOR_BGR2GRAY``.

    Returns:
        The x coordinate of the highest-scoring match location, measured in
        the 310-pixel-wide resized background rather than in source pixels.
    """

    def edge_map(image, low_threshold):
        # Grayscale -> histogram equalisation -> Canny edges.
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        equalised = cv2.equalizeHist(gray)
        return cv2.Canny(equalised, threshold1=low_threshold, threshold2=900)

    background = cv2.resize(bg_img, (310, 155))
    piece = cv2.resize(slide_img, (47, 155))

    # Only the binarised image is needed; the returned threshold is unused.
    _, binarised = cv2.threshold(background, 220, 255, cv2.THRESH_BINARY)
    background_edges = edge_map(binarised, 500)
    piece_edges = edge_map(piece, 650)

    scores = cv2.matchTemplate(
        background_edges, piece_edges, cv2.TM_CCOEFF_NORMED
    )
    # minMaxLoc returns (min_val, max_val, min_loc, max_loc); index 3 is the
    # location of the maximum score.
    best_location = cv2.minMaxLoc(scores)[3]
    return best_location[0]

4. 生成滑动轨迹

生成一条模拟人类滑动的轨迹:前 4/5 的距离加速,剩余距离减速,最后附加几步小幅随机抖动。

import numpy as np

def get_track(distance):
    """Build per-step horizontal offsets covering ``distance`` pixels.

    The motion accelerates (a = 2) until 4/5 of the distance is covered and
    decelerates (a = -3) afterwards, sampled every 0.2 time units. Five
    random offsets in [-2, 2] are appended at the end.

    Offsets are derived from the rounded cumulative position, and the
    position is clamped at ``distance``, so the offsets before the jitter sum
    to ``round(distance)`` instead of drifting through per-step rounding.

    Args:
        distance: Total distance to travel. Values <= 0 yield only the five
            jitter offsets.

    Returns:
        A list of integer offsets. The trailing jitter is not compensated,
        so the overall sum can differ from ``distance`` by up to 10.
    """
    track = []
    current = 0.0
    emitted = 0  # Integer position already accounted for in ``track``.
    mid = distance * 4 / 5
    t = 0.2
    v = 0.0
    while current < distance:
        a = 2 if current < mid else -3
        v0 = v
        v = v0 + a * t
        # Clamp so the final step lands on the target instead of overshooting.
        current = min(current + v0 * t + 0.5 * a * t * t, distance)
        target = round(current)
        track.append(target - emitted)
        emitted = target
    track.extend(int(np.random.randint(-2, 3)) for _ in range(5))
    return track

5. 模拟滑块滑动

使用 Playwright 模拟鼠标滑动滑块。

import time

def slide_block(page, corrected_distance):
    """Drag the slider handle horizontally along a generated track.

    Presses the mouse at the centre of the ``.verify-move-block___3Pb5u``
    element, moves it by each offset from ``get_track(corrected_distance)``
    with a 10 ms pause between moves, then releases it. Does nothing when
    the element has no bounding box.

    Args:
        page: Page object exposing ``locator()`` and ``mouse``.
        corrected_distance: Horizontal distance passed to ``get_track``.
    """
    handle = page.locator(".verify-move-block___3Pb5u")
    box = handle.bounding_box()
    if not box:
        return

    # Start from the centre of the handle.
    cursor_x = box["x"] + box["width"] / 2
    cursor_y = box["y"] + box["height"] / 2

    mouse = page.mouse
    mouse.move(cursor_x, cursor_y)
    mouse.down()
    for offset in get_track(corrected_distance):
        cursor_x += offset
        mouse.move(cursor_x, cursor_y, steps=5)
        time.sleep(0.01)
    mouse.up()
6. 主运行逻辑

将所有步骤整合到主函数中。

def run(playwright: Playwright) -> None:
    """Open the login page, trigger the captcha and try to solve it.

    Launches a visible Chromium instance, fills in the phone number,
    requests an SMS code and then makes up to three attempts at the slider.
    An attempt counts as successful when ``.verify-dialog`` is no longer
    visible. The context and browser are closed even if a step raises.

    Args:
        playwright: Playwright instance yielded by ``sync_playwright()``.
    """
    max_attempts = 3

    browser = playwright.chromium.launch(headless=False)
    context = browser.new_context()
    try:
        page = context.new_page()
        page.goto("输入登陆地址")
        page.get_by_text("短信登录").click()
        page.get_by_role("textbox", name="请输入手机号").fill("输入你的手机号")
        page.get_by_text("发送验证码").click()
        page.wait_for_load_state("networkidle")

        for _ in range(max_attempts):
            # Re-read the images on every attempt.
            bg_img_decoded, slide_block_decoded = get_images(page)
            notch_x = get_notch_location(slide_block_decoded, bg_img_decoded)
            # Fixed 10 px correction added to the detected notch position.
            corrected_distance = notch_x + 10
            slide_block(page, corrected_distance)
            page.wait_for_timeout(1000)
            page.wait_for_load_state("networkidle")

            if not page.locator(".verify-dialog").is_visible():
                print("弹窗已消失,继续执行后续操作。")
                break
            print("弹窗仍然存在,重新获取图片并重试...")
            # Use Playwright's own wait rather than time.sleep.
            page.wait_for_timeout(2000)
        else:
            # Reached only when no attempt hit ``break``.
            print("重试次数已用尽,弹窗仍然存在,程序结束。")

        page.wait_for_timeout(20000)
    finally:
        context.close()
        browser.close()

# Start Playwright only when executed as a script, so that importing this
# module does not launch a browser.
if __name__ == "__main__":
    with sync_playwright() as playwright:
        run(playwright)

7. 运行脚本

将上述代码保存为一个 Python 脚本(例如 slider_captcha.py),然后运行它:

python slider_captcha.py

8. 注意事项

  • 滑块验证码的复杂性:不同的网站可能使用不同的滑块验证码机制,可能需要调整代码以适应不同的情况。
  • 反自动化机制:一些网站可能会检测自动化工具的使用,可能需要进一步伪装浏览器行为。
  • 法律和道德:确保你在合法和道德的范围内使用这些技术,不要用于恶意目的。

通过本教程,你应该能够理解如何使用 OpenCV 和 Playwright 破解滑块验证码,并将其应用到实际项目中。

Logo

中国智能体开发者社区,聚焦智能体与大模型开发,提供前沿资讯、实用工具链、开源项目及行业案例。通过技术沙龙、开发者大赛等活动,促进经验交流与协作,助力开发者快速构建创新智能应用。

更多推荐