(蛇年说python,python搞爬虫,爬虫爬🐍🐍。祝大家除夕快乐)

关于jm的图像分层倒置问题,我上一篇文章给出了一个看似正确的结果。但是我在“书海”里畅游(看🐍🐍)的时候发现了一些错误:有些区分度很低的图片恢复之后会是乱上加乱。以下给张例图

很明显,图片的水印并不在底端,且色彩不连续,可以判断这张图片还原得有问题。

于是我选择更改方式,选择以jm水印为基准,通过ssim计算切片相似度。

之所以选择ssim算法,原因是其可以计算结构相似度。

而程序的逻辑便是:

按照几种情况穷举还原结果。

读取还原结果,截取左下角与右下角各200*50px的区域

对左、右下角和标准jm水印进行canny边缘识别提取特征

比对提取到的边缘

相似度最高的便是正确结果

以下便是按照上述逻辑编写的代码 

#jm图片还原Version2.0
from skimage.metrics import structural_similarity as ssim
import cv2
from PIL import Image
import math
import os
import sys
import numpy as np
import shutil


# Count the source files
def CountFiles(directory):
    """Return the number of files found under ``directory``.

    The tree is traversed with ``os.walk``, so files in nested
    sub-directories are included; directories themselves are not counted.
    """
    return sum(len(names) for _, _, names in os.walk(directory))

# Index of the largest value
def GetMaxIndex(arr):
    """Return the index of the first occurrence of the largest item in ``arr``."""
    return arr.index(max(arr))

# Return the larger of two values
def GetBig(a, b):
    """Return whichever of ``a`` and ``b`` is larger (``a`` when they are equal).

    If the two values are neither greater, smaller nor equal (for example
    when one of them is NaN), ``None`` is returned.
    """
    if a > b:
        return a
    if a < b:
        return b
    return a if a == b else None

# Cut positions for a given number of slices
def GetPartHeight(Height, PartNum):
    """Return the y-coordinates that split ``Height`` into ``PartNum`` slices.

    Every slice except the last is ``Height // PartNum`` pixels tall; the
    last slice ends at ``Height`` and therefore absorbs the remainder.

    Args:
        Height: total image height in pixels.
        PartNum: number of horizontal slices.

    Returns:
        A list of ``PartNum + 1`` boundaries, starting at 0 and ending at
        ``Height``; slice ``i`` spans ``result[i]`` to ``result[i + 1]``.

    Raises:
        ZeroDivisionError: if ``PartNum`` is 0.
    """
    # Integer floor division keeps the result exact for any int size
    # (true division would go through a float first).
    slice_height = Height // PartNum
    inner_cuts = [slice_height * i for i in range(1, PartNum)]
    return [0] + inner_cuts + [Height]

# Paste positions used when restoring the image
def GetReCanvasPartHeight(Height, PartNum):
    """Return the y-offsets at which the reversed slices are pasted back.

    The first pasted slice is the tall one (``Height // PartNum`` plus the
    remainder ``Height % PartNum``); every following slice is
    ``Height // PartNum`` pixels tall.

    Args:
        Height: total image height in pixels.
        PartNum: number of horizontal slices.

    Returns:
        A list of ``PartNum + 1`` offsets starting at 0; the final entry
        equals ``Height``.

    Raises:
        ZeroDivisionError: if ``PartNum`` is 0.
    """
    # divmod stays in exact integer arithmetic, unlike floor(Height / PartNum).
    slice_height, remainder = divmod(Height, PartNum)
    first_height = slice_height + remainder
    later_offsets = [first_height + slice_height * i for i in range(1, PartNum)]
    return [0, first_height] + later_offsets

# Restore a picture
def ReCanvas(PartNum, InImage):
    """Rebuild ``InImage`` by reversing the order of its horizontal slices.

    The image is cut into ``PartNum`` full-width strips at the boundaries
    given by ``GetPartHeight``; the strips are then pasted in reverse order
    onto a new RGB canvas of the same size, at the offsets given by
    ``GetReCanvasPartHeight``.

    Args:
        PartNum: number of slices to assume.
        InImage: PIL image to restore.

    Returns:
        A new ``'RGB'`` PIL image with the same width and height.
    """
    height = InImage.height
    width = InImage.width
    cut_lines = GetPartHeight(height, PartNum)
    paste_tops = GetReCanvasPartHeight(height, PartNum)
    strips = [
        InImage.crop((0, cut_lines[k], width, cut_lines[k + 1]))
        for k in range(PartNum)
    ]
    OutImage = Image.new('RGB', (width, height))
    # paste_tops holds one extra trailing offset; zip stops after PartNum strips.
    for strip, top in zip(reversed(strips), paste_tops):
        OutImage.paste(strip, (0, top))
    return OutImage

# Edge detection
def PicCanny(SouPic):
    """Return the Canny edge map of ``SouPic``.

    The input is smoothed with a 5x5 Gaussian blur first, then passed to
    ``cv2.Canny`` with thresholds 50 and 150.
    """
    return cv2.Canny(cv2.GaussianBlur(SouPic, (5, 5), 0), 50, 150)

# PIL image -> OpenCV array
def PilToCV2(SouPic):
    """Convert a PIL image into an OpenCV-style BGR array.

    Args:
        SouPic: PIL image in any mode.

    Returns:
        A 3-channel NumPy array in BGR channel order.
    """
    # COLOR_RGB2BGR only accepts 3- or 4-channel input, so palette or
    # grayscale images (e.g. a "P"/"L" mode PNG) are normalised to RGB first.
    rgb = SouPic.convert('RGB')
    return cv2.cvtColor(np.asarray(rgb), cv2.COLOR_RGB2BGR)

# Crop the bottom-left and bottom-right corners
def Pic90(pic):
    """Return the two 200x50 px patches at the bottom corners of ``pic``.

    Returns:
        A ``(left_patch, right_patch)`` tuple produced by ``pic.crop``; the
        left patch starts at x = 0, the right patch ends at the image width,
        and both cover the lowest 50 rows.
    """
    bottom = pic.height
    right = pic.width
    top = bottom - 50
    left_patch = pic.crop((0, top, 200, bottom))
    right_patch = pic.crop((right - 200, top, right, bottom))
    return left_patch, right_patch
    
KeyStr1 = './comicname/'
PartNum = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20]  # every slice count to try
PicSave = "./{}".format(KeyStr1)
PicR = "./source/"  # folder holding the scrambled pages
# exist_ok: running the script a second time must not fail on the output folder.
os.makedirs(PicSave, exist_ok=True)
biaozunshuiyin = "./exp.png"  # reference jm watermark
aaaaa = CountFiles(PicR)

# The watermark edge map is the same for every page, so it is built once.
# It is compared with ssim() against the 200x50 px corner patches from Pic90,
# so exp.png is expected to have that same size.
with Image.open(biaozunshuiyin) as ayt:
    aruy = PicCanny(PilToCV2(ayt))

# Pages are expected to be named 00001.webp, 00002.webp, ... inside PicR.
for ccc in range(1, aaaaa + 1):
    print("---------")
    # Build the path from PicR so the folder that was counted is the folder
    # that is read (letter case matters on case-sensitive file systems).
    Picrd = "{}{:0>5d}.webp".format(PicR, ccc)
    dfdf = "{}/{:0>5d}.png".format(PicSave, ccc)

    # Brute force: restore the page once for every possible slice count.
    with Image.open(Picrd) as Spicd:
        PicCon = [ReCanvas(i, Spicd) for i in PartNum]

    # Score each candidate by how closely either bottom corner matches the
    # watermark edges; the better of the two corners is the candidate's score.
    valcon = []
    for candidate in PicCon:
        picz, picy = Pic90(candidate)
        rrr = ssim(PicCanny(PilToCV2(picz)), aruy)
        bbb = ssim(PicCanny(PilToCV2(picy)), aruy)
        vvv = GetBig(rrr, bbb)
        valcon.append(vvv)
        print(vvv)

    # The candidate with the highest similarity is taken as the correct one.
    PicCon[GetMaxIndex(valcon)].save(dfdf)

标准jm水印 

这样便能正确地还原jm的图片(但是我没法子大量实验,爬jm太多导致ip被封了),速度大概在280ms一张图左右。

不过我还是通过这个2.0版本的程序领悟到了点新东西,即用水印标定分层位置。

再采用CV2库里的模板匹配算法,通过获取水印位置来获得分层数,极大减少了无用的循环,一张图还原时间大概为150~250ms(v1.0版本的程序还原时间约为3秒)。

import cv2
import numpy as np
import math
from PIL import Image

# Cut positions of the slices, top to bottom
def GetPartHeight(Height, PartNum):
    """Return the y-coordinates that split ``Height`` into ``PartNum`` slices.

    Every slice except the last is ``Height // PartNum`` pixels tall; the
    last slice ends at ``Height`` and therefore absorbs the remainder.

    Args:
        Height: total image height in pixels.
        PartNum: number of horizontal slices.

    Returns:
        A list of ``PartNum + 1`` boundaries, starting at 0 and ending at
        ``Height``; slice ``i`` spans ``result[i]`` to ``result[i + 1]``.

    Raises:
        ZeroDivisionError: if ``PartNum`` is 0.
    """
    # Integer floor division keeps the result exact for any int size
    # (true division would go through a float first).
    slice_height = Height // PartNum
    inner_cuts = [slice_height * i for i in range(1, PartNum)]
    return [0] + inner_cuts + [Height]

# Paste positions of the slices in the restored image
def GetReCanvasPartHeight(Height, PartNum):
    """Return the y-offsets at which the reversed slices are pasted back.

    The first pasted slice is the tall one (``Height // PartNum`` plus the
    remainder ``Height % PartNum``); every following slice is
    ``Height // PartNum`` pixels tall.

    Args:
        Height: total image height in pixels.
        PartNum: number of horizontal slices.

    Returns:
        A list of ``PartNum + 1`` offsets starting at 0; the final entry
        equals ``Height``.

    Raises:
        ZeroDivisionError: if ``PartNum`` is 0.
    """
    # divmod stays in exact integer arithmetic, unlike floor(Height / PartNum).
    slice_height, remainder = divmod(Height, PartNum)
    first_height = slice_height + remainder
    later_offsets = [first_height + slice_height * i for i in range(1, PartNum)]
    return [0, first_height] + later_offsets

# Restore
def ReCanvas(PartNum, InImage):
    """Rebuild ``InImage`` by reversing the order of its horizontal slices.

    The image is cut into ``PartNum`` full-width strips at the boundaries
    given by ``GetPartHeight``; the strips are then pasted in reverse order
    onto a new RGB canvas of the same size, at the offsets given by
    ``GetReCanvasPartHeight``.

    Args:
        PartNum: number of slices to assume.
        InImage: PIL image to restore.

    Returns:
        A new ``'RGB'`` PIL image with the same width and height.
    """
    height = InImage.height
    width = InImage.width
    cut_lines = GetPartHeight(height, PartNum)
    paste_tops = GetReCanvasPartHeight(height, PartNum)
    strips = [
        InImage.crop((0, cut_lines[k], width, cut_lines[k + 1]))
        for k in range(PartNum)
    ]
    OutImage = Image.new('RGB', (width, height))
    # paste_tops holds one extra trailing offset; zip stops after PartNum strips.
    for strip, top in zip(reversed(strips), paste_tops):
        OutImage.paste(strip, (0, top))
    return OutImage

import time  # only needed for the elapsed-time report at the end

vf = 'yourpic'  # path of the scrambled page to restore
PartNum = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20]  # candidate slice counts

start = time.perf_counter()

# Edge map of the scrambled page (read as grayscale).
imgw = cv2.imread(vf, 0)
if imgw is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError("cannot read image: {}".format(vf))
img = cv2.Canny(cv2.GaussianBlur(imgw, (5, 5), 0), 50, 150)

# Same processing for the reference jm watermark.
templateq = cv2.imread('exp.png', 0)
if templateq is None:
    raise FileNotFoundError("cannot read watermark template: exp.png")
template = cv2.Canny(cv2.GaussianBlur(templateq, (5, 5), 0), 50, 150)
h = template.shape[0]  # template height in rows

# Locate the watermark; with TM_CCOEFF the best match is the maximum.
res = cv2.matchTemplate(img, template, cv2.TM_CCOEFF)
_, _, _, top_left = cv2.minMaxLoc(res)
# Bottom edge of the watermark, i.e. the floored average slice height.
hei = top_left[1] + h

with Image.open(vf) as imgese:
    heit = imgese.height

    # Find the slice count whose floored slice height equals the measured
    # one (the last matching candidate wins).
    f = None
    for i in PartNum:
        if heit // i == hei:
            f = i
    if f is None:
        raise ValueError(
            "no slice count in {} gives a slice height of {} px "
            "for an image {} px tall".format(PartNum, hei, heit)
        )

    eee = ReCanvas(f, imgese)  # restore

print(time.perf_counter() - start)  # elapsed seconds
eee.show()

Logo

火山引擎开发者社区是火山引擎打造的AI技术生态平台,聚焦Agent与大模型开发,提供豆包系列模型(图像/视频/视觉)、智能分析与会话工具,并配套评测集、动手实验室及行业案例库。社区通过技术沙龙、挑战赛等活动促进开发者成长,新用户可领50万Tokens权益,助力构建智能应用。

更多推荐