mirror of
https://github.com/taisuii/ClassificationCaptchaOcr.git
synced 2026-05-10 00:44:16 +08:00
2024/9/7 Updated code
This commit is contained in:
0
development/__init__.py
Normal file
0
development/__init__.py
Normal file
74
development/crop_image.py
Normal file
74
development/crop_image.py
Normal file
@@ -0,0 +1,74 @@
|
||||
from PIL import Image, ImageFont, ImageDraw, ImageOps
|
||||
from io import BytesIO
|
||||
|
||||
|
||||
def convert_png_to_jpg(png_bytes: bytes) -> bytes:
    """Convert PNG image bytes to JPEG bytes, flattening transparency onto white.

    Args:
        png_bytes: raw bytes of a PNG image.

    Returns:
        Raw bytes of the equivalent JPEG image. Any transparent area is
        composited over a white background (JPEG has no alpha channel).
    """
    # Load the incoming bytes as an image object.
    png_image = Image.open(BytesIO(png_bytes))

    # Palette ('P') and grayscale-with-alpha ('LA') images may also carry
    # transparency; normalize them to RGBA so they take the flattening path
    # below instead of being converted straight to RGB (which would drop or
    # mis-render the transparent pixels).
    if png_image.mode in ("P", "LA"):
        png_image = png_image.convert("RGBA")

    # Buffer that will receive the encoded JPEG data.
    output_bytes = BytesIO()

    if png_image.mode == "RGBA":
        # Composite onto a white canvas; transparent pixels become white.
        white_bg = Image.new("RGB", png_image.size, (255, 255, 255))
        # The image itself is used as the paste mask (its alpha channel).
        white_bg.paste(png_image, (0, 0), png_image)
        jpg_image = white_bg
    else:
        # No transparency: a plain mode conversion is enough.
        jpg_image = png_image.convert("RGB")

    # Encode as JPEG into the in-memory buffer.
    jpg_image.save(output_bytes, format="JPEG")

    # Return the encoded JPEG bytes.
    return output_bytes.getvalue()
|
||||
|
||||
|
||||
def crop_image(image_bytes, coordinates):
    """Split an image into a 3x3 grid and return the requested cells.

    Args:
        image_bytes: raw bytes of the source image.
        coordinates: iterable of (row, col) pairs, 1-based, selecting cells.

    Returns:
        List of PIL Image crops, one per coordinate, in input order.
    """
    source = Image.open(BytesIO(image_bytes))
    total_w, total_h = source.size
    # Each grid cell is one third of the image (integer division).
    cell_w = total_w // 3
    cell_h = total_h // 3

    crops = []
    for row, col in coordinates:
        left = (col - 1) * cell_w
        top = (row - 1) * cell_h
        crops.append(source.crop((left, top, left + cell_w, top + cell_h)))
    return crops
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Crop order: left-to-right, top-to-bottom. Each entry is (row, col), 1-based.
    coordinates = [[row, col] for row in (1, 2, 3) for col in (1, 2, 3)]

    with open("./image_test/bg.jpg", "rb") as rb:
        bg_img = rb.read()
    cropped_images = crop_image(bg_img, coordinates)

    # Save every grid cell as its own file.
    for j, img_crop in enumerate(cropped_images):
        img_crop.save(f"./image_test/bg{j}.jpg")

    # Convert the icon from PNG to JPEG.
    with open("./image_test/icon.png", "rb") as rb:
        icon_img = rb.read()
    icon_img_jpg = convert_png_to_jpg(icon_img)
    with open("./image_test/icon.jpg", "wb") as wb:
        wb.write(icon_img_jpg)
|
||||
BIN
development/image_test/bg.jpg
Normal file
BIN
development/image_test/bg.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 42 KiB |
BIN
development/image_test/icon.png
Normal file
BIN
development/image_test/icon.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 7.4 KiB |
148
development/predict.py
Normal file
148
development/predict.py
Normal file
@@ -0,0 +1,148 @@
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
|
||||
from development.resnet18 import MyResNet18, data_transform
|
||||
from development.crop_image import crop_image, convert_png_to_jpg
|
||||
import torch
|
||||
import time
|
||||
from PIL import Image
|
||||
from io import BytesIO
|
||||
import onnxruntime as ort
|
||||
|
||||
|
||||
def predict(icon_image, bg_image):
    """Score each 3x3 background cell against the icon with the PyTorch model.

    Args:
        icon_image: raw bytes of the (already JPEG-converted) icon image.
        bg_image: raw bytes of the 3x3 background image.

    Returns:
        The (row, col) coordinates of the three cells most similar to the
        icon, ordered by descending cosine similarity. (The original version
        only printed results; returning them matches ``predict_onnx`` and is
        backward compatible for callers that ignored the return value.)
    """
    current_dir = os.path.dirname(os.path.abspath(__file__))
    model_path = os.path.join(current_dir, 'model', 'resnet18_38_0.021147585306924.pth')
    # Grid cells in reading order: (row, col), 1-based.
    coordinates = [[row, col] for row in (1, 2, 3) for col in (1, 2, 3)]

    # Index 0 is the icon; indices 1..9 are the grid cells.
    target_images = [data_transform(Image.open(BytesIO(icon_image)))]
    # Use a distinct loop variable so the bg_image parameter is not shadowed.
    for cell_image in crop_image(bg_image, coordinates):
        target_images.append(data_transform(cell_image))

    start = time.time()
    model = MyResNet18(num_classes=91)  # class count must match training
    model.load_state_dict(torch.load(model_path))
    model.eval()
    print("加载模型,耗时:", time.time() - start)
    start = time.time()

    batch = torch.stack(target_images, dim=0)
    # Inference only: disable autograd bookkeeping.
    with torch.no_grad():
        target_outputs = model(batch)

    scores = []
    for i, out_put in enumerate(target_outputs):
        if i == 0:
            # The icon's embedding is the reference vector; add a batch dim
            # so cosine_similarity broadcasts correctly.
            target_output = out_put.unsqueeze(0)
        else:
            similarity = torch.nn.functional.cosine_similarity(
                target_output, out_put.unsqueeze(0)
            )
            scores.append(similarity.cpu().item())
    # One confidence per cell, left-to-right, top-to-bottom.
    print(scores)
    # Sort while keeping the original cell index.
    indexed_arr = list(enumerate(scores))
    sorted_arr = sorted(indexed_arr, key=lambda x: x[1], reverse=True)
    # Take the three best-matching cells.
    largest_three = sorted_arr[:3]
    print(largest_three)
    print("识别完成,耗时:", time.time() - start)
    return [coordinates[idx] for idx, _ in largest_three]
|
||||
|
||||
|
||||
# Load the ONNX model once at import time so predict_onnx() can reuse the
# session across calls (loading per call would dominate inference time).
start = time.time()
current_dir = os.path.dirname(os.path.abspath(__file__))
model_path = os.path.join(current_dir, 'model', 'resnet18.onnx')
session = ort.InferenceSession(model_path)
# Name of the graph's first input tensor, read dynamically from the model.
input_name = session.get_inputs()[0].name
print("加载模型,耗时:", time.time() - start)
|
||||
|
||||
|
||||
def predict_onnx(icon_image, bg_image):
    """Score each 3x3 background cell against the icon with the ONNX model.

    Args:
        icon_image: raw bytes of the (already JPEG-converted) icon image.
        bg_image: raw bytes of the 3x3 background image.

    Returns:
        The (row, col) coordinates of the three cells most similar to the
        icon, ordered by descending cosine similarity.
    """
    # Grid cells in reading order: (row, col), 1-based.
    coordinates = [[row, col] for row in (1, 2, 3) for col in (1, 2, 3)]

    def cosine_similarity(vec1, vec2):
        """Cosine similarity between two 1-D vectors."""
        vec1 = np.array(vec1)
        vec2 = np.array(vec2)
        dot_product = np.dot(vec1, vec2)
        norm_vec1 = np.linalg.norm(vec1)
        norm_vec2 = np.linalg.norm(vec2)
        return dot_product / (norm_vec1 * norm_vec2)

    def data_transforms(image):
        """Replicate the torchvision preprocessing pipeline in NumPy:
        resize -> scale to [0,1] -> normalize -> CHW layout."""
        image = image.resize((224, 224))
        image_array = np.array(image).astype(np.float32) / 255.0
        mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
        std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
        image_array = (image_array - mean) / std
        return np.transpose(image_array, (2, 0, 1))

    # Index 0 is the icon; indices 1..9 are the grid cells.
    target_images = [data_transforms(Image.open(BytesIO(icon_image)))]
    # Use a distinct loop variable so the bg_image parameter is not shadowed.
    for cell_image in crop_image(bg_image, coordinates):
        target_images.append(data_transforms(cell_image))

    start = time.time()
    # onnxruntime requires a single ndarray of the graph's input dtype, not a
    # Python list of arrays — stack into one float32 batch of shape (10,3,224,224).
    batch = np.stack(target_images).astype(np.float32)
    outputs = session.run(None, {input_name: batch})[0]

    scores = []
    for i, out_put in enumerate(outputs):
        if i == 0:
            # The icon's embedding is the reference vector.
            target_output = out_put
        else:
            scores.append(cosine_similarity(target_output, out_put))
    # One confidence per cell, left-to-right, top-to-bottom.
    # print(scores)
    # Sort while keeping the original cell index.
    indexed_arr = list(enumerate(scores))
    sorted_arr = sorted(indexed_arr, key=lambda x: x[1], reverse=True)
    # Take the three best-matching cells and map back to coordinates.
    largest_three = sorted_arr[:3]
    answer = [coordinates[i[0]] for i in largest_three]
    print(f"识别完成{answer},耗时: {time.time() - start}")
    return answer
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Icon is stored as PNG; flatten it to JPEG bytes to match training data.
    with open("image_test/icon.png", "rb") as rb:
        icon_image = convert_png_to_jpg(rb.read())
    # 3x3 background image.
    with open("image_test/bg.jpg", "rb") as rb:
        bg_image = rb.read()
    predict_onnx(icon_image, bg_image)
|
||||
17
development/pth2onnx.py
Normal file
17
development/pth2onnx.py
Normal file
@@ -0,0 +1,17 @@
|
||||
from resnet18 import MyResNet18
|
||||
import torch
|
||||
|
||||
def convert():
    """Export the trained PyTorch checkpoint to ONNX (model/resnet18.onnx).

    Side effects: reads the .pth checkpoint and writes the .onnx file.
    """
    # Load the PyTorch model.
    model_path = "model/resnet18_38_0.021147585306924.pth"
    model = MyResNet18(num_classes=91)
    model.load_state_dict(torch.load(model_path))
    model.eval()
    # Example input; batch of 10 matches inference (1 icon + 9 grid cells).
    dummy_input = torch.randn(10, 3, 224, 224)
    # Export to ONNX. dynamic_axes keeps the batch dimension variable, so the
    # exported graph also accepts batch sizes other than 10 (consumers read
    # the input name from the session, so naming the tensors is safe).
    torch.onnx.export(
        model,
        dummy_input,
        "model/resnet18.onnx",
        verbose=True,
        input_names=["input"],
        output_names=["output"],
        dynamic_axes={"input": {0: "batch"}, "output": {0: "batch"}},
    )
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Run the one-off .pth -> .onnx conversion.
    convert()
|
||||
117
development/resnet18.py
Normal file
117
development/resnet18.py
Normal file
@@ -0,0 +1,117 @@
|
||||
import torchvision.transforms as transforms
|
||||
from matplotlib import pyplot as plt
|
||||
from torchvision.datasets import ImageFolder
|
||||
from tqdm import tqdm
|
||||
import torch
|
||||
import torchvision
|
||||
import torch.nn as nn
|
||||
from torch.utils.data import DataLoader
|
||||
import numpy as np
|
||||
|
||||
# Preprocessing pipeline shared by training and PyTorch inference.
data_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),  # resize to the network's input size
        transforms.ToTensor(),  # convert PIL image to a CHW float tensor in [0,1]
        transforms.Normalize(
            (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)
        ),  # normalize with ImageNet channel statistics
    ]
)
|
||||
|
||||
|
||||
# Dataset: thin wrapper around a class-per-subfolder image directory.
class CustomDataset:
    """Wraps torchvision's ImageFolder, applying `data_transform` to each image."""

    def __init__(self, data_dir):
        self.dataset = ImageFolder(root=data_dir, transform=data_transform)

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        # ImageFolder already yields (image, label) pairs.
        return self.dataset[idx]
|
||||
|
||||
|
||||
class MyResNet18(torch.nn.Module):
    """ResNet-18 backbone with its final fully-connected layer resized to
    `num_classes` outputs. Checkpoints saved by train() assume exactly this
    structure, so the layer layout must not change."""

    def __init__(self, num_classes):
        super(MyResNet18, self).__init__()
        # NOTE(review): `pretrained=True` is deprecated in newer torchvision
        # releases in favor of `weights=...` — confirm the installed version.
        self.resnet = torchvision.models.resnet18(pretrained=True)
        # ResNet-18's penultimate feature size is 512, hence the input size here.
        self.resnet.fc = nn.Linear(512, num_classes)

    def forward(self, x):
        return self.resnet(x)
|
||||
|
||||
|
||||
def train(epoch):
    """Train MyResNet18 on the ImageFolder-style dataset under ./dataset.

    Args:
        epoch: total number of epochs to run.

    Side effects: saves one checkpoint per epoch under model/ and shows a
    matplotlib plot of the mean loss per epoch when training finishes.
    """
    print("judge the cuda: " + str(torch.version.cuda))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("this train use devices: " + str(device))

    data_dir = "dataset"
    # Custom dataset instance (one class per subfolder of data_dir).
    custom_dataset = CustomDataset(data_dir)
    # Data loader.
    batch_size = 64
    data_loader = DataLoader(custom_dataset, batch_size=batch_size, shuffle=True)

    # num_classes equals the number of subfolders under dataset/ (one per
    # class); the model's output vector has this length.
    model = MyResNet18(num_classes=91)
    model.to(device)

    # Loss function.
    criterion = torch.nn.CrossEntropyLoss()
    # Optimizer.
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

    epoch_losses = []
    # Training loop.
    for i in range(epoch):
        losses = []

        # Progress bar over the batches.
        data_loader_tqdm = tqdm(data_loader)

        epoch_loss = 0
        for inputs, labels in data_loader_tqdm:
            # Move the batch to the training device (GPU or CPU).
            inputs, labels = inputs.to(device), labels.to(device)

            # Zero gradients so they do not accumulate across batches.
            optimizer.zero_grad()

            # Forward pass.
            outputs = model(inputs)

            # Loss between predictions and ground-truth labels.
            loss = criterion(outputs, labels)

            # Record the batch loss so the running mean can be reported.
            losses.append(loss.item())
            epoch_loss = np.mean(losses)
            data_loader_tqdm.set_description(
                f"This epoch is {str(i + 1)} and it's loss is {loss.item()}, average loss {epoch_loss}"
            )

            # Backward pass: compute parameter gradients.
            loss.backward()
            # Update parameters from the gradients.
            optimizer.step()
        epoch_losses.append(epoch_loss)
        # Save a checkpoint once per epoch (the original comment claimed
        # "per batch", which was wrong — this runs after the batch loop).
        torch.save(model.state_dict(), f'model/resnet18_{str(i + 1)}_{epoch_loss}.pth')

    # Plot the per-epoch mean loss curve.
    data = np.array(epoch_losses)
    plt.figure(figsize=(10, 6))
    plt.plot(data)
    plt.title(f"{epoch} epoch loss change")
    plt.xlabel("epoch")
    plt.ylabel("Loss")
    plt.show()
    print("completed. Model saved.")
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Train for 40 epochs.
    train(40)
|
||||
3
development/test/test.py
Normal file
3
development/test/test.py
Normal file
@@ -0,0 +1,3 @@
|
||||
import torch

# Report whether this PyTorch build was compiled with CUDA support,
# and whether a CUDA device is actually usable at runtime.
print(torch.version.cuda)
print(torch.cuda.is_available())
|
||||
Reference in New Issue
Block a user