This commit is contained in:
辉鸭蛋
2026-04-19 00:32:33 +08:00
parent 2ea663b0ff
commit dca422de8e
9 changed files with 319 additions and 64 deletions

View File

@@ -311,6 +311,17 @@ public class BgiOnnxFactory
: new BgiYoloPredictor(model, cached, CreateSessionOptions(model, false));
}
/// <summary>
/// Creates a RedNet classification predictor for the given model.
/// </summary>
/// <param name="model">The model to create the predictor for.</param>
/// <param name="labelRelativePath">Optional relative path of the label file; .txt and .json are supported.</param>
/// <returns>A new <see cref="BgiRedNetPredictor"/> that owns the inference session created here.</returns>
public BgiRedNetPredictor CreateRedNetPredictor(BgiOnnxModel model, string? labelRelativePath = null)
{
    var session = CreateInferenceSession(model);
    try
    {
        return new BgiRedNetPredictor(model, session, labelRelativePath);
    }
    catch
    {
        // The predictor only releases the session from its own Dispose(). When its
        // constructor throws, no predictor exists, so the session must be released here.
        session.Dispose();
        throw;
    }
}
/// <summary>
/// 根据模型创建一个onnx运行时的InferenceSession
/// </summary>
@@ -582,4 +593,4 @@ public class BgiOnnxFactory
result["enable_opencl_throttling"] = "true";
return result;
}
}
}

View File

@@ -50,6 +50,12 @@ public class BgiOnnxModel
public static readonly BgiOnnxModel BgiAvatarSide =
Register("BgiAvatarSide", @"Assets\Model\Common\avatar_side_classify_sim.onnx");
/// <summary>
/// RedNet-110 classification model, registered under the name "BgiPickRedNet110"
/// and loaded from resnet_pick.onnx.
/// NOTE(review): the original comment described this as character recognition, while the
/// identifier and the file name suggest pick recognition — confirm which is intended.
/// </summary>
public static readonly BgiOnnxModel BgiPickRedNet110 =
Register("BgiPickRedNet110", @"Assets\Model\Common\resnet_pick.onnx");
/// <summary>
/// paddleOCR V4 检测模型
/// </summary>
@@ -139,4 +145,4 @@ public class BgiOnnxModel
RegisteredModels.Add(model);
return model;
}
}
}

View File

@@ -0,0 +1,157 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.Json;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using SixLabors.ImageSharp;
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Processing;
namespace BetterGenshinImpact.Core.Recognition.ONNX;
/// <summary>
/// Image classifier backed by an ONNX Runtime <see cref="InferenceSession"/>.
/// An image is resized to the model's input size, normalized per channel with the
/// ImageNet mean/std constants, fed to the model's first input, and the first output
/// is turned into probabilities with a softmax to pick the most likely class.
/// </summary>
public sealed class BgiRedNetPredictor : IDisposable
{
    /// <summary>Edge length used when the model does not report a positive height or width.</summary>
    private const int DefaultInputSize = 224;

    /// <summary>Per-channel (R, G, B) mean subtracted from pixel values scaled to [0, 1].</summary>
    private static readonly float[] ImagenetMean = [0.485f, 0.456f, 0.406f];

    /// <summary>Per-channel (R, G, B) divisor applied after the mean has been subtracted.</summary>
    private static readonly float[] ImagenetStd = [0.229f, 0.224f, 0.225f];

    private readonly InferenceSession _session;

    /// <summary>Class labels indexed by class id, or null when no usable label file was found.</summary>
    private readonly string[]? _labels;

    private readonly string _inputName;
    private readonly int _inputWidth;
    private readonly int _inputHeight;

    /// <summary>
    /// Use BgiOnnxFactory to create instances of this class.
    /// </summary>
    /// <param name="model">Model description; its relative path is used to derive the default label file path.</param>
    /// <param name="session">
    /// Inference session for the model. The predictor disposes it in <see cref="Dispose"/>;
    /// if this constructor throws, the session is left untouched and the caller must release it.
    /// </param>
    /// <param name="labelRelativePath">
    /// Optional relative path of a label file (.json string array, or one label per line).
    /// When null, the model path with its extension replaced by ".labels.txt" is used.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="model"/> or <paramref name="session"/> is null.</exception>
    /// <exception cref="InvalidDataException">The model exposes no input, or its first input has fewer than 4 dimensions.</exception>
    internal BgiRedNetPredictor(BgiOnnxModel model, InferenceSession session, string? labelRelativePath = null)
    {
        ArgumentNullException.ThrowIfNull(model);
        ArgumentNullException.ThrowIfNull(session);

        _session = session;

        // FirstOrDefault yields a default pair (null key and value) when the model has no inputs.
        var input = _session.InputMetadata.FirstOrDefault();
        if (input.Key is null || input.Value is null)
        {
            throw new InvalidDataException("ONNX 模型输入信息为空");
        }

        _inputName = input.Key;

        var dimensions = input.Value.Dimensions;
        if (dimensions.Length < 4)
        {
            throw new InvalidDataException($"ONNX 模型输入维度不正确,预期 >=4实际 {dimensions.Length}");
        }

        // The last two dimensions are read as height and width; non-positive values
        // fall back to the default size.
        _inputHeight = dimensions[^2] > 0 ? dimensions[^2] : DefaultInputSize;
        _inputWidth = dimensions[^1] > 0 ? dimensions[^1] : DefaultInputSize;

        _labels = LoadLabels(labelRelativePath ?? Path.ChangeExtension(model.ModelRelativePath, ".labels.txt"));
    }

    /// <summary>
    /// Classifies an image and returns the most probable class.
    /// </summary>
    /// <param name="image">Image to classify; it is not modified, a resized clone is used instead.</param>
    /// <returns>
    /// Index, label and softmax probability of the best class. The label is "Class_{index}"
    /// when no labels were loaded or the index lies beyond the loaded labels.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="image"/> is null.</exception>
    /// <exception cref="InvalidDataException">The first model output contains no values.</exception>
    public RedNetPrediction Predict(Image<Rgb24> image)
    {
        ArgumentNullException.ThrowIfNull(image);

        using var resized = image.Clone(ctx => ctx.Resize(_inputWidth, _inputHeight));
        var tensorInput = BuildInputTensor(resized);
        using var results = _session.Run([
            NamedOnnxValue.CreateFromTensor(_inputName, tensorInput)
        ]);

        var logits = results.First().AsEnumerable<float>().ToArray();
        if (logits.Length == 0)
        {
            throw new InvalidDataException("ONNX 模型输出为空");
        }

        var probabilities = Softmax(logits);

        // Arg-max; on ties the earliest index wins.
        var maxIndex = 0;
        var maxValue = probabilities[0];
        for (var i = 1; i < probabilities.Length; i++)
        {
            if (probabilities[i] <= maxValue)
            {
                continue;
            }

            maxValue = probabilities[i];
            maxIndex = i;
        }

        // maxIndex starts at 0 and only takes loop indices, so only the upper bound needs checking.
        var label = _labels is not null && maxIndex < _labels.Length
            ? _labels[maxIndex]
            : $"Class_{maxIndex}";

        return new RedNetPrediction(maxIndex, label, maxValue);
    }

    /// <summary>
    /// Converts an image that already has the model's input size into a [1, 3, height, width]
    /// tensor holding normalized R, G and B planes.
    /// </summary>
    private DenseTensor<float> BuildInputTensor(Image<Rgb24> image)
    {
        var tensor = new DenseTensor<float>([1, 3, _inputHeight, _inputWidth]);
        for (var y = 0; y < _inputHeight; y++)
        {
            for (var x = 0; x < _inputWidth; x++)
            {
                var pixel = image[x, y];
                var r = pixel.R / 255f;
                var g = pixel.G / 255f;
                var b = pixel.B / 255f;
                tensor[0, 0, y, x] = (r - ImagenetMean[0]) / ImagenetStd[0];
                tensor[0, 1, y, x] = (g - ImagenetMean[1]) / ImagenetStd[1];
                tensor[0, 2, y, x] = (b - ImagenetMean[2]) / ImagenetStd[2];
            }
        }

        return tensor;
    }

    /// <summary>
    /// Computes softmax probabilities for the given logits.
    /// </summary>
    private static float[] Softmax(float[] logits)
    {
        // Subtracting the maximum keeps every exponent at or below zero.
        var max = logits.Max();
        var result = new float[logits.Length];
        var sum = 0d;
        for (var i = 0; i < logits.Length; i++)
        {
            var value = Math.Exp(logits[i] - max);
            result[i] = (float)value;
            sum += value;
        }

        // Without a positive sum the un-normalized values are returned as they are.
        if (sum <= 0)
        {
            return result;
        }

        for (var i = 0; i < result.Length; i++)
        {
            result[i] = (float)(result[i] / sum);
        }

        return result;
    }

    /// <summary>
    /// Loads class labels from a file addressed relative to the application root.
    /// </summary>
    /// <param name="labelRelativePath">Relative path of a .json file (string array) or a text file with one label per line.</param>
    /// <returns>The labels, or null when the file does not exist or yields no labels.</returns>
    private static string[]? LoadLabels(string labelRelativePath)
    {
        var labelAbsolutePath = Core.Config.Global.Absolute(labelRelativePath);
        if (!File.Exists(labelAbsolutePath))
        {
            return null;
        }

        var ext = Path.GetExtension(labelAbsolutePath);
        if (ext.Equals(".json", StringComparison.OrdinalIgnoreCase))
        {
            var labels = JsonSerializer.Deserialize<string[]>(File.ReadAllText(labelAbsolutePath));
            return labels is { Length: > 0 } ? labels : null;
        }

        // Any other extension is read as plain text: blank lines are skipped, the rest trimmed.
        var lines = File.ReadAllLines(labelAbsolutePath)
            .Where(t => !string.IsNullOrWhiteSpace(t))
            .Select(t => t.Trim())
            .ToArray();
        return lines.Length > 0 ? lines : null;
    }

    /// <summary>
    /// Disposes the inference session handed to the constructor.
    /// </summary>
    public void Dispose()
    {
        _session.Dispose();
    }
}
/// <summary>
/// Result of one classification: the selected class and its confidence.
/// </summary>
/// <param name="ClassIndex">Zero-based index of the selected class.</param>
/// <param name="ClassLabel">Label text of the selected class.</param>
/// <param name="Confidence">Confidence value reported for the selected class.</param>
public readonly record struct RedNetPrediction(int ClassIndex, string ClassLabel, float Confidence);

View File

@@ -1,4 +1,4 @@
using CommunityToolkit.Mvvm.ComponentModel;
using CommunityToolkit.Mvvm.ComponentModel;
using System;
namespace BetterGenshinImpact.GameTask.AutoPick
@@ -37,6 +37,14 @@ namespace BetterGenshinImpact.GameTask.AutoPick
[ObservableProperty]
private string _ocrEngine = PickOcrEngineEnum.Paddle.ToString();
/// <summary>
/// Pick recognition mode, stored as the name of a PickRecognitionModeEnum member.
/// - Ocr: recognize the item name through OCR
/// - RedNet: recognize the item name directly with a classification model
/// Defaults to Ocr.
/// </summary>
[ObservableProperty]
private string _recognitionMode = PickRecognitionModeEnum.Ocr.ToString();
/// <summary>
/// 急速模式
/// 无视文字识别结果,直接拾取

View File

@@ -1,5 +1,6 @@
using BetterGenshinImpact.Core.Config;
using BetterGenshinImpact.Core.Recognition;
using BetterGenshinImpact.Core.Recognition.ONNX;
using BetterGenshinImpact.Core.Recognition.OCR;
using BetterGenshinImpact.Core.Recognition.ONNX.SVTR;
using BetterGenshinImpact.Core.Script.Dependence.Model.TimerConfig;
@@ -8,6 +9,7 @@ using BetterGenshinImpact.GameTask.AutoPick.Assets;
using BetterGenshinImpact.Helpers;
using BetterGenshinImpact.Service;
using BetterGenshinImpact.View.Windows;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using OpenCvSharp;
using System;
@@ -49,9 +51,14 @@ public partial class AutoPickTrigger : ITaskTrigger
private HashSet<string> _whiteList = [];
private RecognitionObject _pickRo;
// Classifier for the BgiPickRedNet110 model. It is created through the BgiOnnxFactory
// resolved from the service provider the first time Value is read.
// NOTE(review): no disposal of the created predictor is visible in this part of the file — confirm.
private readonly Lazy<BgiRedNetPredictor> _pickRedNetPredictor = new(() =>
App.ServiceProvider.GetRequiredService<BgiOnnxFactory>().CreateRedNetPredictor(BgiOnnxModel.BgiPickRedNet110));
// External configuration
private AutoPickExternalConfig? _externalConfig;
// Asset scale, read from TaskContext once when this instance is constructed.
double scale = TaskContext.Instance().SystemInfo.AssetScale;
// Auto-pick configuration object, fetched from TaskContext once when this instance is constructed.
AutoPickConfig config = TaskContext.Instance().Config.AutoPickConfig;
public AutoPickTrigger()
{
@@ -192,8 +199,7 @@ public partial class AutoPickTrigger : ITaskTrigger
return;
}
var scale = TaskContext.Instance().SystemInfo.AssetScale;
var config = TaskContext.Instance().Config.AutoPickConfig;
// 存在 L 键位是千星奇遇,无需拾取
using var lKeyRa = content.CaptureRectArea.Find(_autoPickAssets.LRo);
@@ -204,9 +210,10 @@ public partial class AutoPickTrigger : ITaskTrigger
// 识别到拾取键,开始识别物品图标
var isExcludeIcon = false;
_autoPickAssets.ChatIconRo.RegionOfInterest = new Rect(
var iconRect = new Rect(
foundRectArea.X + (int)(config.ItemIconLeftOffset * scale), foundRectArea.Y,
(int)((config.ItemTextLeftOffset - config.ItemIconLeftOffset) * scale), foundRectArea.Height);
_autoPickAssets.ChatIconRo.RegionOfInterest = iconRect;
using var chatIconRa = content.CaptureRectArea.Find(_autoPickAssets.ChatIconRo);
speedTimer.Record("识别聊天图标");
if (!chatIconRa.IsEmpty())
@@ -251,66 +258,15 @@ public partial class AutoPickTrigger : ITaskTrigger
// return;
//}
// 这类文字识别比较特殊,都是针对某个场景的文字识别,所以暂时未抽象到识别对象中
// 计算出文字区域
var textRect = new Rect(foundRectArea.X + (int)(config.ItemTextLeftOffset * scale), foundRectArea.Y,
(int)((config.ItemTextRightOffset - config.ItemTextLeftOffset) * scale), foundRectArea.Height);
if (textRect.X + textRect.Width > content.CaptureRectArea.CacheGreyMat.Width
|| textRect.Y + textRect.Height > content.CaptureRectArea.CacheGreyMat.Height)
{
Debug.WriteLine("AutoPickTrigger: 文字区域 out of range");
return;
}
using var gradMat = new Mat(content.CaptureRectArea.CacheGreyMat,
new Rect(textRect.X, textRect.Y, textRect.Width, Math.Min(textRect.Height, 3)));
var avgGrad = gradMat.Sobel(MatType.CV_32F, 1, 0).Mean().Val0;
if (avgGrad < -3)
{
Debug.WriteLine($"AutoPickTrigger: 已在拾取中,跳过本次拾取 {avgGrad}");
return;
}
string text;
if (config.OcrEngine == nameof(PickOcrEngineEnum.Yap))
if (config.RecognitionMode == nameof(PickRecognitionModeEnum.RedNet))
{
var textMat = new Mat(content.CaptureRectArea.CacheGreyMat, textRect);
text = TextInferenceFactory.Pick.Value.Inference(textMat);
text = RecognizePickTextByRedNet(content, iconRect);
}
else
{
using var textMat = new Mat(content.CaptureRectArea.SrcMat, textRect);
var boundingRect = TextRectExtractor.GetTextBoundingRect(textMat);
// var boundingRect = new Rect(); // 不使用自己写的文字区域提取
// 如果找到有效区域
if (boundingRect.X < 20 && boundingRect.Width > 5 && boundingRect.Height > 5)
{
// 截取只包含文字的区域
using var textOnlyMat = new Mat(textMat, new Rect(0, 0,
boundingRect.Right + 5 < textMat.Width ? boundingRect.Right + 5 : textMat.Width, textMat.Height));
text = OcrFactory.Paddle.OcrWithoutDetector(textOnlyMat);
// if (RuntimeHelper.IsDebug)
// {
// // 如果不等于正确文字,则保存图片
// if (text != "烹饪")
// {
// var path = Global.Absolute("log/pick");
// Directory.CreateDirectory(path);
// var str = $"{DateTime.Now:yyyyMMddHHmmssfff}";
// // textMat.SaveImage(Path.Combine(path, $"pick_ocr_ori_{str}.png"));
// // 画上 boundingRect
// Cv2.Rectangle(textMat, boundingRect, new Scalar(0, 0, 255), 1);
// textMat.SaveImage(Path.Combine(path, $"pick_ocr_rect_{str}.png"));
// bin.SaveImage(Path.Combine(path, $"bin_{str}.png"));
// }
// }
}
else
{
Debug.WriteLine("-- 无法识别到有效文字区域尝试直接OCR DET");
text = OcrFactory.Paddle.Ocr(textMat);
}
text = RecognizePickTextByOcr(content, foundRectArea, config.OcrEngine);
}
speedTimer.Record("文字识别");
@@ -370,6 +326,87 @@ public partial class AutoPickTrigger : ITaskTrigger
speedTimer.DebugPrint();
}
/// <summary>
/// Classifies the item icon area with the RedNet predictor and returns the predicted label.
/// </summary>
/// <param name="content">Current capture; the icon area is cropped from its capture region.</param>
/// <param name="iconRect">Rectangle of the icon area inside the capture region.</param>
/// <returns>
/// The predicted class label, or an empty string when the confidence is below 0.47
/// or when cropping/inference throws.
/// </returns>
private string RecognizePickTextByRedNet(CaptureContent content, Rect iconRect)
{
    // Kept as a double so the float confidence is compared in double precision.
    const double minConfidence = 0.47;

    try
    {
        using var iconRegion = content.CaptureRectArea.DeriveCrop(iconRect);
        var classifier = _pickRedNetPredictor.Value;
        var result = classifier.Predict(iconRegion.CacheImage);
        Debug.WriteLine($"AutoPickTrigger: RedNet预测结果 {result.ClassLabel} 置信度 {result.Confidence}");

        return result.Confidence < minConfidence ? string.Empty : result.ClassLabel;
    }
    catch (Exception e)
    {
        // NOTE(review): the log text mentions falling back to OCR, but this method only
        // returns an empty string — confirm whether the caller performs that fallback.
        _logger.LogWarning(e, "AutoPick RedNet推理失败回退OCR");
        return string.Empty;
    }
}
/// <summary>
/// Reads the item name to the right of the pick key with the selected OCR engine.
/// </summary>
/// <param name="content">Current capture providing the grey and source images.</param>
/// <param name="foundRectArea">Region where the pick key was found; the text area is derived from it.</param>
/// <param name="ocrEngine">Name of a PickOcrEngineEnum member; "Yap" selects the Yap model, anything else Paddle.</param>
/// <returns>
/// The recognized text, or an empty string when the text area does not fit inside the
/// capture or when the gradient check indicates a pick is already in progress.
/// </returns>
private string RecognizePickTextByOcr(CaptureContent content, Region foundRectArea, string ocrEngine)
{
    // This kind of text recognition is scene specific, so it is not abstracted into a recognition object yet.
    // Compute the text area from the configured offsets.
    var textRect = new Rect(foundRectArea.X + (int)(config.ItemTextLeftOffset * scale), foundRectArea.Y,
        (int)((config.ItemTextRightOffset - config.ItemTextLeftOffset) * scale), foundRectArea.Height);

    var greyMat = content.CaptureRectArea.CacheGreyMat;

    // Reject rectangles that are empty, start at a negative origin or extend past the image,
    // so that no invalid region reaches the Mat constructors below.
    if (textRect.X < 0 || textRect.Y < 0 || textRect.Width <= 0 || textRect.Height <= 0
        || textRect.X + textRect.Width > greyMat.Width
        || textRect.Y + textRect.Height > greyMat.Height)
    {
        Debug.WriteLine("AutoPickTrigger: 文字区域 out of range");
        return string.Empty;
    }

    // Mean horizontal gradient over the top rows (at most 3) of the text area.
    using var gradMat = new Mat(greyMat,
        new Rect(textRect.X, textRect.Y, textRect.Width, Math.Min(textRect.Height, 3)));
    // Sobel returns a new Mat that must be disposed as well.
    using var sobelMat = gradMat.Sobel(MatType.CV_32F, 1, 0);
    var avgGrad = sobelMat.Mean().Val0;
    if (avgGrad < -3)
    {
        Debug.WriteLine($"AutoPickTrigger: 已在拾取中,跳过本次拾取 {avgGrad}");
        return string.Empty;
    }

    if (ocrEngine == nameof(PickOcrEngineEnum.Yap))
    {
        using var textMat = new Mat(greyMat, textRect);
        return TextInferenceFactory.Pick.Value.Inference(textMat);
    }

    using var paddleMat = new Mat(content.CaptureRectArea.SrcMat, textRect);
    var boundingRect = TextRectExtractor.GetTextBoundingRect(paddleMat);
    // If a plausible text area was found
    if (boundingRect.X < 20 && boundingRect.Width > 5 && boundingRect.Height > 5)
    {
        // Crop from the left edge up to 5 px past the text's right edge (clamped to the image width).
        using var textOnlyMat = new Mat(paddleMat, new Rect(0, 0,
            boundingRect.Right + 5 < paddleMat.Width ? boundingRect.Right + 5 : paddleMat.Width, paddleMat.Height));
        return OcrFactory.Paddle.OcrWithoutDetector(textOnlyMat);
    }

    Debug.WriteLine("-- 无法识别到有效文字区域尝试直接OCR DET");
    return OcrFactory.Paddle.Ocr(paddleMat);
}
private bool DoNotPick(string text)
{
// 唯一一个动态拾取项,特殊处理,不拾取
@@ -576,4 +613,4 @@ public partial class AutoPickTrigger : ITaskTrigger
return cleanedSpan.ToString();
}
}
}

View File

@@ -1,4 +1,4 @@
namespace BetterGenshinImpact.GameTask.AutoPick;
namespace BetterGenshinImpact.GameTask.AutoPick;
public enum PickOcrEngineEnum
{

View File

@@ -0,0 +1,7 @@
namespace BetterGenshinImpact.GameTask.AutoPick;
/// <summary>
/// Recognition modes available for auto pick.
/// </summary>
public enum PickRecognitionModeEnum
{
/// <summary>Recognize the item name through OCR.</summary>
Ocr,
/// <summary>Recognize the item name with the RedNet classification model.</summary>
RedNet
}

View File

@@ -92,12 +92,39 @@
<ui:TextBlock Grid.Row="0"
Grid.Column="0"
FontTypography="Body"
Text="选择自动拾取文字识别引擎"
Text="选择自动拾取识别模式"
TextWrapping="Wrap" />
<ui:TextBlock Grid.Row="1"
Grid.Column="0"
Foreground="{ui:ThemeResource TextFillColorTertiaryBrush}"
Text="Paddle可识别所有文字,速度慢,消耗少;Yap可识别部分文字,快且准,消耗大"
Text="Ocr: 通过OCR识别物品名; RedNet: 直接推理拾取图像识别物品名"
TextWrapping="Wrap" />
<ComboBox Grid.Row="0"
Grid.RowSpan="2"
Grid.Column="1"
Width="100"
Margin="0,0,8,0"
ItemsSource="{Binding PickRecognitionModeNames}"
SelectedItem="{Binding Config.AutoPickConfig.RecognitionMode, Mode=TwoWay}" />
</Grid>
<Grid Margin="16">
<Grid.RowDefinitions>
<RowDefinition Height="Auto" />
<RowDefinition Height="Auto" />
</Grid.RowDefinitions>
<Grid.ColumnDefinitions>
<ColumnDefinition Width="*" />
<ColumnDefinition Width="Auto" />
</Grid.ColumnDefinitions>
<ui:TextBlock Grid.Row="0"
Grid.Column="0"
FontTypography="Body"
Text="选择OCR引擎"
TextWrapping="Wrap" />
<ui:TextBlock Grid.Row="1"
Grid.Column="0"
Foreground="{ui:ThemeResource TextFillColorTertiaryBrush}"
Text="仅在识别模式为 Ocr 时生效Paddle可识别所有文字,速度慢,消耗少;Yap可识别部分文字,快且准,消耗大"
TextWrapping="Wrap" />
<ComboBox Grid.Row="0"
Grid.RowSpan="2"

View File

@@ -30,6 +30,8 @@ public partial class TriggerSettingsPageViewModel : ViewModel
[ObservableProperty] private string[] _selectChatOptionTypeNames = [SelectChatOptionTypes.UseMouse, SelectChatOptionTypes.UseInteractionKey];
// Names of the PickRecognitionModeEnum members (Ocr, RedNet), exposed as an observable property.
[ObservableProperty] private string[] _pickRecognitionModeNames = [PickRecognitionModeEnum.Ocr.ToString(), PickRecognitionModeEnum.RedNet.ToString()];
[ObservableProperty] private string[] _pickOcrEngineNames = [PickOcrEngineEnum.Paddle.ToString(), PickOcrEngineEnum.Yap.ToString()];
[ObservableProperty] private List<string> _pickButtonNames;