盲人智能眼镜手机APP开发之目标识别的移植
作者:互联网
目标识别模型导入
关于目标识别模型的导入,过程中踩了很多坑:先后尝试过基于PyTorch的YOLO模型和基于PaddlePaddle框架的SSD模型,都存在一些问题,最终采用基于Paddle-Lite的MobileNet SSD模型才成功导入。下面记录一下探索的过程,以及在此期间遇到的问题。
一.pytorch框架向android studio 的移植
- 问题一:
PyTorch框架的版本问题:PyTorch向Android Studio的移植是最近才推出的技术,所以还存在一些问题,其中比较重要的一点是PyTorch版本必须在1.3及以上才支持移植,否则就会移植失败。一开始没有注意到这一点,使用PyTorch 1.2版本训练了YOLOv4模型,最后无法导入,浪费了不少精力。
- 问题二:
模型导入之后在APP中的预测输出列表与在PC端的预测输出列表不同,没有找到原因,只能暂时放弃使用yolo模型的计划。
二.百度Paddlepaddle框架的移植
这一个框架在之前的项目中就已经得到使用了,对此我还是选用和之前一样的模型,MobileNet ssd,参考此链接进行模型训练以及源码下载。参考此链接进行模型的导入以及使用。
- 问题:
但是,在移植之后使用中也还是存在问题,APP会直接闪退,暂没找到问题原因。
三.Paddle-Lite的移植
参照官方的文档进行移植条件的准备工作。
主要分为以下几步:
-
源码编译或者直接下载
注意要移植的环境进行不同条件的编译。最终生成两个文件:
-
模型转化
使用opt工具进行模型转化,要注意opt的版本一定要和Paddle-Lite的版本相同,否则会出现问题。
-
模型移植
根据demo文件进行模型移植以及预测使用工作。目标识别完整模块代码:
package model.moblessd;
import com.baidu.paddle.lite.*;
import android.graphics.Bitmap;
import android.graphics.BitmapFactory;
import android.graphics.Matrix;
import android.util.Log;
import com.serenegiant.usbcameratest.MainActivity;
import com.baidu.paddle.lite.MobileConfig;
import com.baidu.paddle.lite.PaddlePredictor;
import com.baidu.paddle.lite.PowerMode;
import com.baidu.paddle.lite.Tensor;
import com.serenegiant.usb.UVCCamera;
import com.serenegiant.widget.SimpleUVCCameraTextureView;
import java.io.File;
/**
 * Object detector backed by a Paddle-Lite {@link PaddlePredictor}.
 *
 * <p>A bitmap is resized to the model input size, normalised per channel and fed to
 * the predictor. The first output tensor is read as consecutive rows of six floats
 * {@code [category, score, left, top, right, bottom]}, and every row whose score
 * reaches {@link #scoreThreshold} is rendered as one line of text.
 */
public class ssd {
    private static final String TAG = ssd.class.getName();
    private static final int NUM_THREADS = 4;
    /** Floats per detection row: category, score, left, top, right, bottom. */
    private static final int VALUES_PER_DETECTION = 6;
    /** Category indices in {@code [0, CATEGORY_COUNT)} are reported as {@code "id:<n>"}. */
    private static final int CATEGORY_COUNT = 21;

    private static final float[] scale = new float[]{1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
    private static final float[] inputMean = new float[]{0.485f, 0.456f, 0.406f};
    private static final float[] inputStd = new float[]{0.229f, 0.224f, 0.225f};

    private PaddlePredictor paddlePredictor;
    private Tensor inputTensor;
    // Input tensor dimensions. The planar fill in getScaledMatrix implies the order
    // {batch, channels, height, width}; height and width are currently equal.
    private final long[] inputShape = new long[]{1, 3, 300, 300};

    /** Detections scoring below this value are skipped by {@link #predict(Bitmap)}. */
    protected float scoreThreshold = 0.5f;

    // NOTE(review): nothing in this class assigns these four fields, so the getters
    // below return 0 unless code elsewhere in the package writes to them.
    float imgLeft;
    float imgTop;
    float imgRight;
    float imgBottom;

    /**
     * Loads the model and prepares the predictor's first input tensor.
     *
     * @param modelPath path of the model file to load
     * @throws Exception if the file does not exist or the predictor cannot be created;
     *                   in the latter case the original failure is attached as the cause
     */
    public ssd(String modelPath) throws Exception {
        File file = new File(modelPath);
        if (!file.exists()) {
            throw new Exception("model file is not exists!");
        }
        try {
            MobileConfig config = new MobileConfig();
            config.setModelFromFile(modelPath);
            config.setThreads(NUM_THREADS);
            config.setPowerMode(PowerMode.LITE_POWER_HIGH);
            paddlePredictor = PaddlePredictor.createPaddlePredictor(config);
            inputTensor = paddlePredictor.getInput(0);
            inputTensor.resize(inputShape);
        } catch (Exception e) {
            // Keep the cause so the caller can see why loading failed.
            throw new Exception("load model fail!", e);
        }
    }

    /**
     * Runs detection on a bitmap (for example a camera frame).
     *
     * @param bitmap image to analyse; it is recycled by this call
     * @return the text produced by {@link #predict(Bitmap)}
     * @throws Exception if inference fails
     */
    public String predictImage(Bitmap bitmap) throws Exception {
        return predict(bitmap);
    }

    /**
     * Preprocesses the bitmap, runs the model and formats the detections.
     *
     * <p>Each reported detection is one line of the form
     * {@code <index>.<category> - <score> [<left>,<top>,<right>,<bottom>]} where the
     * box values are the raw model outputs.
     *
     * @param bmp image to analyse; both it and the intermediate scaled copy are
     *            recycled before inference, so the caller must not reuse it
     * @return one line per detection at or above {@link #scoreThreshold}; empty if none
     * @throws IllegalArgumentException if {@code bmp} is {@code null}
     * @throws Exception if the predictor fails while running
     */
    public String predict(Bitmap bmp) throws Exception {
        if (bmp == null) {
            throw new IllegalArgumentException("bitmap must not be null");
        }
        int inputHeight = (int) inputShape[2];
        int inputWidth = (int) inputShape[3];

        Bitmap scaled = getScaleBitmap(bmp);
        float[] inputData = getScaledMatrix(scaled, inputWidth, inputHeight);
        scaled.recycle();
        bmp.recycle();
        inputTensor.setData(inputData);

        try {
            paddlePredictor.run();
        } catch (Exception e) {
            throw new Exception("predict image fail! log:" + e, e);
        }

        Tensor outputTensor = paddlePredictor.getOutput(0);
        long outputSize = 1;
        for (long dim : outputTensor.shape()) {
            outputSize *= dim;
        }
        // Fetch the output once instead of once per field per row.
        float[] detections = outputTensor.getFloatData();
        long limit = Math.min(outputSize, (long) detections.length);

        StringBuilder outputResult = new StringBuilder();
        int objectIdx = 0;
        // Only walk complete rows so a short or ragged output cannot index past the end.
        for (int i = 0; (long) i + VALUES_PER_DETECTION <= limit; i += VALUES_PER_DETECTION) {
            float score = detections[i + 1];
            if (score < scoreThreshold) {
                continue;
            }
            int categoryIdx = (int) detections[i];
            String categoryName = "Unknown";
            if (categoryIdx >= 0 && categoryIdx < CATEGORY_COUNT) {
                categoryName = "id:" + categoryIdx;
            }
            float rawLeft = detections[i + 2];
            float rawTop = detections[i + 3];
            float rawRight = detections[i + 4];
            float rawBottom = detections[i + 5];

            outputResult.append(objectIdx).append(".").append(categoryName)
                    .append(" - ").append(String.format("%.3f", score))
                    .append(" [").append(String.format("%.3f", rawLeft))
                    .append(",").append(String.format("%.3f", rawTop))
                    .append(",").append(String.format("%.3f", rawRight))
                    .append(",").append(String.format("%.3f", rawBottom))
                    .append("]\n");
            objectIdx++;
        }
        return outputResult.toString();
    }

    /** @return the current value of {@code imgLeft} */
    public float getimgl() {
        return imgLeft;
    }

    /** @return the current value of {@code imgRight} */
    public float getimgr() {
        return imgRight;
    }

    /** @return the current value of {@code imgTop} */
    public float getimgt() {
        return imgTop;
    }

    /** @return the current value of {@code imgBottom} */
    public float getimgd() {
        // Previously this method called itself and overflowed the stack.
        return imgBottom;
    }

    /**
     * Converts a bitmap into a normalised planar float buffer.
     *
     * <p>The bitmap is scaled to {@code desWidth x desHeight}; each pixel's red, green
     * and blue bytes are multiplied by {@code scale}, shifted by {@code inputMean} and
     * divided by {@code inputStd}. The buffer holds the full red plane, then the green
     * plane, then the blue plane, each in row-major pixel order.
     *
     * @param bitmap    source image; not recycled by this method
     * @param desWidth  target width in pixels
     * @param desHeight target height in pixels
     * @return buffer of length {@code 3 * desWidth * desHeight}
     */
    private static float[] getScaledMatrix(Bitmap bitmap, int desWidth, int desHeight) {
        int planeSize = desWidth * desHeight;
        float[] dataBuf = new float[3 * planeSize];
        int[] pixels = new int[planeSize];

        Bitmap bm = Bitmap.createScaledBitmap(bitmap, desWidth, desHeight, false);
        bm.getPixels(pixels, 0, desWidth, 0, 0, desWidth, desHeight);

        // getPixels fills the array row by row with stride desWidth, so pixel i already
        // sits at offset i within each colour plane.
        for (int i = 0; i < planeSize; i++) {
            int clr = pixels[i];
            dataBuf[i] = (((clr & 0x00ff0000) >> 16) * scale[0] - inputMean[0]) / inputStd[0];
            dataBuf[i + planeSize] = (((clr & 0x0000ff00) >> 8) * scale[1] - inputMean[1]) / inputStd[1];
            dataBuf[i + 2 * planeSize] = ((clr & 0x000000ff) * scale[2] - inputMean[2]) / inputStd[2];
        }

        // createScaledBitmap may hand back the source itself when the size already
        // matches; only free the bitmap if it is a separate temporary copy.
        if (bm != bitmap && !bm.isRecycled()) {
            bm.recycle();
        }
        return dataBuf;
    }

    /**
     * Finds the index of the largest value in an array.
     *
     * @param result values to scan
     * @return index of the first largest value that is greater than 0, or 0 when the
     *         array is empty or holds no positive value
     */
    public static int getMaxResult(float[] result) {
        float probability = 0;
        int r = 0;
        for (int i = 0; i < result.length; i++) {
            if (probability < result[i]) {
                probability = result[i];
                r = i;
            }
        }
        return r;
    }

    /**
     * Stretches a bitmap to the model input size without preserving aspect ratio.
     *
     * @param bitmap source image
     * @return bitmap produced by {@code Bitmap.createBitmap} with the scaling matrix
     */
    private Bitmap getScaleBitmap(Bitmap bitmap) {
        int bmpWidth = bitmap.getWidth();
        int bmpHeight = bitmap.getHeight();
        float scaleWidth = (float) inputShape[3] / bmpWidth;
        float scaleHeight = (float) inputShape[2] / bmpHeight;
        Matrix matrix = new Matrix();
        matrix.postScale(scaleWidth, scaleHeight);
        return Bitmap.createBitmap(bitmap, 0, 0, bmpWidth, bmpHeight, matrix, true);
    }
}
标签:String,int,APP,float,盲人,bitmap,import,new,移植 来源: https://blog.csdn.net/weixin_41877339/article/details/117693429