首页 > 其他分享> > 盲人智能眼镜手机APP开发之目标识别的移植

盲人智能眼镜手机APP开发之目标识别的移植

2021-06-08 11:58:58 作者：互联网

目标识别模型导入

关于目标识别模型的导入，过程中也存在很多问题，踩了很多坑，尝试过基于pytorch的yolo模型，尝试过基于paddlepaddle框架的ssd模型，都存在一些问题。最终还是采用基于paddle-lite的MobileNet ssd模型成功导入。记录一下探索的过程，以及在此期间遇到的问题。

一.pytorch框架向android studio 的移植

问题一：
pytorch框架的版本问题：pytorch与android studio之间的平台移植是最近才出现的技术，所以还存在一些问题。其中比较重要的一点是pytorch版本必须在1.3及以上才支持移植，否则就会移植失败。这个地方一开始没有注意到，使用pytorch1.2版本训练了yolov4的模型，最后无法导入，浪费了不少精力。
问题二：
模型导入之后在APP中的预测输出列表与在PC端的预测输出列表不同，没有找到原因，只能暂时放弃使用yolo模型的计划。

二.百度Paddlepaddle框架的移植

这一个框架在之前的项目中就已经得到使用了，对此我还是选用和之前一样的模型，MobileNet ssd，参考此链接进行模型训练以及源码下载。参考此链接进行模型的导入以及使用。

问题：
但是，在移植之后使用中也还是存在问题，APP会直接闪退，暂没找到问题原因。

三.Paddle-Lite的移植

参照官方的文档进行移植条件的准备工作。
主要分为以下几步：

源码编译或者直接下载
注意要移植的环境进行不同条件的编译。最终生成两个文件：
模型转化
使用opt工具进行模型转化，要注意opt的版本一定要和paddle-lite的版本相同，否则会出现问题。
模型移植
根据demo文件进行模型移植以及预测使用工作。目标识别完整模块代码：

package model.moblessd;
import com.baidu.paddle.lite.*;
import android.graphics.Bitmap;
import android.graphics.BitmapFactory;
import android.graphics.Matrix;
import android.util.Log;
import com.serenegiant.usbcameratest.MainActivity;
import com.baidu.paddle.lite.MobileConfig;
import com.baidu.paddle.lite.PaddlePredictor;
import com.baidu.paddle.lite.PowerMode;
import com.baidu.paddle.lite.Tensor;
import com.serenegiant.usb.UVCCamera;
import com.serenegiant.widget.SimpleUVCCameraTextureView;
import java.io.File;
/**
 * Object detector that runs a Paddle-Lite model on Android bitmaps.
 *
 * <p>The model is loaded once in the constructor; each call to {@link #predict(Bitmap)}
 * resizes the bitmap to the model input size, normalizes it, runs inference and returns a
 * human-readable list of detections whose score is at least {@link #scoreThreshold}.
 *
 * <p>The input/output tensors are shared instance state and nothing here synchronizes
 * access to them, so use one instance from one thread at a time.
 */
public class ssd {

    private static final String TAG = ssd.class.getName();

    /** Floats per detection in the output tensor: class id, score, left, top, right, bottom. */
    private static final int VALUES_PER_DETECTION = 6;
    /** Class ids in {@code [0, NUM_CLASSES)} are printed as {@code id:N}; others as "Unknown". */
    private static final int NUM_CLASSES = 21;
    private static final int NUM_THREADS = 4;

    // Per-channel (R, G, B) preprocessing: value * scale, minus mean, divided by std.
    private static final float[] scale = new float[]{1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
    private static final float[] inputMean = new float[]{0.485f, 0.456f, 0.406f};
    private static final float[] inputStd = new float[]{0.229f, 0.224f, 0.225f};

    private PaddlePredictor paddlePredictor;
    private Tensor inputTensor;
    // Model input shape; treated as {batch, channels, height, width} throughout this class.
    private long[] inputShape = new long[]{1, 3, 300, 300};

    /** Detections scoring below this value are skipped by {@link #predict(Bitmap)}. */
    protected float scoreThreshold = 0.5f;

    // Box coordinates exposed through the getimg*() getters. Nothing in this class writes
    // them, so they stay 0 unless code in the same package assigns them.
    float imgLeft;
    float imgTop;
    float imgRight;
    float imgBottom;

    /**
     * Loads the model and prepares the input tensor.
     *
     * @param modelPath path of the model file on the device
     * @throws Exception if the file does not exist or the predictor cannot be created;
     *                   in the latter case the original failure is attached as the cause
     */
    public ssd(String modelPath) throws Exception {
        File file = new File(modelPath);
        if (!file.exists()) {
            throw new Exception("model file is not exists!");
        }
        try {
            MobileConfig config = new MobileConfig();
            config.setModelFromFile(modelPath);
            config.setThreads(NUM_THREADS);
            config.setPowerMode(PowerMode.LITE_POWER_HIGH);
            paddlePredictor = PaddlePredictor.createPaddlePredictor(config);

            inputTensor = paddlePredictor.getInput(0);
            inputTensor.resize(inputShape);
        } catch (Exception e) {
            // Keep the cause so callers can see why loading failed.
            throw new Exception("load model fail!", e);
        }
    }

    /**
     * Runs detection on a frame taken from the camera.
     *
     * @param bitmap frame to analyse; it is recycled by this call
     * @return the detection report, see {@link #predict(Bitmap)}
     * @throws Exception if inference fails
     */
    public String predictImage(Bitmap bitmap) throws Exception {
        return predict(bitmap);
    }

    /**
     * Runs the model on {@code bmp} and formats every detection above the score threshold.
     *
     * <p>Each output line has the form
     * {@code <index>.<category> - <score> [<left>,<top>,<right>,<bottom>]}: the running
     * index of the detection, its category, its score, and the raw box values as emitted
     * by the model.
     *
     * <p>Note: {@code bmp} is recycled before this method returns, so the caller must not
     * use it afterwards.
     *
     * @param bmp image to analyse
     * @return one line per accepted detection, or an empty string if there are none
     * @throws Exception if the predictor fails; the original failure is attached as the cause
     */
    public String predict(Bitmap bmp) throws Exception {
        int inputHeight = (int) inputShape[2];
        int inputWidth = (int) inputShape[3];

        Bitmap scaled = getScaleBitmap(bmp);
        float[] inputData;
        try {
            inputData = getScaledMatrix(scaled, inputWidth, inputHeight);
        } finally {
            // Release pixel memory even if preprocessing throws.
            scaled.recycle();
            bmp.recycle();
        }

        inputTensor.setData(inputData);
        try {
            paddlePredictor.run();
        } catch (Exception e) {
            throw new Exception("predict image fail! log:" + e, e);
        }

        Tensor outputTensor = paddlePredictor.getOutput(0);
        // Fetch the output once instead of once per field per detection.
        float[] detections = outputTensor.getFloatData();
        if (detections == null) {
            return "";
        }
        long outputSize = 1;
        for (long dim : outputTensor.shape()) {
            outputSize *= dim;
        }
        long limit = Math.min(outputSize, (long) detections.length);

        StringBuilder outputResult = new StringBuilder();
        int objectIdx = 0;
        // Only visit complete records so a truncated tail can never index out of bounds.
        for (int i = 0; (long) i + VALUES_PER_DETECTION <= limit; i += VALUES_PER_DETECTION) {
            float score = detections[i + 1];
            if (score < scoreThreshold) {
                continue;
            }
            int categoryIdx = (int) detections[i];
            String categoryName = "Unknown";
            if (categoryIdx >= 0 && categoryIdx < NUM_CLASSES) {
                categoryName = "id:" + categoryIdx;
            }
            float rawLeft = detections[i + 2];
            float rawTop = detections[i + 3];
            float rawRight = detections[i + 4];
            float rawBottom = detections[i + 5];

            outputResult.append(objectIdx).append(".").append(categoryName)
                    .append(" - ").append(String.format("%.3f", score))
                    .append(" [").append(String.format("%.3f", rawLeft))
                    .append(",").append(String.format("%.3f", rawTop))
                    .append(",").append(String.format("%.3f", rawRight))
                    .append(",").append(String.format("%.3f", rawBottom))
                    .append("]\n");
            objectIdx++;
        }
        return outputResult.toString();
    }

    /** @return the stored left box coordinate */
    public float getimgl() {
        return imgLeft;
    }

    /** @return the stored right box coordinate */
    public float getimgr() {
        return imgRight;
    }

    /** @return the stored top box coordinate */
    public float getimgt() {
        return imgTop;
    }

    /** @return the stored bottom box coordinate */
    public float getimgd() {
        // Previously this method called itself and overflowed the stack.
        return imgBottom;
    }

    /**
     * Converts a bitmap into the normalized float buffer fed to the model.
     *
     * <p>The bitmap is scaled to {@code desWidth x desHeight}; the result is written as
     * three consecutive planes (all R values, then all G, then all B), each in row-major
     * pixel order, with every value computed as {@code (v * scale - mean) / std}.
     *
     * @param bitmap    source image; not recycled by this method
     * @param desWidth  target width in pixels
     * @param desHeight target height in pixels
     * @return buffer of length {@code 3 * desWidth * desHeight}
     */
    private static float[] getScaledMatrix(Bitmap bitmap, int desWidth, int desHeight) {
        int planeSize = desWidth * desHeight;
        float[] dataBuf = new float[3 * planeSize];
        int[] pixels = new int[planeSize];

        Bitmap bm = Bitmap.createScaledBitmap(bitmap, desWidth, desHeight, false);
        bm.getPixels(pixels, 0, desWidth, 0, 0, desWidth, desHeight);

        for (int i = 0; i < pixels.length; i++) {
            int clr = pixels[i];
            // getPixels fills the array row by row with stride desWidth, so pixel i already
            // sits at offset i inside each channel plane.
            int rIndex = i;
            int gIndex = rIndex + planeSize;
            int bIndex = gIndex + planeSize;
            dataBuf[rIndex] = (((clr & 0x00ff0000) >> 16) * scale[0] - inputMean[0]) / inputStd[0];
            dataBuf[gIndex] = (((clr & 0x0000ff00) >> 8) * scale[1] - inputMean[1]) / inputStd[1];
            dataBuf[bIndex] = (((clr & 0x000000ff)) * scale[2] - inputMean[2]) / inputStd[2];
        }

        // createScaledBitmap may hand back the source itself; only free a real temporary.
        if (bm != bitmap && !bm.isRecycled()) {
            bm.recycle();
        }
        return dataBuf;
    }

    /**
     * Returns the index of the largest value in {@code result}.
     *
     * @param result scores to search
     * @return index of the first maximum, or 0 if the array is empty
     */
    public static int getMaxResult(float[] result) {
        // Start below every float so arrays without positive entries are handled too.
        float best = Float.NEGATIVE_INFINITY;
        int bestIndex = 0;
        for (int i = 0; i < result.length; i++) {
            if (best < result[i]) {
                best = result[i];
                bestIndex = i;
            }
        }
        return bestIndex;
    }

    /**
     * Scales {@code bitmap} to the model input size with filtering enabled.
     *
     * @param bitmap source image; not recycled by this method
     * @return a bitmap of the model's input width and height
     */
    private Bitmap getScaleBitmap(Bitmap bitmap) {
        int bmpWidth = bitmap.getWidth();
        int bmpHeight = bitmap.getHeight();
        int targetHeight = (int) inputShape[2];
        int targetWidth = (int) inputShape[3];
        float scaleWidth = (float) targetWidth / bmpWidth;
        float scaleHeight = (float) targetHeight / bmpHeight;
        Matrix matrix = new Matrix();
        matrix.postScale(scaleWidth, scaleHeight);
        return Bitmap.createBitmap(bitmap, 0, 0, bmpWidth, bmpHeight, matrix, true);
    }
}

标签：String,int,APP,float,盲人,bitmap,import,new,移植
来源： https://blog.csdn.net/weixin_41877339/article/details/117693429