【问题标题】:Improving the accuracy of text recognition when using iOS Vision Framework to scan a document(提高使用 iOS Vision Framework 扫描文档时文本识别的准确性)
【发布时间】:2020-11-13 09:11:33
【问题描述】:

我正在尝试构建一个能够读取任何文档/卡片上的文本的文档扫描仪。但是,它有时无法正确识别信用卡上的文本。准确性不错,但肯定有改进的余地。我使用了 VisionTextRecognition 框架,并使用了所有适合设置文本识别的标准设置。

这是我用来设置文本识别请求的代码:

// Build the text-recognition request. The completion handler walks every recognized
// text observation and collects the best candidate string of each, one per line.
textRecognitionRequest = VNRecognizeTextRequest(completionHandler: { (request, error) in
            // Nothing to do when the results are missing, empty, or not text observations.
            guard let requestResults = request.results as? [VNRecognizedTextObservation],
                  !requestResults.isEmpty else { return }

            var foundText = ""
            // Iterate the cast observations (the original looped over an undefined `recognizedText`).
            for observation in requestResults {
                // Skip observations that yield no candidate instead of aborting the whole pass.
                guard let candidate = observation.topCandidates(1).first else { continue }
                foundText.append(candidate.string + "\n")
            }
            // NOTE(review): `foundText` is not consumed in this excerpt — presumably the
            // full code hands it on to the UI or a validator from here.
        })
        textRecognitionRequest.recognitionLevel = .accurate
        textRecognitionRequest.usesLanguageCorrection = true

有没有人有任何建议可以通过在某个时间点对扫描进行预处理或后处理以编程方式改进识别?

【问题讨论】:

    标签: ios text-recognition visionkit


    【解决方案1】:

    更新:我制作了一个完全开源的项目,可以帮助您完成所需的工作。看看吧:https://github.com/ethanwa/credit-card-scanner-and-validator

    **

    除了添加一些供识别器专门查找的预设值之外,您能做的提高准确性的事情并不多;而这种做法对信用卡(CC)号码没有意义,所以我就不展示那段代码了。真正的改进只能依靠 Apple 随着 iOS 的版本迭代不断改进其文本识别模型。

    与此同时,我建议您可以做以下两件事:

    1. 对您认为自己收到的信用卡号进行验证。例如,Visa 以 4 开头,MasterCard 以 5 开头,Discover 以 6 开头,Amex 以 3 开头,等等。它们有特定的长度等等。见这里:https://www.freeformatter.com/credit-card-number-generator-validator.html

    2. 在相机源上一遍又一遍地迭代,直到你得到一个可以验证的数字。我不确定您当前是否只是在为卡拍照并处理该图像(听起来像您正在做的那样),但您应该每秒处理许多图像,直到获得有效的 CC。这很可能是 Apple 在您的手机上通过 Apple Pay 添加卡或使用银行应用以数字方式存入支票(查找有效路由和帐号)时的做法。

    这是我的意思的一个例子......

    我编写的这段代码可以在任何给定文本中挑选和验证 ISBN 编号(基本上是编目书籍的 10 和 13 位数字,其中有一个校验位用于验证),并且会一直查找直到找到所有数字,然后验证。它工作得非常好,而且速度非常快。看看这个 Swift 5.3 代码:

    import UIKit
    import Vision
    import Photos
    import AVFoundation
    
    /// Scans live camera frames (or a photo picked from the library) with Vision text
    /// recognition and stops as soon as one recognized token passes the ISBN checks.
    class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate {
        
        /// All recognized strings of the most recent pass, separated by single spaces.
        var recognizedText = ""
        /// The validated number found in the most recent pass, or "" when none was found.
        var finalText = ""
        /// The image handed to `processImage()`: either a camera frame or a picked photo.
        var image: UIImage?
        /// `true` while a camera frame is being recognized; frames arriving meanwhile are dropped.
        /// NOTE(review): read on the video queue and written from other queues without
        /// synchronization — confirm whether that is acceptable for this app.
        var processing = false
        
        @IBOutlet weak var nameLabel: UILabel!
        @IBOutlet weak var setLabel: UILabel!
        @IBOutlet weak var numberLabel: UILabel!
        
        lazy var textDetectionRequest: VNRecognizeTextRequest = {
            // Capture `self` weakly: the controller owns the request, so a strong capture
            // (e.g. passing `self.handleDetectedText` directly) would form a retain cycle.
            let request = VNRecognizeTextRequest { [weak self] request, error in
                self?.handleDetectedText(request: request, error: error)
            }
            request.recognitionLevel = .accurate
            // Language correction is switched off; the tokens of interest are digit strings.
            request.usesLanguageCorrection = false
            return request
        }()
        
        private let videoOutput = AVCaptureVideoDataOutput()
        private let captureSession = AVCaptureSession()
        /// Shared Core Image context, created once instead of once per frame.
        private let ciContext = CIContext(options: nil)
        private lazy var previewLayer: AVCaptureVideoPreviewLayer = {
            let preview = AVCaptureVideoPreviewLayer(session: self.captureSession)
            preview.videoGravity = .resizeAspect
            return preview
        }()
    
        // MARK: AV
        
        override func viewDidLoad() {
            super.viewDidLoad()
            addCameraInput()
            addVideoOutput()
        }
        
        /// Attaches the default video camera to the capture session.
        /// Logs and returns (instead of crashing) when no camera is available or the
        /// input cannot be created/added, e.g. on the simulator.
        private func addCameraInput() {
            guard let device = AVCaptureDevice.default(for: .video) else {
                debugPrint("no video capture device available")
                return
            }
            do {
                let cameraInput = try AVCaptureDeviceInput(device: device)
                guard captureSession.canAddInput(cameraInput) else {
                    debugPrint("unable to add camera input to the capture session")
                    return
                }
                captureSession.addInput(cameraInput)
            } catch {
                print("Error: \(error)")
            }
        }
        
        override func viewDidLayoutSubviews() {
            super.viewDidLayoutSubviews()
            // Keep the preview covering the whole view after every layout pass.
            previewLayer.frame = view.bounds
        }
        
        /// Configures the video output for 32BGRA frames delivered to this controller on a
        /// dedicated serial queue, and attaches it to the capture session.
        private func addVideoOutput() {
            videoOutput.videoSettings = [kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA)]
            videoOutput.setSampleBufferDelegate(self, queue: DispatchQueue(label: "my.image.handling.queue"))
            guard captureSession.canAddOutput(videoOutput) else {
                debugPrint("unable to add video output to the capture session")
                return
            }
            captureSession.addOutput(videoOutput)
        }
        
        /// Sample-buffer delegate callback: converts the frame to a `UIImage` and starts a
        /// recognition pass, unless a previous pass is still in flight.
        func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
            // Drop frames while the previous one is still being recognized.
            guard !processing else { return }
            guard let frame = CMSampleBufferGetImageBuffer(sampleBuffer) else {
                debugPrint("unable to get image from sample buffer")
                return
            }
            print("did receive image frame")
            
            processing = true
            image = convert(cmage: CIImage(cvPixelBuffer: frame))
            processImage()
        }
    
        /// Converts a `CIImage` to a `UIImage`.
        /// - Parameter cmage: The Core Image representation of a frame.
        /// - Returns: A `CGImage`-backed `UIImage`; if rendering fails, a `CIImage`-backed
        ///   image (whose `cgImage` is `nil`) is returned instead of crashing.
        func convert(cmage: CIImage) -> UIImage {
            guard let cgImage = ciContext.createCGImage(cmage, from: cmage.extent) else {
                return UIImage(ciImage: cmage)
            }
            return UIImage(cgImage: cgImage)
        }
        
        // AV
        
        /// Clears the result labels and runs the text-recognition request on `image`
        /// on a background queue. Results arrive in `handleDetectedText(request:error:)`.
        func processImage() {
            DispatchQueue.main.async {
                self.nameLabel.text = ""
                self.setLabel.text = ""
                self.numberLabel.text = ""
            }
            
            guard let image = image, let cgImage = image.cgImage else {
                // No usable bitmap: release the frame gate so the camera path does not stall.
                processing = false
                return
            }
            
            let requests = [textDetectionRequest]
            // NOTE(review): orientation is fixed to `.right` for both camera frames and
            // picked photos — confirm this is intended for library images.
            let imageRequestHandler = VNImageRequestHandler(cgImage: cgImage, orientation: .right, options: [:])
            DispatchQueue.global(qos: .userInitiated).async {
                do {
                    try imageRequestHandler.perform(requests)
                } catch {
                    print("Error: \(error)")
                    // Make sure a failed pass never leaves the frame gate closed.
                    self.processing = false
                }
            }
        }
        
        /// Completion handler of the text request: joins the top candidate of every
        /// observation, splits the text into tokens and looks for the first token that is
        /// 10 or 13 characters long, consists of ISBN characters and passes `Validate.isbn`
        /// (defined outside this excerpt). On a hit the capture session is stopped and the
        /// preview removed; the number label is updated in every successful pass.
        fileprivate func handleDetectedText(request: VNRequest?, error: Error?) {
            // Whatever path is taken below, allow the next camera frame to be processed.
            // (The original returned early from the candidate loop without doing so.)
            defer { processing = false }
            
            finalText = ""
            
            if let error = error {
                print(error.localizedDescription)
                return
            }
            guard let results = request?.results, !results.isEmpty else {
                print("No text was found.")
                return
            }
            guard let observations = request?.results as? [VNRecognizedTextObservation] else { return }
            
            // Observations without a candidate are skipped rather than aborting the pass.
            recognizedText = observations
                .compactMap { $0.topCandidates(1).first?.string }
                .map { $0 + " " }
                .joined()
            
            // Hyphens are removed so hyphenated numbers become one token; then split on spaces.
            let tokens = recognizedText
                .replacingOccurrences(of: "-", with: "")
                .filter { !"\n\t\r".contains($0) }
                .components(separatedBy: " ")
            
            for token in tokens {
                let candidate = token.trimmingCharacters(in: .whitespacesAndNewlines)
                guard candidate.count == 10 || candidate.count == 13,
                      candidate.containsISBNnums,
                      Validate.isbn(candidate) else { continue } // validate barcode
                
                finalText += candidate
                print(candidate)
                captureSession.stopRunning()
                DispatchQueue.main.async {
                    self.previewLayer.removeFromSuperlayer()
                }
                break
            }
            
            // Snapshot the value so the label shows this pass's result even if a later
            // pass resets `finalText` before the main queue runs.
            let text = finalText
            DispatchQueue.main.async {
                self.numberLabel.text = text
            }
        }
        
        // MARK: Buttons
    
        /// Shows the live camera preview and starts the capture session.
        @IBAction func takePhoto(_ sender: Any) {
            view.layer.addSublayer(previewLayer)
            // `startRunning()` blocks until the session has started, so keep it off the main thread.
            DispatchQueue.global(qos: .userInitiated).async {
                self.captureSession.startRunning()
            }
        }
        
        /// Lets the user pick an existing photo from the library.
        @IBAction func choosePhoto(_ sender: Any) {
            presentPhotoPicker(type: .photoLibrary)
        }
        
        /// Presents a `UIImagePickerController` for the given source type, if that source
        /// is available on this device.
        fileprivate func presentPhotoPicker(type: UIImagePickerController.SourceType) {
            guard UIImagePickerController.isSourceTypeAvailable(type) else {
                debugPrint("image picker source type is not available")
                return
            }
            let controller = UIImagePickerController()
            controller.sourceType = type
            controller.delegate = self
            present(controller, animated: true, completion: nil)
        }
    }
    
    // MARK: - Photo picker callbacks
    extension ViewController: UIImagePickerControllerDelegate, UINavigationControllerDelegate {
        
        /// The user backed out of the picker: just close it.
        func imagePickerControllerDidCancel(_ picker: UIImagePickerController) {
            self.dismiss(animated: true, completion: nil)
        }
        
        /// The user chose a photo: close the picker, store the original image and run
        /// text recognition on it.
        func imagePickerController(_ picker: UIImagePickerController, didFinishPickingMediaWithInfo info: [UIImagePickerController.InfoKey : Any]) {
            self.dismiss(animated: true, completion: nil)
            
            // `image` becomes nil when the info dictionary carries no original UIImage.
            let pickedImage = info[.originalImage] as? UIImage
            self.image = pickedImage
            self.processImage()
        }
    }
    
    extension String {
        /// `true` when the string is non-empty and every character is one of the
        /// digits `0`–`9` or an uppercase `X`; `false` otherwise.
        var containsISBNnums: Bool {
            let allowed = "0123456789X"
            return !isEmpty && allSatisfy { allowed.contains($0) }
        }
    }
    

    【讨论】:

      猜你喜欢
      • 2011-06-07
      • 1970-01-01
      • 2017-09-08
      • 1970-01-01
      • 2019-04-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2020-05-18
      相关资源
      最近更新 更多