Identify Text Data In Image To Read Mm/dd, Description And Amount Using Opencv Python

September 16, 2024 Post a Comment

import re import cv2 import pytesseract from pytesseract import Output from PIL import Image from pytesseract import image_to_string img = cv2.imread('/home/cybe

Solution 1:

To identify the text in the image, you must first preprocess it. Here, we remove the horizontal and vertical grid lines and then pass the cleaned image to Pytesseract OCR. Here are the detected lines to be removed, highlighted in green:

Result

Output from Pytesseract

ELECTRONICWITHDRAWALSDATEDESCRIPTIONAMOUNT01/02 Merchant Service Merch Fee 8030996550 CCD ID:1841010148$34.3001/03 Authnet Gateway Billing 104820413 CCD ID:187056856922.9501/0401/04OnlineTransferToMma...4622Transaction#:7794410276200.0001/0801/08OnlinePayment7732727073ToCleaningConnoisseur300.0001/1101/11OnlinePayment7744233248ToRamseySweis148.8001/1101/11OnlineTransferToMma...4622Transaction#:7816805988200.0001/1101/11PaymentToChaseCardEndingIN2342 500.0001/11AqabaHoldings,Inahl-51 Ahl-51 CCD ID:11137200481,441.2101/1401/12PaymentToChaseCardEndingIN2342 1,000.0001/1401/12OnlineTransferToMma...4622Transaction#:78410267191,000.0001/1601/16PaymentToChaseCardEndingIN2342 1,000.0001/1601/16OnlinePayment7852542882ToOakhurstGolf&CountryClub495.0001/1701/17OnlinePayment7762351731ToRamseySweis399.0501/1801/18OnlineTransferToMma...4622Transaction#:7837118990200.0001/18 Small Business Icpayment PPD ID:1131414876302.2401/2201/21PaymentToChaseCardEndingIN2342 1,000.0001/2301/23PaymentToChaseCardEndingIN2342 1,000.0001/2501/25OnlinePayment7786855117ToLvOfficeLimitedPartnership644.40

Code

import cv2
import pytesseract
import numpy as np

# Tell pytesseract where the Tesseract executable lives. This is a Windows
# install path; adjust it for your machine.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"


def _erase_lines(binary, canvas, kernel_size):
    """Paint over the lines detected in *binary* with white on *canvas*.

    binary: inverted binary image (ink is white) used for line detection.
    canvas: BGR image that is modified in place.
    kernel_size: (width, height) of the rectangular structuring element;
        a wide, flat kernel targets horizontal lines and a tall, narrow
        one targets vertical lines.
    """
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
    detected = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel, iterations=2)
    found = cv2.findContours(detected, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns either 2 or 3 values depending on the OpenCV
    # version; the contour list is first in the 2-tuple form and second in
    # the 3-tuple form.
    contours = found[0] if len(found) == 2 else found[1]
    for contour in contours:
        # thickness=-1 fills the contour, blanking the whole line.
        cv2.drawContours(canvas, [contour], -1, (255, 255, 255), -1)


image = cv2.imread('1.jpg')
if image is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError("Could not read image '1.jpg'")
result = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Otsu picks the threshold automatically; inverting makes text and grid
# lines white so the morphological opening can isolate them.
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Remove horizontal lines, then vertical lines.
_erase_lines(thresh, result, (35, 1))
_erase_lines(thresh, result, (1, 15))

# Run OCR on the cleaned image with Tesseract page segmentation mode 6.
data = pytesseract.image_to_string(result, lang='eng', config='--psm 6')
print(data)

cv2.imshow('thresh', thresh)
cv2.imshow('result', result)
cv2.waitKey()

Python Playground

Identify Text Data In Image To Read Mm/dd, Description And Amount Using Opencv Python

Solution 1:

Post a Comment for "Identify Text Data In Image To Read Mm/dd, Description And Amount Using Opencv Python"