java实现图片文字识别ocr

更新时间:2023-05-26 04:40:22 阅读: 评论:0

Java实现图片文字识别(OCR)
最近在开发的时候需要识别图片中的一些文字,网上找了相关资料之后,发现Google有一个离线的OCR工具Tesseract-OCR,以下为Java使用的demo。
在此之前,使用这个工具需要先在本地安装Tesseract-OCR:
下面一个是一定要安装的离线包,建议按默认选项安装。
上面一个是中文的语言包。如果网络可以访问外网,可以在安装的时候直接选择语言包在线安装,有多种语言可供选择;默认只带英文语言包。
exe安装好之后,把上面那个语言包文件拷贝到安装目录下的tessdata文件夹中,
如 C:\Program Files (x86)\Tesseract-OCR\tessdata。
然后下面两个是可选的依赖包,如果不需要把图片转换成临时文件再处理,可以不用带。
首先是一个用于生成临时文件的类,以防源文件被损坏,参考了某位博友的例子 @Gunner
package org.;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.Locale;
import javax.imageio.IIOImage;
import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
import javax.imageio.ImageWriteParam;
import javax.imageio.ImageWriter;
import javax.imageio.metadata.IIOMetadata;
import javax.imageio.stream.ImageInputStream;
import javax.imageio.stream.ImageOutputStream;
import com.sun.media.imageio.plugins.tiff.TIFFImageWriteParam;
/**
 * Creates an uncompressed TIFF working copy of an image so that later
 * processing never touches the original file.
 */
public class ImageIOHelper {

    /** Text inserted between the base name and the extension of the working copy. */
    private static final String TEMP_SUFFIX = "_text_recognize_temp";

    /** Locale requested for the TIFF writer; only applied when the writer lists it as available. */
    private Locale locale = Locale.CHINESE;

    /**
     * Creates a helper that requests the given locale from the TIFF writer.
     *
     * @param locale locale to request; {@code null} leaves the writer's locale untouched
     */
    public ImageIOHelper(Locale locale) {
        this.locale = locale;
    }

    /**
     * Creates a helper that requests {@link Locale#CHINESE}.
     */
    public ImageIOHelper() {
    }

    /**
     * Writes an uncompressed TIFF copy of {@code imageFile} next to it and
     * returns that copy. The original file is only read.
     *
     * @param imageFile   image to copy
     * @param imageFormat Image I/O format name of {@code imageFile}, e.g. {@code png} or {@code jpg}
     * @return the TIFF working copy, named {@code <base>_text_recognize_temp.tif}
     * @throws IOException if no reader exists for {@code imageFormat}, no TIFF
     *                     writer is installed, or reading/writing fails
     */
    public File createImage(File imageFile, String imageFormat) throws IOException {
        Iterator<ImageReader> readers = ImageIO.getImageReadersByFormatName(imageFormat);
        if (!readers.hasNext()) {
            throw new IOException("No image reader available for format: " + imageFormat);
        }
        Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName("tiff");
        if (!writers.hasNext()) {
            throw new IOException("No TIFF image writer is installed");
        }
        ImageReader reader = readers.next();
        ImageWriter writer = writers.next();

        File tempFile = tempImageFile(imageFile);
        ImageInputStream iis = null;
        ImageOutputStream ios = null;
        boolean written = false;
        try {
            iis = ImageIO.createImageInputStream(imageFile);
            if (iis == null) {
                throw new IOException("Cannot open image for reading: " + imageFile);
            }
            reader.setInput(iis);
            IIOMetadata streamMetadata = reader.getStreamMetadata();
            BufferedImage bi = reader.read(0);
            IIOImage image = new IIOImage(bi, null, reader.getImageMetadata(0));

            applyLocale(writer);
            // The writer's own default parameters avoid a hard dependency on a
            // plugin-specific ImageWriteParam subclass.
            ImageWriteParam tiffWriteParam = writer.getDefaultWriteParam();
            tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED);

            // The output stream does not truncate an existing file, so drop any
            // stale copy left behind by an earlier run.
            if (tempFile.exists() && !tempFile.delete()) {
                throw new IOException("Cannot replace stale temp file: " + tempFile);
            }
            ios = ImageIO.createImageOutputStream(tempFile);
            if (ios == null) {
                throw new IOException("Cannot open temp file for writing: " + tempFile);
            }
            writer.setOutput(ios);
            writer.write(streamMetadata, image, tiffWriteParam);
            written = true;
        } finally {
            try {
                if (ios != null) {
                    ios.close();
                }
            } finally {
                try {
                    if (iis != null) {
                        iis.close();
                    }
                } finally {
                    writer.dispose();
                    reader.dispose();
                    if (!written) {
                        // Do not leave a half-written copy behind.
                        tempFile.delete();
                    }
                }
            }
        }
        return tempFile;
    }

    /**
     * Sets the configured locale on the writer when the writer reports it as
     * available; {@code ImageWriter.setLocale} rejects any other non-null locale.
     *
     * @param writer writer about to be used
     */
    private void applyLocale(ImageWriter writer) {
        Locale[] supported = writer.getAvailableLocales();
        if (locale == null || supported == null) {
            return;
        }
        for (Locale candidate : supported) {
            if (candidate.equals(locale)) {
                writer.setLocale(locale);
                return;
            }
        }
    }

    /**
     * Builds the working-copy file for {@code imageFile}: same directory, base
     * name plus {@code _text_recognize_temp}, extension {@code .tif}.
     *
     * @param imageFile source image
     * @return the (not yet created) working-copy file
     * @throws IOException never thrown by this implementation; kept for signature compatibility
     */
    private File tempImageFile(File imageFile) throws IOException {
        // Work on the file name only, so a dot in a directory name is never
        // mistaken for the extension separator.
        String name = imageFile.getName();
        int dot = name.lastIndexOf('.');
        String base = dot > 0 ? name.substring(0, dot) : name;
        return new File(imageFile.getParentFile(), base + TEMP_SUFFIX + ".tif");
    }
}
下面是真正执行识别的类:
package org.;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import org.jdesktop.swingx.util.OS;
/**
* TEXT Recognize Utils
* @author ink.Flower
*
*/
public class OCRUtil {
private final String LANG_OPTION = "-l"; //英⽂字母⼩写l,并⾮数字1
private final String EOL = Property("line.parator");
private String tessPath = "C://Program Files (x86)//Tesract-OCR";//ocr默认安装路径
private String transname="chi_sim";//默认中⽂语⾔包,识别中⽂
/**
* Construct method of OCR ,t Tesract-OCR install path
* @param tessPath Tesract-OCR install path
* @param transFileName traningFile name aineddata
*/
divisionalpublic OCRUtil(String tessPath,String transFileName){
}
/**
* Construct method of OCR,default path is "C://Program Files (x86)//Tesract-OCR"
*/
public OCRUtil(){  }
public String getTessPath() {
return tessPath;
}
public void tTessPath(String tessPath) {
}
public String getTransname() {
return transname;
}
public void tTransname(String transname) {
}
public String getLANG_OPTION() {
return LANG_OPTION;
}summerholiday
public String getEOL() {
return EOL;
}
/**
* recognize text in image
* @param imageFile
* @param imageFormat
* @return text recognized in image
* @throws Exception
*/
public String recognizeText(File imageFile,String imageFormat)throws Exception{
File tempImage = new ImageIOHelper().createImage(imageFile,imageFormat);
return ocrImages(tempImage, imageFile);
}
/**
* recognize text in image
* @param imageFile
* @param imageFormat
cod是什么
* @param locale
* @return text recognized in image
* @throws Exception
*/
public String recognizeText(File imageFile,String imageFormat,Locale locale)throws Exception{
File tempImage = new ImageIOHelper(locale).createImage(imageFile,imageFormat);
return ocrImages(tempImage, imageFile);
}
/**
conflict
*
* @param tempImage
* @param imageFile
* @return
* @throws IOException
* @throws InterruptedException
*/
private String ocrImages(File tempImage,File imageFile) throws IOException, InterruptedException{    File outputFile = new ParentFile(),"output");
List<String> cmd = new ArrayList<String>();
if(OS.isWindowsXP()){sade
cmd.add(tessPath+"//tesract");
}el if(OS.isLinux()){
cmd.add("tesract");
}el{
cmd.add(tessPath+"//tesract");
}
cmd.add("");
cmd.Name());
cmd.add(LANG_OPTION);
cmd.add(transname);
ProcessBuilder pb = new ProcessBuilder();
pb.ParentFile());
cmd.t(1, Name());
Process process = pb.start();
int w = process.waitFor();
tempImage.delete();//删除临时正在⼯作⽂件
if(w==0){
BufferedReader in = new BufferedReader(new InputStreamReader(new AbsolutePath()+".txt"),"UTF-8"));
String str;
while((str = in.readLine())!=null){
strB.append(str).append(EOL);
}
in.clo();
}el{
String msg;
switch(w){
ca 1:
msg = "Errors accessing files.There may be spaces in your image's filename.";
break;
ca 29:
msg = "Cannot recongnize the image or its lected region.";
break;
ca 31:
msg = "Unsupported image format.";
break;
default:
msg = "Errors occurred.";
}
tempImage.delete();
throw new RuntimeException(msg);
}
new AbsolutePath()+".txt").delete();
String();
}
}
在实验中发现,如果对包含大量文字的大图直接进行识别,效果可能比较差,所以可以参考另一篇切图的博文,先把图片裁出一块之后再识别,
这样成功率会提高很多。
以上为离线识别版本,效率因图而异,具体使用的时候可以总结分析,希望对大家的学习有所帮助,也希望大家多多支持。

本文发布于:2023-05-26 04:40:22,感谢您对本站的认可!

本文链接:https://www.wtabcd.cn/fanwen/fan/90/122948.html

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。

标签:安装   识别   时候   离线
相关文章
留言与评论(共有 0 条评论)
   
验证码:
Copyright ©2019-2022 Comsenz Inc.Powered by © 专利检索| 网站地图