1.下载tesseract-ocr
https://digi.bib.uni-mannheim.de/tesseract/
2. 配置环境变量
即在path中增加ocr的安装路径
3. 测试是否配置成功
tesseract
4.下载语言包
https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0
将下载的语言包文件(例如简体中文的 chi_sim.traineddata)加入到Tesseract安装目录下的tessdata目录中
5.命令行测试
1.tesseract + 图片路径 + 保存结果名 + -l + 语言集,示例: tesseract 1606150081.png 1606150081 -l chi_sim
2.tesseract + 图片路径 + stdout + -l + 语言集,示例: tesseract D:\test.png stdout -l chi_sim
5.程序实现,不需要引入任何第三方OCR jar包(直接通过命令行调用tesseract)
package com.example.demo.controller;

import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.StringTokenizer;

/**
 * REST endpoint that runs the locally installed Tesseract command-line tool on an
 * uploaded image and returns the text Tesseract prints to standard output.
 */
@RestController
public class OcrTestController {

    /** Location of the Tesseract executable (default Windows install directory). */
    private static final String TESSERACT_EXECUTABLE =
            "C:\\Program Files (x86)\\Tesseract-OCR\\tesseract";

    /** Language data set passed to Tesseract via {@code -l}. */
    private static final String OCR_LANGUAGE = "chi_sim";

    /** File name used when the client supplies no usable name. */
    private static final String FALLBACK_FILE_NAME = "upload";

    /**
     * Saves the uploaded image into the working directory ({@code user.dir}) and
     * runs {@code tesseract <image> stdout -l chi_sim} on it.
     *
     * @param file the uploaded image
     * @return the text written by Tesseract to standard output, one {@code \n} per line
     * @throws IOException if the upload cannot be stored, Tesseract cannot be started,
     *                     or Tesseract exits with a non-zero status
     */
    @PostMapping("/image/extract")
    public String reg(@RequestParam("file") MultipartFile file) throws IOException {
        String workDir = System.getProperty("user.dir");
        System.out.println(workDir);

        // Only the last path segment of the client-supplied name is used, so the
        // upload can never be written outside the working directory.
        File save = new File(workDir, safeFileName(file.getOriginalFilename()));
        file.transferTo(save);

        // Arguments are passed as a list rather than one formatted string, so
        // whitespace in the install path or the file name cannot be split into
        // (or smuggled in as) additional command-line arguments.
        List<String> command = Arrays.asList(
                TESSERACT_EXECUTABLE, save.getAbsolutePath(), "stdout", "-l", OCR_LANGUAGE);

        // NOTE(review): UTF-8 is assumed to be the encoding of Tesseract's text output.
        String result = run(command, StandardCharsets.UTF_8, true);
        System.out.println(result);
        return result;
    }

    /**
     * Runs a whitespace-separated command line and returns its standard output.
     *
     * <p>Kept for backward compatibility: the command is split on whitespace (no
     * quoting support), output is decoded with the platform default charset, and
     * the exit status is not checked.
     *
     * @param cmd the command line to execute
     * @return the command's standard output with each line terminated by {@code \n},
     *         or {@code null} if the command could not be started or read
     */
    public static String cmd(String cmd) {
        try {
            List<String> args = new ArrayList<>();
            StringTokenizer tokens = new StringTokenizer(cmd);
            while (tokens.hasMoreTokens()) {
                args.add(tokens.nextToken());
            }
            return run(args, Charset.defaultCharset(), false);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Starts the given command, collects its standard output and waits for it to exit.
     *
     * @param command        executable followed by its arguments
     * @param charset        charset used to decode the process output
     * @param requireSuccess whether a non-zero exit status is reported as an error
     * @return the collected output, each line terminated by {@code \n}
     * @throws IOException if the process cannot be started or read, the wait is
     *                     interrupted, or {@code requireSuccess} is set and the
     *                     exit status is non-zero
     */
    private static String run(List<String> command, Charset charset, boolean requireSuccess)
            throws IOException {
        ProcessBuilder builder = new ProcessBuilder(command);
        // Forward stderr to this JVM's stderr so an unread error pipe can never
        // fill up and stall the child process.
        builder.redirectError(ProcessBuilder.Redirect.INHERIT);

        Process process = builder.start();
        try {
            StringBuilder output = new StringBuilder();
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(process.getInputStream(), charset))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    output.append(line).append('\n');
                }
            }

            int exitCode = process.waitFor();
            if (requireSuccess && exitCode != 0) {
                throw new IOException("Command exited with code " + exitCode + ": " + command);
            }
            return output.toString();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new IOException("Interrupted while waiting for: " + command, e);
        } finally {
            // No-op when the process has already exited; otherwise stops a child
            // left running after a read failure or interruption.
            process.destroy();
        }
    }

    /**
     * Reduces a client-supplied file name to its last path segment.
     *
     * @param original the name reported by the client, possibly {@code null} or
     *                 containing directory components
     * @return a name without {@code /} or {@code \} separators; a fixed fallback
     *         when nothing usable remains
     */
    private static String safeFileName(String original) {
        String name = original == null ? "" : original;
        int lastSeparator = Math.max(name.lastIndexOf('/'), name.lastIndexOf('\\'));
        name = name.substring(lastSeparator + 1).trim();
        if (name.isEmpty() || ".".equals(name) || "..".equals(name)) {
            return FALLBACK_FILE_NAME;
        }
        return name;
    }
}
6.程序测试