From 69ce1ef4058ec97fe2550e0e4cb59d11ce56e1d9 Mon Sep 17 00:00:00 2001 From: zhouwentao <1577701412@qq.com> Date: Fri, 11 Aug 2023 14:12:16 +0800 Subject: [PATCH] updates --- .../ocr/controller/OcrIdentifyController.java | 16 +- .../jeecg/modules/ocr/model/OcrResult.java | 3 + .../service/impl/OcrIdentifyServiceImpl.java | 70 +++++-- .../jeecg/modules/ocr/utils/StrCharUtil.java | 182 ++++++++++++++++++ 4 files changed, 252 insertions(+), 19 deletions(-) create mode 100644 jeecg-module-system/jeecg-system-biz/src/main/java/org/jeecg/modules/ocr/utils/StrCharUtil.java diff --git a/jeecg-module-system/jeecg-system-biz/src/main/java/org/jeecg/modules/ocr/controller/OcrIdentifyController.java b/jeecg-module-system/jeecg-system-biz/src/main/java/org/jeecg/modules/ocr/controller/OcrIdentifyController.java index dc5dcbf..f9dbe8f 100644 --- a/jeecg-module-system/jeecg-system-biz/src/main/java/org/jeecg/modules/ocr/controller/OcrIdentifyController.java +++ b/jeecg-module-system/jeecg-system-biz/src/main/java/org/jeecg/modules/ocr/controller/OcrIdentifyController.java @@ -210,9 +210,9 @@ public class OcrIdentifyController extends JeecgController edit(@RequestBody OcrIdentify ocrIdentify) { AssertUtils.notEmpty(ocrIdentify.getId(), "[id]不可为空"); OcrIdentify identify = ocrIdentifyService.getById(ocrIdentify.getId()); - if (Arrays.asList("0","1","2").contains(identify.getStatus())) { + /*if (Arrays.asList("0","1","2").contains(identify.getStatus())) { throw new JeecgBootException("当前状态不可编辑"); - } + }*/ ocrIdentifyService.updateById(ocrIdentify); return Result.OK("编辑成功!"); } @@ -343,6 +343,18 @@ public class OcrIdentifyController extends JeecgController restartTask(@RequestBody OcrIdentify ocrIdentify){ + AssertUtils.notEmpty(ocrIdentify.getId(), "[id]不可为空"); + OcrIdentify identify = ocrIdentifyService.getById(ocrIdentify.getId()); + if (!Arrays.asList("1","9").contains(identify.getStatus())) { + return Result.error("当前任务不可重新执行"); + } + //TODO 重新执行操作 + return Result.OK("已执行"); + } 
+ @ApiOperation(value = "上报通知给无量云") @PostMapping(value = "/pushNotice") public Result pushNotice(@RequestBody OcrIdentify ocrIdentify){ diff --git a/jeecg-module-system/jeecg-system-biz/src/main/java/org/jeecg/modules/ocr/model/OcrResult.java b/jeecg-module-system/jeecg-system-biz/src/main/java/org/jeecg/modules/ocr/model/OcrResult.java index f13d2ff..b17239c 100644 --- a/jeecg-module-system/jeecg-system-biz/src/main/java/org/jeecg/modules/ocr/model/OcrResult.java +++ b/jeecg-module-system/jeecg-system-biz/src/main/java/org/jeecg/modules/ocr/model/OcrResult.java @@ -26,4 +26,7 @@ public class OcrResult { private String failureReason; @ApiModelProperty(value = "规则验证结果") private Boolean ruleValidation=false; + + @ApiModelProperty(value = "文本匹配度") + private Double textRate; } diff --git a/jeecg-module-system/jeecg-system-biz/src/main/java/org/jeecg/modules/ocr/service/impl/OcrIdentifyServiceImpl.java b/jeecg-module-system/jeecg-system-biz/src/main/java/org/jeecg/modules/ocr/service/impl/OcrIdentifyServiceImpl.java index 1ef1c37..e1dc8f1 100644 --- a/jeecg-module-system/jeecg-system-biz/src/main/java/org/jeecg/modules/ocr/service/impl/OcrIdentifyServiceImpl.java +++ b/jeecg-module-system/jeecg-system-biz/src/main/java/org/jeecg/modules/ocr/service/impl/OcrIdentifyServiceImpl.java @@ -18,6 +18,7 @@ import org.jeecg.modules.ocr.mapper.OcrIdentifyMapper; import org.jeecg.modules.ocr.model.*; import org.jeecg.modules.ocr.service.*; import org.jeecg.modules.ocr.utils.ArrayOUtils; +import org.jeecg.modules.ocr.utils.StrCharUtil; import org.jeecg.modules.ocr.vo.OcrIdentifyVo; import org.jeecg.modules.ocr.vo.OcrRuleCheckVo; import org.springframework.scheduling.annotation.Async; @@ -154,13 +155,14 @@ public class OcrIdentifyServiceImpl extends ServiceImpl"); mapPutIfTrue(fieldRightMap, field, false); - ocrResultAdd(ocrResultList,value.getFieldName(), field, inputText, text, probability, imgPath, value.getFieldName() + "不匹配", false); + 
ocrResultAdd(ocrResultList,value.getFieldName(), field, inputText, text, probability, imgPath, value.getFieldName() + "不匹配", false,v); } } } @@ -364,13 +366,14 @@ public class OcrIdentifyServiceImpl extends ServiceImpl"); mapPutIfTrue(fieldRightMap, field, false); - ocrResultAdd(ocrResultList,value.getFieldName(), field, inputText, text, probability, imgPath, value.getFieldName() + "_参数不匹配", false); + ocrResultAdd(ocrResultList,value.getFieldName(), field, inputText, text, probability, imgPath, value.getFieldName() + "_参数不匹配", false,v); } } } @@ -477,6 +480,27 @@ public class OcrIdentifyServiceImpl extends ServiceImpl=i){ + for (int p = 0; p < textSplit.length; p++) { + trueNum++; + } + }else{ + break; + } + } + return 0; + } + @Override public void callbackWly(String ocrIdentifyId){ OcrIdentifyCallbackLog ocrIdentifyCallbackLog=new OcrIdentifyCallbackLog(); @@ -576,25 +600,18 @@ public class OcrIdentifyServiceImpl extends ServiceImpl ocrResultList,String fieldName, String field, String inputText, String ocrText, Double ocrPrecisionRate, String imgPath, String failureReason, Boolean ruleValidation) { + public static void ocrResultAdd(List ocrResultList,String fieldName, String field, String inputText, String ocrText, Double ocrPrecisionRate, String imgPath, String failureReason, Boolean ruleValidation,Double d) { OcrResult ocrResult = new OcrResult(); ocrResult.setTag(field); ocrResult.setTagName(fieldName); ocrResult.setOcrText(ocrText); ocrResult.setInputText(inputText); ocrResult.setOcrPrecisionRate(ocrPrecisionRate == null ? 
0d : ocrPrecisionRate); - + if (d==null) { + ocrResult.setTextRate(0d); + }else{ + ocrResult.setTextRate(new BigDecimal(d).setScale(2,BigDecimal.ROUND_HALF_UP).doubleValue()); + } System.out.println("-----------------"); System.out.println(failureReason); System.out.println("-----------------"); @@ -613,6 +630,25 @@ public class OcrIdentifyServiceImpl extends ServiceImpl ocrResultList,String fieldName, String field, String inputText, String ocrText, Double ocrPrecisionRate, String imgPath, String failureReason, Boolean ruleValidation) { + if (ruleValidation) { + ocrResultAdd(ocrResultList,fieldName,field,inputText,ocrText,ocrPrecisionRate,imgPath,failureReason,ruleValidation,100d); + }else{ + ocrResultAdd(ocrResultList,fieldName,field,inputText,ocrText,ocrPrecisionRate,imgPath,failureReason,ruleValidation,0d); + } + } + /** * 组装 checkSemanticModelMap *
diff --git a/jeecg-module-system/jeecg-system-biz/src/main/java/org/jeecg/modules/ocr/utils/StrCharUtil.java b/jeecg-module-system/jeecg-system-biz/src/main/java/org/jeecg/modules/ocr/utils/StrCharUtil.java
new file mode 100644
index 0000000..86ba2f8
--- /dev/null
+++ b/jeecg-module-system/jeecg-system-biz/src/main/java/org/jeecg/modules/ocr/utils/StrCharUtil.java
@@ -0,0 +1,182 @@
+package org.jeecg.modules.ocr.utils;
+
+import org.apache.commons.lang.StringUtils;
+
+/**
+ * @Description Scores how similar two strings are (common-subsequence ratio and edit-distance ratio)
+ * @Author ZhouWenTao
+ * @Date 2023/8/11 13:50
+ */
+public class StrCharUtil {
+
+    /**
+     * Returns the longest common subsequence of two strings; the argument order does not matter.
+     *
+     * @param strA first string, must not be null
+     * @param strB second string, must not be null
+     * @return the shared characters in order (ordered, not necessarily contiguous); empty if none
+     */
+    public static String longestCommonSubstringNoOrder(String strA, String strB) {
+        if (strA.length() >= strB.length()) {
+            return longestCommonSubstring(strA, strB);
+        } else {
+            return longestCommonSubstring(strB, strA);
+        }
+    }
+
+    /**
+     * Longest common subsequence by dynamic programming (longer string first, shorter string second).
+     *
+     * @param strLong  the longer string
+     * @param strShort the shorter string
+     * @return the longest common subsequence; original author's note: putting the longer string first helps efficiency
+     */
+    private static String longestCommonSubstring(String strLong, String strShort) {
+        char[] chars_strA = strLong.toCharArray();
+        char[] chars_strB = strShort.toCharArray();
+        int m = chars_strA.length;
+        int n = chars_strB.length;
+        int[][] matrix = new int[m + 1][n + 1]; // matrix[i][j] = LCS length of the first i / first j chars
+        for (int i = 1; i <= m; i++) {
+            for (int j = 1; j <= n; j++) {
+                if (chars_strA[i - 1] == chars_strB[j - 1]) {
+                    matrix[i][j] = matrix[i - 1][j - 1] + 1;
+                } else {
+                    matrix[i][j] = Math.max(matrix[i][j - 1], matrix[i - 1][j]);
+                }
+            }
+        }
+        char[] result = new char[matrix[m][n]];
+        int currentIndex = result.length - 1; // result is filled back to front while walking the table
+        while (matrix[m][n] != 0) {
+            if (matrix[m][n] == matrix[m][n - 1]) { // compare cells, not row references: same length without strShort[n-1]
+                n--;
+            } else if (matrix[m][n] == matrix[m - 1][n]) { // same length without strLong[m-1]
+                m--;
+            } else { // neither neighbour matches, so the two current characters are equal
+                result[currentIndex] = chars_strA[m - 1];
+                currentIndex--;
+                n--;
+                m--;
+            }
+        }
+        return new String(result);
+    }
+
+    private static boolean charReg(char charValue) { // true for U+4E00..U+9FA5, ASCII letters and ASCII digits
+        return (charValue >= 0x4E00 && charValue <= 0X9FA5) || (charValue >= 'a' && charValue <= 'z') || (charValue >= 'A' && charValue <= 'Z') || (charValue >= '0' && charValue <= '9');
+    }
+
+    private static String removeSign(String str) { // drops every character that charReg rejects
+        StringBuilder sb = new StringBuilder();
+        for (char item : str.toCharArray()) {
+            if (charReg(item)) {
+                sb.append(item);
+            }
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Similarity of two strings (approach one), computed on the inputs after removeSign filtering.
+     * Step 1: find the longest common subsequence (ordered, non-contiguous) of the two strings.
+     * Step 2: divide its length by the length of the longer filtered string.
+     *
+     * @param strA first string, must not be null
+     * @param strB second string, must not be null
+     * @return similarity in the range 0..1; 0 when neither string has any character left after filtering
+     */
+    public static double SimilarDegree(String strA, String strB) {
+        String newStrA = removeSign(strA);
+        String newStrB = removeSign(strB);
+        int temp = Math.max(newStrA.length(), newStrB.length());
+        int temp2 = longestCommonSubstringNoOrder(newStrA, newStrB).length();
+        return temp == 0 ? 0d : temp2 * 1.0 / temp; // guard the 0/0 case, which would otherwise yield NaN
+    }
+
+    /**
+     * Second approach: edit distance between two strings (insert, delete and substitute each cost 1).
+     *
+     * @param str    source string, must not be null
+     * @param target string to compare against, must not be null
+     * @return the number of single-character edits needed to turn str into target
+     */
+    private static int compare(String str, String target) {
+        int[][] d; // distance matrix
+        int n = str.length();
+        int m = target.length();
+        int i; // index over str
+        int j; // index over target
+        char ch1; // current char of str
+        char ch2; // current char of target
+        int temp; // substitution cost for the current cell: 0 when the chars are equal, otherwise 1
+        if (n == 0) {
+            return m;
+        }
+        if (m == 0) {
+            return n;
+        }
+        d = new int[n + 1][m + 1];
+        // initialise the first column
+        for (i = 0; i <= n; i++) {
+            d[i][0] = i;
+        }
+        // initialise the first row
+        for (j = 0; j <= m; j++) {
+            d[0][j] = j;
+        }
+        // walk over str
+        for (i = 1; i <= n; i++) {
+            ch1 = str.charAt(i - 1);
+            // match against target
+            for (j = 1; j <= m; j++) {
+                ch2 = target.charAt(j - 1);
+                if (ch1 == ch2) {
+                    temp = 0;
+                } else {
+                    temp = 1;
+                }
+
+                // minimum of: cell above + 1, cell to the left + 1, upper-left cell + temp
+                d[i][j] = min(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + temp);
+            }
+        }
+        return d[n][m];
+    }
+
+    private static int min(int one, int two, int three) { // smallest of the three values
+        return Math.min(one, Math.min(two, three));
+    }
+
+    /**
+     * Similarity of two strings (approach two), expressed as a percentage.
+     * Step 1: compute the edit distance between the two strings.
+     * Step 2: divide it by the length of the longer string, subtract from 1 and multiply by 100.
+     *
+     * @param strA first string
+     * @param strB second string
+     * @return similarity in the range 0..100; 0 when either string is blank
+     */
+    public static double similarityRatio(String strA, String strB) {
+        if (StringUtils.isBlank(strA)||StringUtils.isBlank(strB)) {
+            return 0;
+        }
+        double v = 1 - (double) compare(strA, strB) / Math.max(strA.length(), strB.length());
+        if (v<=1.0) { // always true here (the distance is never negative), so the ratio is scaled to a percentage
+            v = v*100;
+        }
+        return v;
+    }
+
+
+    public static void main(String[] args) {
+        String strA = "河北唐山市协和医院";
+        String strB = "河北省唐山协和医院";
+
+        System.out.println(longestCommonSubstringNoOrder(strA, strB));
+        System.out.println(SimilarDegree(strA, strB));
+        System.out.println(compare(strA, strB));
+        System.out.println(similarityRatio(strA, strB));
+    }
+
+}
\ No newline at end of file