【精选】Java 文本内容差异对比实现介绍 |
您所在的位置:网站首页 › wps如何将两篇文章进行对比 › 【精选】Java 文本内容差异对比实现介绍 |
目录
前言 / 一、文本差异对比介绍 / 二、依赖下载 / 三、获取两文件的不同点-patch / 四、根据patch生成统一的差异格式unifiedDiff / 五、根据unifiedDiff打补丁 / 六、对比两文件的不同点并按行显示不同
前言
本文是对文本对比和diff算法的简单介绍,如果你想快速通过java+html或者纯JavaScript来实现两文件并排对比,可看我另一篇文章: 文本对比,文本差异并排对比显示实现 一、文本差异对比介绍 举一个最常见的例子,我们使用git进行提交时,通常会使用git diff --cached来查看这次提交做了哪些改动,这里我们先简单定义一下什么是diff:diff就是目标文本和源文本之间的区别,也就是将源文本变成目标文本所需要的操作。 在 Git 中,有四种diff算法,分别是Myers、Minimal、Patience和Histogram,它们用于获取同一文件在两个不同提交中的差异。 Myers算法由Eugene W.Myers在1986年发表的一篇论文中提出,是一个能在大部分情况下产生“最短的直观的”diff的算法。 文本差异对比涉及到的算法介绍 : How different are different diff algorithms in Git Myers Diff 差分算法 好用的在线文本对比: 在线文本比较工具 好用的桌面文本对比软件:Meld、DiffMerge、xxdiff、Diffuse、Kompare 常见的文本差异对比库: 1.java-diff-utils 2.diff-match-patch 本文通过java-diff-utils库来实现文本差异对比 二、依赖下载 jar下载网址:io.github.java-diff-utils:java-diff-utils:4.11 官方git地址:java-diff-utils 官方demo示例: java-diff-utils Examples 如果你使用Maven: <dependency> <groupId>io.github.java-diff-utils</groupId> <artifactId>java-diff-utils</artifactId> <version>4.11</version> </dependency> 如果你使用Gradle: implementation 'io.github.java-diff-utils:java-diff-utils:4.11' 本文下面会用到的三个对比文本如下: test1.txt 和 test3.txt : _this.ispc = function(){ var userAgentInfo = navigator.userAgent; var Agents = ["Android", "iPhone", "SymbianOS", "Windows Phone", "iPad", "iPod"]; var flag = true; for (var v = 0; v < Agents.length; v++) { if (userAgentInfo.indexOf(Agents[v]) > 0) { flag = false; break; } } return flag; } window._assignInfo = {}; window._curnodepersons = []; window.attachmode = '0'; window.isEsignature = false; window.upList = []; window.downList = []; window.billId = ''; window.upProcessIdList = []; window.downProcessIdList = []; var urlsplit = window.location.href.split("#"); if(urlsplit.length>1){ //alert(window.location.href); location.href = urlsplit[0]; } test2.txt: _this.ispc = function(){ var userAgentInfo = navigator.userAgent; var Agents = ["Android", "iPhone", "SymbianOS", "Windows Phone", "iPad", "iPod"]; var flag = true; for (var v = 0; v < Agents.length; v++) { if (userAgentInfo.indexOf(Agents[v]) > 0) { flag = false; insert1; insert2; break; } } return flag; } window._assignInfo = {}; window._curnodepersons = []; window.attachmode = '0'; window.isEsignature = false; add window.billId 
= ''; window.upProcessIdList = []; window.downProcessIdList = []; var urlsplit = window.location.href.split("#"); if(urlsplit.length>1){ //alert(window.location.href); location.href = qazwer[0]; } 三、获取两文件的不同点-patch //原始文件 List<String> original = Files.readAllLines(new File("D:\\test1.txt").toPath()); //对比文件 List<String> revised = Files.readAllLines(new File("D:\\test2.txt").toPath()); //两文件的不同点 Patch<String> patch = DiffUtils.diff(original, revised); for (AbstractDelta<String> delta : patch.getDeltas()) { System.out.println(delta); } 输出: [InsertDelta, position: 9, lines: [insert1;, insert2;]] [ChangeDelta, position: 18, lines: [window.upList = [];, window.downList = [];] to [, , add]] [ChangeDelta, position: 26, lines: [ location.href = urlsplit[0];] to [ location.href = qazwer[0];]] InsertDelta代表插入的,ChangeDelta代表修改的(原文件的若干行被替换为新的若干行;纯删除对应的是DeleteDelta)。position代表差异在原文件中的起始行(从0开始计数,所以position: 9对应第10行),lines代表内容。 四、根据patch生成统一的差异格式unifiedDiff //原始文件 List<String> original = Files.readAllLines(new File("D:\\test1.txt").toPath()); //对比文件 List<String> revised = Files.readAllLines(new File("D:\\test2.txt").toPath()); //两文件的不同点 Patch<String> patch = DiffUtils.diff(original, revised); //生成统一的差异格式 List<String> unifiedDiff = UnifiedDiffUtils.generateUnifiedDiff("test1.txt", "test2.txt", original, patch, 0); unifiedDiff.forEach(System.out::println); 输出diff: --- test1.txt +++ test2.txt @@ -10,0 +10,2 @@ +insert1; +insert2; @@ -19,2 +21,3 @@ -window.upList = []; -window.downList = []; + + +add @@ -27,1 +30,1 @@ - location.href = urlsplit[0]; + location.href = qazwer[0]; 说明: 关于输出的diff可参考 :读懂diff 减号代表原始文件test1.txt,加号代表对比文件test2.txt @@ -10,0 +10,2 @@ +insert1; +insert2; 表示test2增加了2行,增加的这2行是从test2的10行开始的,分别是insert1;和insert2;这两行 @@ -19,2 +21,3 @@ -window.upList = []; -window.downList = []; + + +add 表示test2删除了2行,删除的这两行是test1从19行开始的2行;然后test2又增加了3行,增加的3行是从test2的21行开始的。 @@ -27,1 +30,1 @@ - location.href = urlsplit[0]; + location.href = qazwer[0]; 表示test2删除了1行,删除的这行是test1从27行开始的1行;然后test2又增加了1行,增加的1行是从test2的30行开始的。(相当于这一行进行了修改) 前端页面美化输出: 如果想将上面通过Java代码得到的两个文件的差异美化输出可以 参考我另一篇博客:Java+html实现文本对比 实现效果如下:
如果你想通过js获取diff,然后通过diff2html渲染到界面可以参考: 实现Diff页面的工程实践 vue-diff-demo 五、根据unifiedDiff打补丁 假设你有test1.txt、test2.txt、test3.txt 三个内容一样的文件,有一天你改了test2.txt的文件内容,你想把test2.txt修改的地方也同步应用到 test3.txt上,就可以通过打补丁的方式来实现。简单来说就是将test1.txt、test2.txt进行对比得到 unifiedDiff ,再把 unifiedDiff 应用到 test3.txt 。(相当于git的不同分支的代码同步) //原始文件 List<String> original = Files.readAllLines(new File("D:\\test1.txt").toPath()); //对比文件 List<String> revised = Files.readAllLines(new File("D:\\test2.txt").toPath()); //两文件的不同点 Patch<String> patch = DiffUtils.diff(original, revised); //生成统一的差异格式 List<String> unifiedDiff = UnifiedDiffUtils.generateUnifiedDiff("test1.txt", "test2.txt", original, patch, 0); //从文件或此处从内存导入统一差异格式到补丁 Patch<String> importedPatch = UnifiedDiffUtils.parseUnifiedDiff(unifiedDiff); List<String> test3 = Files.readAllLines(new File("D:\\test3.txt").toPath()); //将差异运用到其他文件打补丁,即将不同点运用到其他文件(相当于git的冲突合并) List<String> patchedText = DiffUtils.patch(test3, importedPatch); for (String patchedTextPow : patchedText) { System.out.println(patchedTextPow); } 输出: _this.ispc = function(){ var userAgentInfo = navigator.userAgent; var Agents = ["Android", "iPhone", "SymbianOS", "Windows Phone", "iPad", "iPod"]; var flag = true; for (var v = 0; v < Agents.length; v++) { if (userAgentInfo.indexOf(Agents[v]) > 0) { flag = false; insert1; insert2; break; } } return flag; } window._assignInfo = {}; window._curnodepersons = []; window.attachmode = '0'; window.isEsignature = false; add window.billId = ''; window.upProcessIdList = []; window.downProcessIdList = []; var urlsplit = window.location.href.split("#"); if(urlsplit.length>1){ //alert(window.location.href); location.href = qazwer[0]; } 六、对比两文件的不同点并按行显示不同 //原始文件 List<String> text1= Files.readAllLines(new File("D:\\test2.txt").toPath()); //对比文件 List<String> text2=Files.readAllLines(new File("D:\\test1.txt").toPath()); //行比较器,原文件删除的内容用"~"包裹,对比文件新增的内容用"**"包裹 DiffRowGenerator generator = DiffRowGenerator.create() .showInlineDiffs(true) .inlineDiffByWord(true) .oldTag(f -> "~") .newTag(f -> "**") .build(); //通过行比较器对比得到每一行的不同 List<DiffRow> rows = generator.generateDiffRows(text1, text2); //输出每一行的原始文件和对比文件,每一行的原始文件和对比文件通过 
"|"分割 for (DiffRow row : rows) { System.out.println(row.getOldLine() + "|" + row.getNewLine()); } 输出: _this.ispc = function(){|_this.ispc = function(){ var userAgentInfo = navigator.userAgent;| var userAgentInfo = navigator.userAgent; var Agents = ["Android", "iPhone",| var Agents = ["Android", "iPhone", "SymbianOS", "Windows Phone",| "SymbianOS", "Windows Phone", "iPad", "iPod"];| "iPad", "iPod"]; var flag = true;| var flag = true; for (var v = 0; v &lt; Agents.length; v++) {| for (var v = 0; v &lt; Agents.length; v++) { if (userAgentInfo.indexOf(Agents[v]) &gt; 0) {| if (userAgentInfo.indexOf(Agents[v]) &gt; 0) { flag = false;| flag = false; ~insert1;~| ~insert2;~| break;| break; }| } }| } return flag;| return flag; }|} window._assignInfo = {};|window._assignInfo = {}; window._curnodepersons = [];|window._curnodepersons = []; window.attachmode = '0';|window.attachmode = '0'; window.isEsignature = false;|window.isEsignature = false; |**window.upList = [];** |**window.downList = [];** ~add~| window.billId = '';|window.billId = ''; window.upProcessIdList = [];|window.upProcessIdList = []; window.downProcessIdList = [];|window.downProcessIdList = []; var urlsplit = window.location.href.split("#");|var urlsplit = window.location.href.split("#"); if(urlsplit.length&gt;1){|if(urlsplit.length&gt;1){ //alert(window.location.href);| //alert(window.location.href); location.href = ~qazwer~[0];| location.href = **urlsplit**[0]; }|} 参考: Diffing more quickly Compare files side by side and hightlight diff using Java | Apache Commons Text diff | Myers algorithm Myers diff algorithm vs Hunt–McIlroy algorithm |
今日新闻 |
推荐新闻 |
CopyRight 2018-2019 办公设备维修网 版权所有 豫ICP备15022753号-3 |