comparison src/xdiff/xpatience.c @ 25709:d5142d87f898 v8.2.3390

patch 8.2.3390: included xdiff code is outdated Commit: https://github.com/vim/vim/commit/ba02e4720f863fdb456e7023520f0a354eec0dcf Author: Christian Brabandt <cb@256bit.org> Date: Tue Aug 31 20:46:39 2021 +0200 patch 8.2.3390: included xdiff code is outdated Problem: Included xdiff code is outdated. Solution: Sync with xdiff in git 2.33. (Christian Brabandt, closes https://github.com/vim/vim/issues/8431)
author Bram Moolenaar <Bram@vim.org>
date Tue, 31 Aug 2021 21:00:05 +0200
parents 3be01cf0a632
children f84e5db372ea
comparison
equal deleted inserted replaced
25708:a1f90f486bf7 25709:d5142d87f898
18 * 18 *
19 * Davide Libenzi <davidel@xmailserver.org> 19 * Davide Libenzi <davidel@xmailserver.org>
20 * 20 *
21 */ 21 */
22 #include "xinclude.h" 22 #include "xinclude.h"
23 #include "xtypes.h"
24 #include "xdiff.h"
25 23
26 /* 24 /*
27 * The basic idea of patience diff is to find lines that are unique in 25 * The basic idea of patience diff is to find lines that are unique in
28 * both files. These are intuitively the ones that we want to see as 26 * both files. These are intuitively the ones that we want to see as
29 * common lines. 27 * common lines.
67 * If 1, this entry can serve as an anchor. See 65 * If 1, this entry can serve as an anchor. See
68 * Documentation/diff-options.txt for more information. 66 * Documentation/diff-options.txt for more information.
69 */ 67 */
70 unsigned anchor : 1; 68 unsigned anchor : 1;
71 } *entries, *first, *last; 69 } *entries, *first, *last;
72 // were common records found? 70 /* were common records found? */
73 unsigned long has_matches; 71 unsigned long has_matches;
74 mmfile_t *file1, *file2; 72 mmfile_t *file1, *file2;
75 xdfenv_t *env; 73 xdfenv_t *env;
76 xpparam_t const *xpp; 74 xpparam_t const *xpp;
77 }; 75 };
78 76
79 static int is_anchor(xpparam_t const *xpp, const char *line) 77 static int is_anchor(xpparam_t const *xpp, const char *line)
80 { 78 {
81 size_t i; 79 int i;
82 for (i = 0; i < xpp->anchors_nr; i++) { 80 for (i = 0; i < (int)xpp->anchors_nr; i++) {
83 if (!strncmp(line, xpp->anchors[i], strlen(xpp->anchors[i]))) 81 if (!strncmp(line, xpp->anchors[i], strlen(xpp->anchors[i])))
84 return 1; 82 return 1;
85 } 83 }
86 return 0; 84 return 0;
87 } 85 }
88 86
89 // The argument "pass" is 1 for the first file, 2 for the second. 87 /* The argument "pass" is 1 for the first file, 2 for the second. */
90 static void insert_record(xpparam_t const *xpp, int line, struct hashmap *map, 88 static void insert_record(xpparam_t const *xpp, int line, struct hashmap *map,
91 int pass) 89 int pass)
92 { 90 {
93 xrecord_t **records = pass == 1 ? 91 xrecord_t **records = pass == 1 ?
94 map->env->xdf1.recs : map->env->xdf2.recs; 92 map->env->xdf1.recs : map->env->xdf2.recs;
95 xrecord_t *record = records[line - 1], *other; 93 xrecord_t *record = records[line - 1];
96 /* 94 /*
97 * After xdl_prepare_env() (or more precisely, due to 95 * After xdl_prepare_env() (or more precisely, due to
98 * xdl_classify_record()), the "ha" member of the records (AKA lines) 96 * xdl_classify_record()), the "ha" member of the records (AKA lines)
99 * is _not_ the hash anymore, but a linearized version of it. In 97 * is _not_ the hash anymore, but a linearized version of it. In
100 * other words, the "ha" member is guaranteed to start with 0 and 98 * other words, the "ha" member is guaranteed to start with 0 and
104 * "unique enough". 102 * "unique enough".
105 */ 103 */
106 int index = (int)((record->ha << 1) % map->alloc); 104 int index = (int)((record->ha << 1) % map->alloc);
107 105
108 while (map->entries[index].line1) { 106 while (map->entries[index].line1) {
109 other = map->env->xdf1.recs[map->entries[index].line1 - 1]; 107 if (map->entries[index].hash != record->ha) {
110 if (map->entries[index].hash != record->ha ||
111 !xdl_recmatch(record->ptr, record->size,
112 other->ptr, other->size,
113 map->xpp->flags)) {
114 if (++index >= map->alloc) 108 if (++index >= map->alloc)
115 index = 0; 109 index = 0;
116 continue; 110 continue;
117 } 111 }
118 if (pass == 2) 112 if (pass == 2)
153 result->file1 = file1; 147 result->file1 = file1;
154 result->file2 = file2; 148 result->file2 = file2;
155 result->xpp = xpp; 149 result->xpp = xpp;
156 result->env = env; 150 result->env = env;
157 151
158 // We know exactly how large we want the hash map 152 /* We know exactly how large we want the hash map */
159 result->alloc = count1 * 2; 153 result->alloc = count1 * 2;
160 result->entries = (struct entry *) 154 result->entries = (struct entry *)
161 xdl_malloc(result->alloc * sizeof(struct entry)); 155 xdl_malloc(result->alloc * sizeof(struct entry));
162 if (!result->entries) 156 if (!result->entries)
163 return -1; 157 return -1;
164 memset(result->entries, 0, result->alloc * sizeof(struct entry)); 158 memset(result->entries, 0, result->alloc * sizeof(struct entry));
165 159
166 // First, fill with entries from the first file 160 /* First, fill with entries from the first file */
167 while (count1--) 161 while (count1--)
168 insert_record(xpp, line1++, result, 1); 162 insert_record(xpp, line1++, result, 1);
169 163
170 // Then search for matches in the second file 164 /* Then search for matches in the second file */
171 while (count2--) 165 while (count2--)
172 insert_record(xpp, line2++, result, 2); 166 insert_record(xpp, line2++, result, 2);
173 167
174 return 0; 168 return 0;
175 } 169 }
183 { 177 {
184 int left = -1, right = longest; 178 int left = -1, right = longest;
185 179
186 while (left + 1 < right) { 180 while (left + 1 < right) {
187 int middle = left + (right - left) / 2; 181 int middle = left + (right - left) / 2;
188 // by construction, no two entries can be equal 182 /* by construction, no two entries can be equal */
189 if (sequence[middle]->line2 > entry->line2) 183 if (sequence[middle]->line2 > entry->line2)
190 right = middle; 184 right = middle;
191 else 185 else
192 left = middle; 186 left = middle;
193 } 187 }
194 // return the index in "sequence", _not_ the sequence length 188 /* return the index in "sequence", _not_ the sequence length */
195 return left; 189 return left;
196 } 190 }
197 191
198 /* 192 /*
199 * The idea is to start with the list of common unique lines sorted by 193 * The idea is to start with the list of common unique lines sorted by
204 * item per sequence length: the sequence with the smallest last 198 * item per sequence length: the sequence with the smallest last
205 * element (in terms of line2). 199 * element (in terms of line2).
206 */ 200 */
207 static struct entry *find_longest_common_sequence(struct hashmap *map) 201 static struct entry *find_longest_common_sequence(struct hashmap *map)
208 { 202 {
209 struct entry **sequence = (struct entry **)xdl_malloc(map->nr * sizeof(struct entry *)); 203 struct entry **sequence = xdl_malloc(map->nr * sizeof(struct entry *));
210 int longest = 0, i; 204 int longest = 0, i;
211 struct entry *entry; 205 struct entry *entry;
206
212 /* 207 /*
213 * If not -1, this entry in sequence must never be overridden. 208 * If not -1, this entry in sequence must never be overridden.
214 * Therefore, overriding entries before this has no effect, so 209 * Therefore, overriding entries before this has no effect, so
215 * do not do that either. 210 * do not do that either.
216 */ 211 */
235 } else if (i == longest) { 230 } else if (i == longest) {
236 longest++; 231 longest++;
237 } 232 }
238 } 233 }
239 234
240 // No common unique lines were found 235 /* No common unique lines were found */
241 if (!longest) { 236 if (!longest) {
242 xdl_free(sequence); 237 xdl_free(sequence);
243 return NULL; 238 return NULL;
244 } 239 }
245 240
246 // Iterate starting at the last element, adjusting the "next" members 241 /* Iterate starting at the last element, adjusting the "next" members */
247 entry = sequence[longest - 1]; 242 entry = sequence[longest - 1];
248 entry->next = NULL; 243 entry->next = NULL;
249 while (entry->previous) { 244 while (entry->previous) {
250 entry->previous->next = entry; 245 entry->previous->next = entry;
251 entry = entry->previous; 246 entry = entry->previous;
256 251
257 static int match(struct hashmap *map, int line1, int line2) 252 static int match(struct hashmap *map, int line1, int line2)
258 { 253 {
259 xrecord_t *record1 = map->env->xdf1.recs[line1 - 1]; 254 xrecord_t *record1 = map->env->xdf1.recs[line1 - 1];
260 xrecord_t *record2 = map->env->xdf2.recs[line2 - 1]; 255 xrecord_t *record2 = map->env->xdf2.recs[line2 - 1];
261 return xdl_recmatch(record1->ptr, record1->size, 256 return record1->ha == record2->ha;
262 record2->ptr, record2->size, map->xpp->flags);
263 } 257 }
264 258
265 static int patience_diff(mmfile_t *file1, mmfile_t *file2, 259 static int patience_diff(mmfile_t *file1, mmfile_t *file2,
266 xpparam_t const *xpp, xdfenv_t *env, 260 xpparam_t const *xpp, xdfenv_t *env,
267 int line1, int count1, int line2, int count2); 261 int line1, int count1, int line2, int count2);
271 { 265 {
272 int end1 = line1 + count1, end2 = line2 + count2; 266 int end1 = line1 + count1, end2 = line2 + count2;
273 int next1, next2; 267 int next1, next2;
274 268
275 for (;;) { 269 for (;;) {
276 // Try to grow the line ranges of common lines 270 /* Try to grow the line ranges of common lines */
277 if (first) { 271 if (first) {
278 next1 = first->line1; 272 next1 = first->line1;
279 next2 = first->line2; 273 next2 = first->line2;
280 while (next1 > line1 && next2 > line2 && 274 while (next1 > line1 && next2 > line2 &&
281 match(map, next1 - 1, next2 - 1)) { 275 match(map, next1 - 1, next2 - 1)) {
290 match(map, line1, line2)) { 284 match(map, line1, line2)) {
291 line1++; 285 line1++;
292 line2++; 286 line2++;
293 } 287 }
294 288
295 // Recurse 289 /* Recurse */
296 if (next1 > line1 || next2 > line2) { 290 if (next1 > line1 || next2 > line2) {
297 struct hashmap submap;
298
299 memset(&submap, 0, sizeof(submap));
300 if (patience_diff(map->file1, map->file2, 291 if (patience_diff(map->file1, map->file2,
301 map->xpp, map->env, 292 map->xpp, map->env,
302 line1, next1 - line1, 293 line1, next1 - line1,
303 line2, next2 - line2)) 294 line2, next2 - line2))
304 return -1; 295 return -1;
321 312
322 static int fall_back_to_classic_diff(struct hashmap *map, 313 static int fall_back_to_classic_diff(struct hashmap *map,
323 int line1, int count1, int line2, int count2) 314 int line1, int count1, int line2, int count2)
324 { 315 {
325 xpparam_t xpp; 316 xpparam_t xpp;
317
318 memset(&xpp, 0, sizeof(xpp));
326 xpp.flags = map->xpp->flags & ~XDF_DIFF_ALGORITHM_MASK; 319 xpp.flags = map->xpp->flags & ~XDF_DIFF_ALGORITHM_MASK;
327 320
328 return xdl_fall_back_diff(map->env, &xpp, 321 return xdl_fall_back_diff(map->env, &xpp,
329 line1, count1, line2, count2); 322 line1, count1, line2, count2);
330 } 323 }
341 { 334 {
342 struct hashmap map; 335 struct hashmap map;
343 struct entry *first; 336 struct entry *first;
344 int result = 0; 337 int result = 0;
345 338
346 // trivial case: one side is empty 339 /* trivial case: one side is empty */
347 if (!count1) { 340 if (!count1) {
348 while(count2--) 341 while(count2--)
349 env->xdf2.rchg[line2++ - 1] = 1; 342 env->xdf2.rchg[line2++ - 1] = 1;
350 return 0; 343 return 0;
351 } else if (!count2) { 344 } else if (!count2) {
357 memset(&map, 0, sizeof(map)); 350 memset(&map, 0, sizeof(map));
358 if (fill_hashmap(file1, file2, xpp, env, &map, 351 if (fill_hashmap(file1, file2, xpp, env, &map,
359 line1, count1, line2, count2)) 352 line1, count1, line2, count2))
360 return -1; 353 return -1;
361 354
362 // are there any matching lines at all? 355 /* are there any matching lines at all? */
363 if (!map.has_matches) { 356 if (!map.has_matches) {
364 while(count1--) 357 while(count1--)
365 env->xdf1.rchg[line1++ - 1] = 1; 358 env->xdf1.rchg[line1++ - 1] = 1;
366 while(count2--) 359 while(count2--)
367 env->xdf2.rchg[line2++ - 1] = 1; 360 env->xdf2.rchg[line2++ - 1] = 1;
385 xpparam_t const *xpp, xdfenv_t *env) 378 xpparam_t const *xpp, xdfenv_t *env)
386 { 379 {
387 if (xdl_prepare_env(file1, file2, xpp, env) < 0) 380 if (xdl_prepare_env(file1, file2, xpp, env) < 0)
388 return -1; 381 return -1;
389 382
390 // environment is cleaned up in xdl_diff() 383 /* environment is cleaned up in xdl_diff() */
391 return patience_diff(file1, file2, xpp, env, 384 return patience_diff(file1, file2, xpp, env,
392 1, env->xdf1.nrec, 1, env->xdf2.nrec); 385 1, env->xdf1.nrec, 1, env->xdf2.nrec);
393 } 386 }