Mercurial > vim
comparison src/textformat.c @ 20695:cea8ae407452 v8.2.0901
patch 8.2.0901: formatting CJK text isn't optimal
Commit: https://github.com/vim/vim/commit/e52702f00322c8a8861efd0bd6a3775e685e5685
Author: Bram Moolenaar <Bram@vim.org>
Date: Thu Jun 4 18:22:13 2020 +0200
patch 8.2.0901: formatting CJK text isn't optimal
Problem: Formatting CJK text isn't optimal.
Solution: Properly break CJK lines. (closes https://github.com/vim/vim/issues/3875)
author | Bram Moolenaar <Bram@vim.org> |
---|---|
date | Thu, 04 Jun 2020 18:30:04 +0200 |
parents | 918245588b50 |
children | e82579016863 |
comparison legend: equal | deleted | inserted | replaced
20694:3a049f4bdaa2 | 20695:cea8ae407452 |
---|---|
43 int flags, | 43 int flags, |
44 int format_only, | 44 int format_only, |
45 int c) // character to be inserted (can be NUL) | 45 int c) // character to be inserted (can be NUL) |
46 { | 46 { |
47 int cc; | 47 int cc; |
48 int skip_pos; | |
48 int save_char = NUL; | 49 int save_char = NUL; |
49 int haveto_redraw = FALSE; | 50 int haveto_redraw = FALSE; |
50 int fo_ins_blank = has_format_option(FO_INS_BLANK); | 51 int fo_ins_blank = has_format_option(FO_INS_BLANK); |
51 int fo_multibyte = has_format_option(FO_MBYTE_BREAK); | 52 int fo_multibyte = has_format_option(FO_MBYTE_BREAK); |
53 int fo_rigor_tw = has_format_option(FO_RIGOROUS_TW); | |
52 int fo_white_par = has_format_option(FO_WHITE_PAR); | 54 int fo_white_par = has_format_option(FO_WHITE_PAR); |
53 int first_line = TRUE; | 55 int first_line = TRUE; |
54 colnr_T leader_len; | 56 colnr_T leader_len; |
55 int no_leader = FALSE; | 57 int no_leader = FALSE; |
56 int do_comments = (flags & INSCHAR_DO_COM); | 58 int do_comments = (flags & INSCHAR_DO_COM); |
123 coladvance((colnr_T)textwidth); | 125 coladvance((colnr_T)textwidth); |
124 wantcol = curwin->w_cursor.col; | 126 wantcol = curwin->w_cursor.col; |
125 | 127 |
126 curwin->w_cursor.col = startcol; | 128 curwin->w_cursor.col = startcol; |
127 foundcol = 0; | 129 foundcol = 0; |
130 skip_pos = 0; | |
128 | 131 |
129 // Find position to break at. | 132 // Find position to break at. |
130 // Stop at first entered white when 'formatoptions' has 'v' | 133 // Stop at first entered white when 'formatoptions' has 'v' |
131 while ((!fo_ins_blank && !has_format_option(FO_INS_VI)) | 134 while ((!fo_ins_blank && !has_format_option(FO_INS_VI)) |
132 || (flags & INSCHAR_FORMAT) | 135 || (flags & INSCHAR_FORMAT) |
187 end_foundcol = end_col + 1; | 190 end_foundcol = end_col + 1; |
188 foundcol = curwin->w_cursor.col; | 191 foundcol = curwin->w_cursor.col; |
189 if (curwin->w_cursor.col <= (colnr_T)wantcol) | 192 if (curwin->w_cursor.col <= (colnr_T)wantcol) |
190 break; | 193 break; |
191 } | 194 } |
192 else if (cc >= 0x100 && fo_multibyte) | 195 else if ((cc >= 0x100 || !utf_allow_break_before(cc)) && fo_multibyte) |
193 { | 196 { |
197 int ncc; | |
198 int allow_break; | |
199 | |
194 // Break after or before a multi-byte character. | 200 // Break after or before a multi-byte character. |
195 if (curwin->w_cursor.col != startcol) | 201 if (curwin->w_cursor.col != startcol) |
196 { | 202 { |
197 // Don't break until after the comment leader | 203 // Don't break until after the comment leader |
198 if (curwin->w_cursor.col < leader_len) | 204 if (curwin->w_cursor.col < leader_len) |
199 break; | 205 break; |
200 col = curwin->w_cursor.col; | 206 col = curwin->w_cursor.col; |
201 inc_cursor(); | 207 inc_cursor(); |
202 // Don't change end_foundcol if already set. | 208 ncc = gchar_cursor(); |
203 if (foundcol != curwin->w_cursor.col) | 209 |
210 allow_break = | |
211 (enc_utf8 && utf_allow_break(cc, ncc)) | |
212 || enc_dbcs; | |
213 | |
214 // If we have already checked this position, skip! | |
215 if (curwin->w_cursor.col != skip_pos && allow_break) | |
204 { | 216 { |
205 foundcol = curwin->w_cursor.col; | 217 foundcol = curwin->w_cursor.col; |
206 end_foundcol = foundcol; | 218 end_foundcol = foundcol; |
207 if (curwin->w_cursor.col <= (colnr_T)wantcol) | 219 if (curwin->w_cursor.col <= (colnr_T)wantcol) |
208 break; | 220 break; |
211 } | 223 } |
212 | 224 |
213 if (curwin->w_cursor.col == 0) | 225 if (curwin->w_cursor.col == 0) |
214 break; | 226 break; |
215 | 227 |
228 ncc = cc; | |
216 col = curwin->w_cursor.col; | 229 col = curwin->w_cursor.col; |
217 | 230 |
218 dec_cursor(); | 231 dec_cursor(); |
219 cc = gchar_cursor(); | 232 cc = gchar_cursor(); |
220 | 233 |
221 if (WHITECHAR(cc)) | 234 if (WHITECHAR(cc)) |
222 continue; // break with space | 235 continue; // break with space |
223 // Don't break until after the comment leader | 236 // Don't break until after the comment leader. |
224 if (curwin->w_cursor.col < leader_len) | 237 if (curwin->w_cursor.col < leader_len) |
225 break; | 238 break; |
226 | 239 |
227 curwin->w_cursor.col = col; | 240 curwin->w_cursor.col = col; |
228 | 241 skip_pos = curwin->w_cursor.col; |
229 foundcol = curwin->w_cursor.col; | 242 |
230 end_foundcol = foundcol; | 243 allow_break = |
244 (enc_utf8 && utf_allow_break(cc, ncc)) | |
245 || enc_dbcs; | |
246 | |
247 // Must handle this to respect line break prohibition. | |
248 if (allow_break) | |
249 { | |
250 foundcol = curwin->w_cursor.col; | |
251 end_foundcol = foundcol; | |
252 } | |
231 if (curwin->w_cursor.col <= (colnr_T)wantcol) | 253 if (curwin->w_cursor.col <= (colnr_T)wantcol) |
232 break; | 254 { |
255 int ncc_allow_break = | |
256 (enc_utf8 && utf_allow_break_before(ncc)) || enc_dbcs; | |
257 | |
258 if (allow_break) | |
259 break; | |
260 if (!ncc_allow_break && !fo_rigor_tw) | |
261 { | |
262 // Enable at most 1 punct hang outside of textwidth. | |
263 if (curwin->w_cursor.col == startcol) | |
264 { | |
265 // We are inserting a non-breakable char, postpone | |
266 // line break check to next insert. | |
267 end_foundcol = foundcol = 0; | |
268 break; | |
269 } | |
270 | |
271 // Neither cc nor ncc is NUL if we are here, so | |
272 // it's safe to inc_cursor. | |
273 col = curwin->w_cursor.col; | |
274 | |
275 inc_cursor(); | |
276 cc = ncc; | |
277 ncc = gchar_cursor(); | |
278 // handle insert | |
279 ncc = (ncc != NUL) ? ncc : c; | |
280 | |
281 allow_break = | |
282 (enc_utf8 && utf_allow_break(cc, ncc)) | |
283 || enc_dbcs; | |
284 | |
285 if (allow_break) | |
286 { | |
287 // Break only when we are not at end of line. | |
288 end_foundcol = foundcol = | |
289 ncc == NUL? 0 : curwin->w_cursor.col; | |
290 break; | |
291 } | |
292 curwin->w_cursor.col = col; | |
293 } | |
294 } | |
233 } | 295 } |
234 if (curwin->w_cursor.col == 0) | 296 if (curwin->w_cursor.col == 0) |
235 break; | 297 break; |
236 dec_cursor(); | 298 dec_cursor(); |
237 } | 299 } |