comparison src/textformat.c @ 20695:cea8ae407452 v8.2.0901

patch 8.2.0901: formatting CJK text isn't optimal. Commit: https://github.com/vim/vim/commit/e52702f00322c8a8861efd0bd6a3775e685e5685 ; Author: Bram Moolenaar <Bram@vim.org> ; Date: Thu Jun 4 18:22:13 2020 +0200. patch 8.2.0901: formatting CJK text isn't optimal. Problem: Formatting CJK text isn't optimal. Solution: Properly break CJK lines. (closes https://github.com/vim/vim/issues/3875)
author Bram Moolenaar <Bram@vim.org>
date Thu, 04 Jun 2020 18:30:04 +0200
parents 918245588b50
children e82579016863
comparison
equal deleted inserted replaced
20694:3a049f4bdaa2 20695:cea8ae407452
43 int flags, 43 int flags,
44 int format_only, 44 int format_only,
45 int c) // character to be inserted (can be NUL) 45 int c) // character to be inserted (can be NUL)
46 { 46 {
47 int cc; 47 int cc;
48 int skip_pos;
48 int save_char = NUL; 49 int save_char = NUL;
49 int haveto_redraw = FALSE; 50 int haveto_redraw = FALSE;
50 int fo_ins_blank = has_format_option(FO_INS_BLANK); 51 int fo_ins_blank = has_format_option(FO_INS_BLANK);
51 int fo_multibyte = has_format_option(FO_MBYTE_BREAK); 52 int fo_multibyte = has_format_option(FO_MBYTE_BREAK);
53 int fo_rigor_tw = has_format_option(FO_RIGOROUS_TW);
52 int fo_white_par = has_format_option(FO_WHITE_PAR); 54 int fo_white_par = has_format_option(FO_WHITE_PAR);
53 int first_line = TRUE; 55 int first_line = TRUE;
54 colnr_T leader_len; 56 colnr_T leader_len;
55 int no_leader = FALSE; 57 int no_leader = FALSE;
56 int do_comments = (flags & INSCHAR_DO_COM); 58 int do_comments = (flags & INSCHAR_DO_COM);
123 coladvance((colnr_T)textwidth); 125 coladvance((colnr_T)textwidth);
124 wantcol = curwin->w_cursor.col; 126 wantcol = curwin->w_cursor.col;
125 127
126 curwin->w_cursor.col = startcol; 128 curwin->w_cursor.col = startcol;
127 foundcol = 0; 129 foundcol = 0;
130 skip_pos = 0;
128 131
129 // Find position to break at. 132 // Find position to break at.
130 // Stop at first entered white when 'formatoptions' has 'v' 133 // Stop at first entered white when 'formatoptions' has 'v'
131 while ((!fo_ins_blank && !has_format_option(FO_INS_VI)) 134 while ((!fo_ins_blank && !has_format_option(FO_INS_VI))
132 || (flags & INSCHAR_FORMAT) 135 || (flags & INSCHAR_FORMAT)
187 end_foundcol = end_col + 1; 190 end_foundcol = end_col + 1;
188 foundcol = curwin->w_cursor.col; 191 foundcol = curwin->w_cursor.col;
189 if (curwin->w_cursor.col <= (colnr_T)wantcol) 192 if (curwin->w_cursor.col <= (colnr_T)wantcol)
190 break; 193 break;
191 } 194 }
192 else if (cc >= 0x100 && fo_multibyte) 195 else if ((cc >= 0x100 || !utf_allow_break_before(cc)) && fo_multibyte)
193 { 196 {
197 int ncc;
198 int allow_break;
199
194 // Break after or before a multi-byte character. 200 // Break after or before a multi-byte character.
195 if (curwin->w_cursor.col != startcol) 201 if (curwin->w_cursor.col != startcol)
196 { 202 {
197 // Don't break until after the comment leader 203 // Don't break until after the comment leader
198 if (curwin->w_cursor.col < leader_len) 204 if (curwin->w_cursor.col < leader_len)
199 break; 205 break;
200 col = curwin->w_cursor.col; 206 col = curwin->w_cursor.col;
201 inc_cursor(); 207 inc_cursor();
202 // Don't change end_foundcol if already set. 208 ncc = gchar_cursor();
203 if (foundcol != curwin->w_cursor.col) 209
210 allow_break =
211 (enc_utf8 && utf_allow_break(cc, ncc))
212 || enc_dbcs;
213
214 // If we have already checked this position, skip!
215 if (curwin->w_cursor.col != skip_pos && allow_break)
204 { 216 {
205 foundcol = curwin->w_cursor.col; 217 foundcol = curwin->w_cursor.col;
206 end_foundcol = foundcol; 218 end_foundcol = foundcol;
207 if (curwin->w_cursor.col <= (colnr_T)wantcol) 219 if (curwin->w_cursor.col <= (colnr_T)wantcol)
208 break; 220 break;
211 } 223 }
212 224
213 if (curwin->w_cursor.col == 0) 225 if (curwin->w_cursor.col == 0)
214 break; 226 break;
215 227
228 ncc = cc;
216 col = curwin->w_cursor.col; 229 col = curwin->w_cursor.col;
217 230
218 dec_cursor(); 231 dec_cursor();
219 cc = gchar_cursor(); 232 cc = gchar_cursor();
220 233
221 if (WHITECHAR(cc)) 234 if (WHITECHAR(cc))
222 continue; // break with space 235 continue; // break with space
223 // Don't break until after the comment leader 236 // Don't break until after the comment leader.
224 if (curwin->w_cursor.col < leader_len) 237 if (curwin->w_cursor.col < leader_len)
225 break; 238 break;
226 239
227 curwin->w_cursor.col = col; 240 curwin->w_cursor.col = col;
228 241 skip_pos = curwin->w_cursor.col;
229 foundcol = curwin->w_cursor.col; 242
230 end_foundcol = foundcol; 243 allow_break =
244 (enc_utf8 && utf_allow_break(cc, ncc))
245 || enc_dbcs;
246
247 // Must handle this to respect line break prohibition.
248 if (allow_break)
249 {
250 foundcol = curwin->w_cursor.col;
251 end_foundcol = foundcol;
252 }
231 if (curwin->w_cursor.col <= (colnr_T)wantcol) 253 if (curwin->w_cursor.col <= (colnr_T)wantcol)
232 break; 254 {
255 int ncc_allow_break =
256 (enc_utf8 && utf_allow_break_before(ncc)) || enc_dbcs;
257
258 if (allow_break)
259 break;
260 if (!ncc_allow_break && !fo_rigor_tw)
261 {
262 // Enable at most 1 punct hang outside of textwidth.
263 if (curwin->w_cursor.col == startcol)
264 {
265 // We are inserting a non-breakable char, postpone
266 // line break check to next insert.
267 end_foundcol = foundcol = 0;
268 break;
269 }
270
271 // Neither cc nor ncc is NUL if we are here, so
272 // it's safe to inc_cursor.
273 col = curwin->w_cursor.col;
274
275 inc_cursor();
276 cc = ncc;
277 ncc = gchar_cursor();
278 // handle insert
279 ncc = (ncc != NUL) ? ncc : c;
280
281 allow_break =
282 (enc_utf8 && utf_allow_break(cc, ncc))
283 || enc_dbcs;
284
285 if (allow_break)
286 {
287 // Break only when we are not at end of line.
288 end_foundcol = foundcol =
289 ncc == NUL? 0 : curwin->w_cursor.col;
290 break;
291 }
292 curwin->w_cursor.col = col;
293 }
294 }
233 } 295 }
234 if (curwin->w_cursor.col == 0) 296 if (curwin->w_cursor.col == 0)
235 break; 297 break;
236 dec_cursor(); 298 dec_cursor();
237 } 299 }