vim: src/textformat.c comparison

comparison src/textformat.c @ 20695:cea8ae407452 v8.2.0901

patch 8.2.0901: formatting CJK text isn't optimal

Commit: https://github.com/vim/vim/commit/e52702f00322c8a8861efd0bd6a3775e685e5685
Author: Bram Moolenaar <Bram@vim.org>
Date: Thu Jun 4 18:22:13 2020 +0200

    patch 8.2.0901: formatting CJK text isn't optimal
    Problem: Formatting CJK text isn't optimal.
    Solution: Properly break CJK lines. (closes https://github.com/vim/vim/issues/3875)

author	Bram Moolenaar <Bram@vim.org>
date	Thu, 04 Jun 2020 18:30:04 +0200
parents	918245588b50
children	e82579016863

comparison

equal deleted inserted replaced

-:3a049f4bdaa2
+:cea8ae407452
 int		flags,
 int		format_only,
 int		c) // character to be inserted (can be NUL)
 {
 int		cc;
+int		skip_pos;
 int		save_char = NUL;
 int		haveto_redraw = FALSE;
 int		fo_ins_blank = has_format_option(FO_INS_BLANK);
 int		fo_multibyte = has_format_option(FO_MBYTE_BREAK);
+int		fo_rigor_tw  = has_format_option(FO_RIGOROUS_TW);
 int		fo_white_par = has_format_option(FO_WHITE_PAR);
 int		first_line = TRUE;
 colnr_T	leader_len;
 int		no_leader = FALSE;
 int		do_comments = (flags & INSCHAR_DO_COM);
 	coladvance((colnr_T)textwidth);
 	wantcol = curwin->w_cursor.col;
 	curwin->w_cursor.col = startcol;
 	foundcol = 0;
+	skip_pos = 0;
 	// Find position to break at.
 	// Stop at first entered white when 'formatoptions' has 'v'
 	while ((!fo_ins_blank && !has_format_option(FO_INS_VI))
 		    || (flags & INSCHAR_FORMAT)
 		end_foundcol = end_col + 1;
 		foundcol = curwin->w_cursor.col;
 		if (curwin->w_cursor.col <= (colnr_T)wantcol)
 		    break;
 	    }
-	    else if (cc >= 0x100 && fo_multibyte)
+	    else if ((cc >= 0x100 || !utf_allow_break_before(cc)) && fo_multibyte)
 	    {
+		int ncc;
+		int allow_break;
 		// Break after or before a multi-byte character.
 		if (curwin->w_cursor.col != startcol)
 		{
 		    // Don't break until after the comment leader
 		    if (curwin->w_cursor.col < leader_len)
 			break;
 		    col = curwin->w_cursor.col;
 		    inc_cursor();
-		    // Don't change end_foundcol if already set.
+		    ncc = gchar_cursor();
-		    if (foundcol != curwin->w_cursor.col)
+		    allow_break =
+			(enc_utf8 && utf_allow_break(cc, ncc))
+			|| enc_dbcs;
+		    // If we have already checked this position, skip!
+		    if (curwin->w_cursor.col != skip_pos && allow_break)
 		    {
 			foundcol = curwin->w_cursor.col;
 			end_foundcol = foundcol;
 			if (curwin->w_cursor.col <= (colnr_T)wantcol)
 			    break;
 		}
 		if (curwin->w_cursor.col == 0)
 		    break;
+		ncc = cc;
 		col = curwin->w_cursor.col;
 		dec_cursor();
 		cc = gchar_cursor();
 		if (WHITECHAR(cc))
 		    continue;		// break with space
-		// Don't break until after the comment leader
+		// Don't break until after the comment leader.
 		if (curwin->w_cursor.col < leader_len)
 		    break;
 		curwin->w_cursor.col = col;
+		skip_pos = curwin->w_cursor.col;
-		foundcol = curwin->w_cursor.col;
-		end_foundcol = foundcol;
+		allow_break =
+		    (enc_utf8 && utf_allow_break(cc, ncc))
+		    || enc_dbcs;
+		// Must handle this to respect line break prohibition.
+		if (allow_break)
+		{
+		    foundcol = curwin->w_cursor.col;
+		    end_foundcol = foundcol;
+		}
 		if (curwin->w_cursor.col <= (colnr_T)wantcol)
-		    break;
+		{
+		    int ncc_allow_break =
+			 (enc_utf8 && utf_allow_break_before(ncc)) || enc_dbcs;
+		    if (allow_break)
+			break;
+		    if (!ncc_allow_break && !fo_rigor_tw)
+		    {
+			// Enable at most 1 punct hang outside of textwidth.
+			if (curwin->w_cursor.col == startcol)
+			{
+			    // We are inserting a non-breakable char, postpone
+			    // line break check to next insert.
+			    end_foundcol = foundcol = 0;
+			    break;
+			}
+			// Neither cc nor ncc is NUL if we are here, so
+			// it's safe to inc_cursor.
+			col = curwin->w_cursor.col;
+			inc_cursor();
+			cc  = ncc;
+			ncc = gchar_cursor();
+			// handle insert
+			ncc = (ncc != NUL) ? ncc : c;
+			allow_break =
+				(enc_utf8 && utf_allow_break(cc, ncc))
+				|| enc_dbcs;
+			if (allow_break)
+			{
+			    // Break only when we are not at end of line.
+			    end_foundcol = foundcol =
+				      ncc == NUL? 0 : curwin->w_cursor.col;
+			    break;
+			}
+			curwin->w_cursor.col = col;
+		    }
+		}
 	    }
 	    if (curwin->w_cursor.col == 0)
 		break;
 	    dec_cursor();
 	}

Mercurial > vim

comparison src/textformat.c @ 20695:cea8ae407452 v8.2.0901