Mercurial > vim
comparison runtime/scripts.vim @ 27692:7346315e8517 v8.2.4372
patch 8.2.4372: filetype detection from file contents is in legacy script
Commit: https://github.com/vim/vim/commit/299d8e5eec8f8ae91177f7feb67ad59402dfa8e2
Author: Bram Moolenaar <Bram@vim.org>
Date: Sun Feb 13 20:32:02 2022 +0000
patch 8.2.4372: filetype detection from file contents is in legacy script
Problem: Filetype detection from file contents is in legacy script.
Solution: Use a compiled function for filetype detection.
author | Bram Moolenaar <Bram@vim.org> |
---|---|
date | Sun, 13 Feb 2022 21:45:02 +0100 |
parents | 0b5ce27d8b68 |
children | 635de73eeb4c |
Comparison legend: equal | deleted | inserted | replaced
27691:8ecc769fa19b | 27692:7346315e8517 |
---|---|
1 " Vim support file to detect file types in scripts | 1 " Vim support file to detect file types in scripts |
2 " | 2 " |
3 " Maintainer: Bram Moolenaar <Bram@vim.org> | 3 " Maintainer: Bram Moolenaar <Bram@vim.org> |
4 " Last change: 2021 Jan 22 | 4 " Last change: 2022 Feb 13 |
5 | 5 |
6 " This file is called by an autocommand for every file that has just been | 6 " This file is called by an autocommand for every file that has just been |
7 " loaded into a buffer. It checks if the type of file can be recognized by | 7 " loaded into a buffer. It checks if the type of file can be recognized by |
8 " the file contents. The autocommand is in $VIMRUNTIME/filetype.vim. | 8 " the file contents. The autocommand is in $VIMRUNTIME/filetype.vim. |
9 " | 9 " |
10 " Note that the pattern matches are done with =~# to avoid the value of the | 10 " Note that the pattern matches are done with =~# to avoid the value of the |
11 " 'ignorecase' option making a difference. Where case is to be ignored use | 11 " 'ignorecase' option making a difference. Where case is to be ignored use |
12 " =~? instead. Do not use =~ anywhere. | 12 " =~? instead. Do not use =~ anywhere. |
13 | 13 |
14 | 14 |
15 " Only do the rest when the FileType autocommand has not been triggered yet. | 15 " Bail out when a FileType autocommand has already set the filetype. |
16 if did_filetype() | 16 if did_filetype() |
17 finish | 17 finish |
18 endif | 18 endif |
19 | 19 |
20 " Load the user defined scripts file first | 20 " Load the user defined scripts file first |
21-23 (lines not shown in this view) | 21-23 (lines not shown in this view) |
24 if did_filetype() | 24 if did_filetype() |
25 finish | 25 finish |
26 endif | 26 endif |
27 endif | 27 endif |
28 | 28 |
29 " Line continuation is used here, remove 'C' from 'cpoptions' | 29 " The main code is in a compiled function for speed. |
30 let s:cpo_save = &cpo | 30 call dist#script#DetectFiletype() |
31 set cpo&vim | |
32 | |
33 let s:line1 = getline(1) | |
34 | |
35 if s:line1 =~# "^#!" | |
36 " A script that starts with "#!". | |
37 | |
38 " Check for a line like "#!/usr/bin/env {options} bash". Turn it into | |
39 " "#!/usr/bin/bash" to make matching easier. | |
40 " Recognize only a few {options} that are commonly used. | |
41 if s:line1 =~# '^#!\s*\S*\<env\s' | |
42 let s:line1 = substitute(s:line1, '\S\+=\S\+', '', 'g') | |
43 let s:line1 = substitute(s:line1, '\(-[iS]\|--ignore-environment\|--split-string\)', '', '') | |
44 let s:line1 = substitute(s:line1, '\<env\s\+', '', '') | |
45 endif | |
46 | |
47 " Get the program name. | |
48 " Only accept spaces in PC style paths: "#!c:/program files/perl [args]". | |
49 " If the word env is used, use the first word after the space: | |
50 " "#!/usr/bin/env perl [path/args]" | |
51 " If there is no path use the first word: "#!perl [path/args]". | |
52 " Otherwise get the last word after a slash: "#!/usr/bin/perl [path/args]". | |
53 if s:line1 =~# '^#!\s*\a:[/\\]' | |
54 let s:name = substitute(s:line1, '^#!.*[/\\]\(\i\+\).*', '\1', '') | |
55 elseif s:line1 =~# '^#!.*\<env\>' | |
56 let s:name = substitute(s:line1, '^#!.*\<env\>\s\+\(\i\+\).*', '\1', '') | |
57 elseif s:line1 =~# '^#!\s*[^/\\ ]*\>\([^/\\]\|$\)' | |
58 let s:name = substitute(s:line1, '^#!\s*\([^/\\ ]*\>\).*', '\1', '') | |
59 else | |
60 let s:name = substitute(s:line1, '^#!\s*\S*[/\\]\(\i\+\).*', '\1', '') | |
61 endif | |
62 | |
63 " tcl scripts may have #!/bin/sh in the first line and "exec wish" in the | |
64 " third line. Suggested by Steven Atkinson. | |
65 if getline(3) =~# '^exec wish' | |
66 let s:name = 'wish' | |
67 endif | |
68 | |
69 " Bourne-like shell scripts: bash bash2 ksh ksh93 sh | |
70 if s:name =~# '^\(bash\d*\|\|ksh\d*\|sh\)\>' | |
71 call dist#ft#SetFileTypeSH(s:line1) " defined in filetype.vim | |
72 | |
73 " csh scripts | |
74 elseif s:name =~# '^csh\>' | |
75 if exists("g:filetype_csh") | |
76 call dist#ft#SetFileTypeShell(g:filetype_csh) | |
77 else | |
78 call dist#ft#SetFileTypeShell("csh") | |
79 endif | |
80 | |
81 " tcsh scripts | |
82 elseif s:name =~# '^tcsh\>' | |
83 call dist#ft#SetFileTypeShell("tcsh") | |
84 | |
85 " Z shell scripts | |
86 elseif s:name =~# '^zsh\>' | |
87 set ft=zsh | |
88 | |
89 " TCL scripts | |
90 elseif s:name =~# '^\(tclsh\|wish\|expectk\|itclsh\|itkwish\)\>' | |
91 set ft=tcl | |
92 | |
93 " Expect scripts | |
94 elseif s:name =~# '^expect\>' | |
95 set ft=expect | |
96 | |
97 " Gnuplot scripts | |
98 elseif s:name =~# '^gnuplot\>' | |
99 set ft=gnuplot | |
100 | |
101 " Makefiles | |
102 elseif s:name =~# 'make\>' | |
103 set ft=make | |
104 | |
105 " Pike | |
106 elseif s:name =~# '^pike\%(\>\|[0-9]\)' | |
107 set ft=pike | |
108 | |
109 " Lua | |
110 elseif s:name =~# 'lua' | |
111 set ft=lua | |
112 | |
113 " Perl | |
114 elseif s:name =~# 'perl' | |
115 set ft=perl | |
116 | |
117 " PHP | |
118 elseif s:name =~# 'php' | |
119 set ft=php | |
120 | |
121 " Python | |
122 elseif s:name =~# 'python' | |
123 set ft=python | |
124 | |
125 " Groovy | |
126 elseif s:name =~# '^groovy\>' | |
127 set ft=groovy | |
128 | |
129 " Raku | |
130 elseif s:name =~# 'raku' | |
131 set ft=raku | |
132 | |
133 " Ruby | |
134 elseif s:name =~# 'ruby' | |
135 set ft=ruby | |
136 | |
137 " JavaScript | |
138 elseif s:name =~# 'node\(js\)\=\>\|js\>' || s:name =~# 'rhino\>' | |
139 set ft=javascript | |
140 | |
141 " BC calculator | |
142 elseif s:name =~# '^bc\>' | |
143 set ft=bc | |
144 | |
145 " sed | |
146 elseif s:name =~# 'sed\>' | |
147 set ft=sed | |
148 | |
149 " OCaml-scripts | |
150 elseif s:name =~# 'ocaml' | |
151 set ft=ocaml | |
152 | |
153 " Awk scripts; also finds "gawk" | |
154 elseif s:name =~# 'awk\>' | |
155 set ft=awk | |
156 | |
157 " Website MetaLanguage | |
158 elseif s:name =~# 'wml' | |
159 set ft=wml | |
160 | |
161 " Scheme scripts | |
162 elseif s:name =~# 'scheme' | |
163 set ft=scheme | |
164 | |
165 " CFEngine scripts | |
166 elseif s:name =~# 'cfengine' | |
167 set ft=cfengine | |
168 | |
169 " Erlang scripts | |
170 elseif s:name =~# 'escript' | |
171 set ft=erlang | |
172 | |
173 " Haskell | |
174 elseif s:name =~# 'haskell' | |
175 set ft=haskell | |
176 | |
177 " Scala | |
178 elseif s:name =~# 'scala\>' | |
179 set ft=scala | |
180 | |
181 " Clojure | |
182 elseif s:name =~# 'clojure' | |
183 set ft=clojure | |
184 | |
185 " Free Pascal | |
186 elseif s:name =~# 'instantfpc\>' | |
187 set ft=pascal | |
188 | |
189 " Fennel | |
190 elseif s:name =~# 'fennel\>' | |
191 set ft=fennel | |
192 | |
193 " MikroTik RouterOS script | |
194 elseif s:name =~# 'rsc\>' | |
195 set ft=routeros | |
196 | |
197 " Fish shell | |
198 elseif s:name =~# 'fish\>' | |
199 set ft=fish | |
200 | |
201 " Gforth | |
202 elseif s:name =~# 'gforth\>' | |
203 set ft=forth | |
204 | |
205 endif | |
206 unlet s:name | |
207 | |
208 else | |
209 " File does not start with "#!". | |
210 | |
211 let s:line2 = getline(2) | |
212 let s:line3 = getline(3) | |
213 let s:line4 = getline(4) | |
214 let s:line5 = getline(5) | |
215 | |
216 " Bourne-like shell scripts: sh ksh bash bash2 | |
217 if s:line1 =~# '^:$' | |
218 call dist#ft#SetFileTypeSH(s:line1) " defined in filetype.vim | |
219 | |
220 " Z shell scripts | |
221 elseif s:line1 =~# '^#compdef\>' || s:line1 =~# '^#autoload\>' || | |
222 \ "\n".s:line1."\n".s:line2."\n".s:line3."\n".s:line4."\n".s:line5 =~# '\n\s*emulate\s\+\%(-[LR]\s\+\)\=[ckz]\=sh\>' | |
223 set ft=zsh | |
224 | |
225 " ELM Mail files | |
226 elseif s:line1 =~# '^From \([a-zA-Z][a-zA-Z_0-9\.=-]*\(@[^ ]*\)\=\|-\) .* \(19\|20\)\d\d$' | |
227 set ft=mail | |
228 | |
229 " Mason | |
230 elseif s:line1 =~# '^<[%&].*>' | |
231 set ft=mason | |
232 | |
233 " Vim scripts (must have '" vim' as the first line to trigger this) | |
234 elseif s:line1 =~# '^" *[vV]im$' | |
235 set ft=vim | |
236 | |
237 " libcxx and libstdc++ standard library headers like "iostream" do not have | |
238 " an extension, recognize the Emacs file mode. | |
239 elseif s:line1 =~? '-\*-.*C++.*-\*-' | |
240 set ft=cpp | |
241 | |
242 " MOO | |
243 elseif s:line1 =~# '^\*\* LambdaMOO Database, Format Version \%([1-3]\>\)\@!\d\+ \*\*$' | |
244 set ft=moo | |
245 | |
246 " Diff file: | |
247 " - "diff" in first line (context diff) | |
248 " - "Only in " in first line | |
249 " - "--- " in first line and "+++ " in second line (unified diff). | |
250 " - "*** " in first line and "--- " in second line (context diff). | |
251 " - "# It was generated by makepatch " in the second line (makepatch diff). | |
252 " - "Index: <filename>" in the first line (CVS file) | |
253 " - "=== ", line of "=", "---", "+++ " (SVK diff) | |
254 " - "=== ", "--- ", "+++ " (bzr diff, common case) | |
255 " - "=== (removed|added|renamed|modified)" (bzr diff, alternative) | |
256 " - "# HG changeset patch" in first line (Mercurial export format) | |
257 elseif s:line1 =~# '^\(diff\>\|Only in \|\d\+\(,\d\+\)\=[cda]\d\+\>\|# It was generated by makepatch \|Index:\s\+\f\+\r\=$\|===== \f\+ \d\+\.\d\+ vs edited\|==== //\f\+#\d\+\|# HG changeset patch\)' | |
258 \ || (s:line1 =~# '^--- ' && s:line2 =~# '^+++ ') | |
259 \ || (s:line1 =~# '^\* looking for ' && s:line2 =~# '^\* comparing to ') | |
260 \ || (s:line1 =~# '^\*\*\* ' && s:line2 =~# '^--- ') | |
261 \ || (s:line1 =~# '^=== ' && ((s:line2 =~# '^=\{66\}' && s:line3 =~# '^--- ' && s:line4 =~# '^+++') || (s:line2 =~# '^--- ' && s:line3 =~# '^+++ '))) | |
262 \ || (s:line1 =~# '^=== \(removed\|added\|renamed\|modified\)') | |
263 set ft=diff | |
264 | |
265 " PostScript Files (must have %!PS as the first line, like a2ps output) | |
266 elseif s:line1 =~# '^%![ \t]*PS' | |
267 set ft=postscr | |
268 | |
269 " M4 scripts: Guess there is a line that starts with "dnl". | |
270 elseif s:line1 =~# '^\s*dnl\>' | |
271 \ || s:line2 =~# '^\s*dnl\>' | |
272 \ || s:line3 =~# '^\s*dnl\>' | |
273 \ || s:line4 =~# '^\s*dnl\>' | |
274 \ || s:line5 =~# '^\s*dnl\>' | |
275 set ft=m4 | |
276 | |
277 " AmigaDos scripts | |
278 elseif $TERM == "amiga" | |
279 \ && (s:line1 =~# "^;" || s:line1 =~? '^\.bra') | |
280 set ft=amiga | |
281 | |
282 " SiCAD scripts (must have procn or procd as the first line to trigger this) | |
283 elseif s:line1 =~? '^ *proc[nd] *$' | |
284 set ft=sicad | |
285 | |
286 " Purify log files start with "**** Purify" | |
287 elseif s:line1 =~# '^\*\*\*\* Purify' | |
288 set ft=purifylog | |
289 | |
290 " XML | |
291 elseif s:line1 =~# '<?\s*xml.*?>' | |
292 set ft=xml | |
293 | |
294 " XHTML (e.g.: PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN") | |
295 elseif s:line1 =~# '\<DTD\s\+XHTML\s' | |
296 set ft=xhtml | |
297 | |
298 " HTML (e.g.: <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN") | |
299 " Avoid "doctype html", used by slim. | |
300 elseif s:line1 =~? '<!DOCTYPE\s\+html\>' | |
301 set ft=html | |
302 | |
303 " PDF | |
304 elseif s:line1 =~# '^%PDF-' | |
305 set ft=pdf | |
306 | |
307 " XXD output | |
308 elseif s:line1 =~# '^\x\{7}: \x\{2} \=\x\{2} \=\x\{2} \=\x\{2} ' | |
309 set ft=xxd | |
310 | |
311 " RCS/CVS log output | |
312 elseif s:line1 =~# '^RCS file:' || s:line2 =~# '^RCS file:' | |
313 set ft=rcslog | |
314 | |
315 " CVS commit | |
316 elseif s:line2 =~# '^CVS:' || getline("$") =~# '^CVS: ' | |
317 set ft=cvs | |
318 | |
319 " Prescribe | |
320 elseif s:line1 =~# '^!R!' | |
321 set ft=prescribe | |
322 | |
323 " Send-pr | |
324 elseif s:line1 =~# '^SEND-PR:' | |
325 set ft=sendpr | |
326 | |
327 " SNNS files | |
328 elseif s:line1 =~# '^SNNS network definition file' | |
329 set ft=snnsnet | |
330 elseif s:line1 =~# '^SNNS pattern definition file' | |
331 set ft=snnspat | |
332 elseif s:line1 =~# '^SNNS result file' | |
333 set ft=snnsres | |
334 | |
335 " Virata | |
336 elseif s:line1 =~# '^%.\{-}[Vv]irata' | |
337 \ || s:line2 =~# '^%.\{-}[Vv]irata' | |
338 \ || s:line3 =~# '^%.\{-}[Vv]irata' | |
339 \ || s:line4 =~# '^%.\{-}[Vv]irata' | |
340 \ || s:line5 =~# '^%.\{-}[Vv]irata' | |
341 set ft=virata | |
342 | |
343 " Strace | |
344 elseif s:line1 =~# '[0-9:.]* *execve(' || s:line1 =~# '^__libc_start_main' | |
345 set ft=strace | |
346 | |
347 " VSE JCL | |
348 elseif s:line1 =~# '^\* $$ JOB\>' || s:line1 =~# '^// *JOB\>' | |
349 set ft=vsejcl | |
350 | |
351 " TAK and SINDA | |
352 elseif s:line4 =~# 'K & K Associates' || s:line2 =~# 'TAK 2000' | |
353 set ft=takout | |
354 elseif s:line3 =~# 'S Y S T E M S I M P R O V E D ' | |
355 set ft=sindaout | |
356 elseif getline(6) =~# 'Run Date: ' | |
357 set ft=takcmp | |
358 elseif getline(9) =~# 'Node File 1' | |
359 set ft=sindacmp | |
360 | |
361 " DNS zone files | |
362 elseif s:line1.s:line2.s:line3.s:line4 =~# '^; <<>> DiG [0-9.]\+.* <<>>\|$ORIGIN\|$TTL\|IN\s\+SOA' | |
363 set ft=bindzone | |
364 | |
365 " BAAN | |
366 elseif s:line1 =~# '|\*\{1,80}' && s:line2 =~# 'VRC ' | |
367 \ || s:line2 =~# '|\*\{1,80}' && s:line3 =~# 'VRC ' | |
368 set ft=baan | |
369 | |
370 " Valgrind | |
371 elseif s:line1 =~# '^==\d\+== valgrind' || s:line3 =~# '^==\d\+== Using valgrind' | |
372 set ft=valgrind | |
373 | |
374 " Go docs | |
375 elseif s:line1 =~# '^PACKAGE DOCUMENTATION$' | |
376 set ft=godoc | |
377 | |
378 " Renderman Interface Bytestream | |
379 elseif s:line1 =~# '^##RenderMan' | |
380 set ft=rib | |
381 | |
382 " Scheme scripts | |
383 elseif s:line1 =~# 'exec\s\+\S*scheme' || s:line2 =~# 'exec\s\+\S*scheme' | |
384 set ft=scheme | |
385 | |
386 " Git output | |
387 elseif s:line1 =~# '^\(commit\|tree\|object\) \x\{40,\}\>\|^tag \S\+$' | |
388 set ft=git | |
389 | |
390 " Gprof (gnu profiler) | |
391 elseif s:line1 == 'Flat profile:' | |
392 \ && s:line2 == '' | |
393 \ && s:line3 =~# '^Each sample counts as .* seconds.$' | |
394 set ft=gprof | |
395 | |
396 " Erlang terms | |
397 " (See also: http://www.gnu.org/software/emacs/manual/html_node/emacs/Choosing-Modes.html#Choosing-Modes) | |
398 elseif s:line1 =~? '-\*-.*erlang.*-\*-' | |
399 set ft=erlang | |
400 | |
401 " YAML | |
402 elseif s:line1 =~# '^%YAML' | |
403 set ft=yaml | |
404 | |
405 " MikroTik RouterOS script | |
406 elseif s:line1 =~# '^#.*by RouterOS.*$' | |
407 set ft=routeros | |
408 | |
409 " Sed scripts | |
410 " #ncomment is allowed but most likely a false positive so require a space | |
411 " before any trailing comment text | |
412 elseif s:line1 =~# '^#n\%($\|\s\)' | |
413 set ft=sed | |
414 | |
415 " CVS diff | |
416 else | |
417 let s:lnum = 1 | |
418 while getline(s:lnum) =~# "^? " && s:lnum < line("$") | |
419 let s:lnum += 1 | |
420 endwhile | |
421 if getline(s:lnum) =~# '^Index:\s\+\f\+$' | |
422 set ft=diff | |
423 | |
424 " locale input files: Formal Definitions of Cultural Conventions | |
425 " filename must be like en_US, fr_FR@euro or en_US.UTF-8 | |
426 elseif expand("%") =~# '\a\a_\a\a\($\|[.@]\)\|i18n$\|POSIX$\|translit_' | |
427 let s:lnum = 1 | |
428 while s:lnum < 100 && s:lnum < line("$") | |
429 if getline(s:lnum) =~# '^LC_\(IDENTIFICATION\|CTYPE\|COLLATE\|MONETARY\|NUMERIC\|TIME\|MESSAGES\|PAPER\|TELEPHONE\|MEASUREMENT\|NAME\|ADDRESS\)$' | |
430 setf fdcc | |
431 break | |
432 endif | |
433 let s:lnum += 1 | |
434 endwhile | |
435 endif | |
436 unlet s:lnum | |
437 | |
438 endif | |
439 | |
440 unlet s:line2 s:line3 s:line4 s:line5 | |
441 | |
442 endif | |
443 | |
444 " Restore 'cpoptions' | |
445 let &cpo = s:cpo_save | |
446 | |
447 unlet s:cpo_save s:line1 |