Mercurial > vim
comparison runtime/autoload/dist/script.vim @ 27692:7346315e8517 v8.2.4372
patch 8.2.4372: filetype detection from file contents is in legacy script
Commit: https://github.com/vim/vim/commit/299d8e5eec8f8ae91177f7feb67ad59402dfa8e2
Author: Bram Moolenaar <Bram@vim.org>
Date: Sun Feb 13 20:32:02 2022 +0000
patch 8.2.4372: filetype detection from file contents is in legacy script
Problem: Filetype detection from file contents is in legacy script.
Solution: Use a compiled function for filetype detection.
author | Bram Moolenaar <Bram@vim.org> |
---|---|
date | Sun, 13 Feb 2022 21:45:02 +0100 |
parents | |
children | 71759abd2145 |
comparison
equal
deleted
inserted
replaced
27691:8ecc769fa19b | 27692:7346315e8517 |
---|---|
1 vim9script | |
2 | |
3 # Vim function for detecting a filetype from the file contents. | |
4 # Invoked from "scripts.vim" in 'runtimepath' | |
5 # | |
6 # Maintainer: Bram Moolenaar <Bram@vim.org> | |
7 # Last Change: 2022 Feb 13 | |
8 | |
# Entry point: look at the first line of the current buffer and hand it to
# the matching detector.  A line starting with "#!" goes to
# DetectFromHashBang(), every other line goes to DetectFromText().
export def DetectFiletype()
  var first = getline(1)

  # Compare the first two characters at once; a shorter line just yields a
  # shorter string, which cannot be equal to "#!".
  if first[0 : 1] == '#!'
    DetectFromHashBang(first)
    return
  endif

  DetectFromText(first)
enddef
19 | |
# Called for a script that has "#!" in the first line.
#
# firstline: text of the first line of the buffer, including the "#!".
#
# The interpreter name is taken from that line and compared with the known
# interpreters, in the order below; the first one that matches sets
# 'filetype'.  When nothing matches 'filetype' is not changed.
def DetectFromHashBang(firstline: string)
  var line1 = firstline

  # Check for a line like "#!/usr/bin/env {options} bash".  Turn it into
  # "#!/usr/bin/bash" to make matching easier.
  # Recognize only a few {options} that are commonly used.  An option must be
  # a word of its own, delimited by white space, so that the "-i" inside a
  # program name such as "my-interp" is kept.  All options are removed, not
  # only the first one, otherwise "env -S -i perl" leaves "-i" behind.
  if line1 =~ '^#!\s*\S*\<env\s'
    # "--split-string prog" and "--split-string=prog": remove the option only.
    # Must come before the NAME=value removal, which would take "prog" along.
    line1 = substitute(line1, '\s\zs--split-string[ \t=]', '', '')
    # NAME=value environment assignments.
    line1 = substitute(line1, '\S\+=\S\+', '', 'g')
    # "-i", "-S" (also combined, like "-iS") and "--ignore-environment".
    line1 = substitute(line1, '\s\zs\%(-[iS]\+\|--ignore-environment\)\ze\s', '', 'g')
    line1 = substitute(line1, '\<env\s\+', '', '')
  endif

  # Get the program name.
  # Only accept spaces in PC style paths: "#!c:/program files/perl [args]".
  # If the word env is used, use the first word after the space:
  # "#!/usr/bin/env perl [path/args]"
  # If there is no path use the first word: "#!perl [path/args]".
  # Otherwise get the last word after a slash: "#!/usr/bin/perl [path/args]".
  var name: string
  if line1 =~ '^#!\s*\a:[/\\]'
    name = substitute(line1, '^#!.*[/\\]\(\i\+\).*', '\1', '')
  elseif line1 =~ '^#!.*\<env\>'
    name = substitute(line1, '^#!.*\<env\>\s\+\(\i\+\).*', '\1', '')
  elseif line1 =~ '^#!\s*[^/\\ ]*\>\([^/\\]\|$\)'
    name = substitute(line1, '^#!\s*\([^/\\ ]*\>\).*', '\1', '')
  else
    name = substitute(line1, '^#!\s*\S*[/\\]\(\i\+\).*', '\1', '')
  endif

  # tcl scripts may have #!/bin/sh in the first line and "exec wish" in the
  # third line.  Suggested by Steven Atkinson.
  if getline(3) =~ '^exec wish'
    name = 'wish'
  endif

  # Bourne-like shell scripts: bash bash2 ksh ksh93 sh
  if name =~ '^\(bash\d*\|ksh\d*\|sh\)\>'
    call dist#ft#SetFileTypeSH(line1)

  # csh scripts; g:filetype_csh overrules the default when it exists
  elseif name =~ '^csh\>'
    call dist#ft#SetFileTypeShell(get(g:, 'filetype_csh', 'csh'))

  # tcsh scripts
  elseif name =~ '^tcsh\>'
    call dist#ft#SetFileTypeShell("tcsh")

  # Z shell scripts
  elseif name =~ '^zsh\>'
    set ft=zsh

  # TCL scripts
  elseif name =~ '^\(tclsh\|wish\|expectk\|itclsh\|itkwish\)\>'
    set ft=tcl

  # Expect scripts
  elseif name =~ '^expect\>'
    set ft=expect

  # Gnuplot scripts
  elseif name =~ '^gnuplot\>'
    set ft=gnuplot

  # Makefiles
  elseif name =~ 'make\>'
    set ft=make

  # Pike
  elseif name =~ '^pike\%(\>\|[0-9]\)'
    set ft=pike

  # Lua
  elseif name =~ 'lua'
    set ft=lua

  # Perl
  elseif name =~ 'perl'
    set ft=perl

  # PHP
  elseif name =~ 'php'
    set ft=php

  # Python
  elseif name =~ 'python'
    set ft=python

  # Groovy
  elseif name =~ '^groovy\>'
    set ft=groovy

  # Raku
  elseif name =~ 'raku'
    set ft=raku

  # Ruby
  elseif name =~ 'ruby'
    set ft=ruby

  # JavaScript
  elseif name =~ 'node\(js\)\=\>\|js\>' || name =~ 'rhino\>'
    set ft=javascript

  # BC calculator
  elseif name =~ '^bc\>'
    set ft=bc

  # sed
  elseif name =~ 'sed\>'
    set ft=sed

  # OCaml-scripts
  elseif name =~ 'ocaml'
    set ft=ocaml

  # Awk scripts; also finds "gawk"
  elseif name =~ 'awk\>'
    set ft=awk

  # Website MetaLanguage
  elseif name =~ 'wml'
    set ft=wml

  # Scheme scripts
  elseif name =~ 'scheme'
    set ft=scheme

  # CFEngine scripts
  elseif name =~ 'cfengine'
    set ft=cfengine

  # Erlang scripts
  elseif name =~ 'escript'
    set ft=erlang

  # Haskell
  elseif name =~ 'haskell'
    set ft=haskell

  # Scala
  elseif name =~ 'scala\>'
    set ft=scala

  # Clojure
  elseif name =~ 'clojure'
    set ft=clojure

  # Free Pascal
  elseif name =~ 'instantfpc\>'
    set ft=pascal

  # Fennel
  elseif name =~ 'fennel\>'
    set ft=fennel

  # MikroTik RouterOS script
  elseif name =~ 'rsc\>'
    set ft=routeros

  # Fish shell
  elseif name =~ 'fish\>'
    set ft=fish

  # Gforth
  elseif name =~ 'gforth\>'
    set ft=forth

  endif
enddef
194 | |
195 | |
# Called for a file that does not have "#!" in the first line.
#
# line1: text of the first line of the buffer.
#
# The first lines of the buffer (and for a few formats a later line) are
# compared with a list of known signatures.  The checks are done in the
# order below and the first one that matches sets 'filetype'; when nothing
# matches 'filetype' is not changed.
def DetectFromText(line1: string)
  var second = getline(2)
  var third = getline(3)
  var fourth = getline(4)
  var fifth = getline(5)

  # The first five lines, each one preceded by a line break, so that one
  # pattern can look for something at the start of any of them.
  var head = join(['', line1, second, third, fourth, fifth], "\n")

  # Patterns that are tried on each of the first five lines.
  const dnl_pat = '^\s*dnl\>'
  const virata_pat = '^%.\{-}[Vv]irata'

  # Bourne-like shell scripts: sh ksh bash bash2
  if line1 =~ '^:$'
    call dist#ft#SetFileTypeSH(line1)

  # Z shell scripts
  elseif line1 =~ '^#compdef\>'
      || line1 =~ '^#autoload\>'
      || head =~ '\n\s*emulate\s\+\%(-[LR]\s\+\)\=[ckz]\=sh\>'
    set filetype=zsh

  # ELM Mail files
  elseif line1 =~ '^From \([a-zA-Z][a-zA-Z_0-9\.=-]*\(@[^ ]*\)\=\|-\) .* \(19\|20\)\d\d$'
    set filetype=mail

  # Mason
  elseif line1 =~ '^<[%&].*>'
    set filetype=mason

  # Vim scripts (the first line must be '" vim' to trigger this)
  elseif line1 =~ '^" *[vV]im$'
    set filetype=vim

  # libcxx and libstdc++ standard library headers like "iostream" have no
  # extension, recognize the Emacs file mode.
  elseif line1 =~? '-\*-.*C++.*-\*-'
    set filetype=cpp

  # MOO
  elseif line1 =~ '^\*\* LambdaMOO Database, Format Version \%([1-3]\>\)\@!\d\+ \*\*$'
    set filetype=moo

  # Diff file:
  # - "diff" in first line (context diff)
  # - "Only in " in first line
  # - "--- " in first line and "+++ " in second line (unified diff).
  # - "*** " in first line and "--- " in second line (context diff).
  # - "# It was generated by makepatch " in the second line (makepatch diff).
  # - "Index: <filename>" in the first line (CVS file)
  # - "=== ", line of "=", "---", "+++ " (SVK diff)
  # - "=== ", "--- ", "+++ " (bzr diff, common case)
  # - "=== (removed|added|renamed|modified)" (bzr diff, alternative)
  # - "# HG changeset patch" in first line (Mercurial export format)
  elseif line1 =~ '^\(diff\>\|Only in \|\d\+\(,\d\+\)\=[cda]\d\+\>\|# It was generated by makepatch \|Index:\s\+\f\+\r\=$\|===== \f\+ \d\+\.\d\+ vs edited\|==== //\f\+#\d\+\|# HG changeset patch\)'
      || (line1 =~ '^--- ' && second =~ '^+++ ')
      || (line1 =~ '^\* looking for ' && second =~ '^\* comparing to ')
      || (line1 =~ '^\*\*\* ' && second =~ '^--- ')
      || (line1 =~ '^=== '
          && ((second =~ '^=\{66\}' && third =~ '^--- ' && fourth =~ '^+++')
              || (second =~ '^--- ' && third =~ '^+++ ')))
      || (line1 =~ '^=== \(removed\|added\|renamed\|modified\)')
    set filetype=diff

  # PostScript Files (the first line must be %!PS, like a2ps output)
  elseif line1 =~ '^%![ \t]*PS'
    set filetype=postscr

  # M4 scripts: Guess there is a line that starts with "dnl".
  elseif line1 =~ dnl_pat
      || second =~ dnl_pat
      || third =~ dnl_pat
      || fourth =~ dnl_pat
      || fifth =~ dnl_pat
    set filetype=m4

  # AmigaDos scripts
  elseif $TERM == "amiga" && (line1 =~ "^;" || line1 =~? '^\.bra')
    set filetype=amiga

  # SiCAD scripts (the first line must be procn or procd to trigger this)
  elseif line1 =~? '^ *proc[nd] *$'
    set filetype=sicad

  # Purify log files start with "**** Purify"
  elseif line1 =~ '^\*\*\*\* Purify'
    set filetype=purifylog

  # XML
  elseif line1 =~ '<?\s*xml.*?>'
    set filetype=xml

  # XHTML (e.g.: PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN")
  elseif line1 =~ '\<DTD\s\+XHTML\s'
    set filetype=xhtml

  # HTML (e.g.: <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN")
  # Avoid "doctype html", used by slim.
  elseif line1 =~? '<!DOCTYPE\s\+html\>'
    set filetype=html

  # PDF
  elseif line1 =~ '^%PDF-'
    set filetype=pdf

  # XXD output
  elseif line1 =~ '^\x\{7}: \x\{2} \=\x\{2} \=\x\{2} \=\x\{2} '
    set filetype=xxd

  # RCS/CVS log output
  elseif line1 =~ '^RCS file:' || second =~ '^RCS file:'
    set filetype=rcslog

  # CVS commit
  elseif second =~ '^CVS:' || getline("$") =~ '^CVS: '
    set filetype=cvs

  # Prescribe
  elseif line1 =~ '^!R!'
    set filetype=prescribe

  # Send-pr
  elseif line1 =~ '^SEND-PR:'
    set filetype=sendpr

  # SNNS files
  elseif line1 =~ '^SNNS network definition file'
    set filetype=snnsnet
  elseif line1 =~ '^SNNS pattern definition file'
    set filetype=snnspat
  elseif line1 =~ '^SNNS result file'
    set filetype=snnsres

  # Virata
  elseif line1 =~ virata_pat
      || second =~ virata_pat
      || third =~ virata_pat
      || fourth =~ virata_pat
      || fifth =~ virata_pat
    set filetype=virata

  # Strace
  elseif line1 =~ '[0-9:.]* *execve(' || line1 =~ '^__libc_start_main'
    set filetype=strace

  # VSE JCL
  elseif line1 =~ '^\* $$ JOB\>' || line1 =~ '^// *JOB\>'
    set filetype=vsejcl

  # TAK and SINDA
  elseif fourth =~ 'K & K Associates' || second =~ 'TAK 2000'
    set filetype=takout
  elseif third =~ 'S Y S T E M S I M P R O V E D '
    set filetype=sindaout
  elseif getline(6) =~ 'Run Date: '
    set filetype=takcmp
  elseif getline(9) =~ 'Node File 1'
    set filetype=sindacmp

  # DNS zone files; the first four lines are glued together without a
  # separator before matching.
  elseif join([line1, second, third, fourth], '') =~ '^; <<>> DiG [0-9.]\+.* <<>>\|$ORIGIN\|$TTL\|IN\s\+SOA'
    set filetype=bindzone

  # BAAN
  elseif (line1 =~ '|\*\{1,80}' && second =~ 'VRC ')
      || (second =~ '|\*\{1,80}' && third =~ 'VRC ')
    set filetype=baan

  # Valgrind
  elseif line1 =~ '^==\d\+== valgrind' || third =~ '^==\d\+== Using valgrind'
    set filetype=valgrind

  # Go docs
  elseif line1 =~ '^PACKAGE DOCUMENTATION$'
    set filetype=godoc

  # Renderman Interface Bytestream
  elseif line1 =~ '^##RenderMan'
    set filetype=rib

  # Scheme scripts
  elseif line1 =~ 'exec\s\+\S*scheme' || second =~ 'exec\s\+\S*scheme'
    set filetype=scheme

  # Git output
  elseif line1 =~ '^\(commit\|tree\|object\) \x\{40,\}\>\|^tag \S\+$'
    set filetype=git

  # Gprof (gnu profiler)
  elseif line1 == 'Flat profile:'
      && second == ''
      && third =~ '^Each sample counts as .* seconds.$'
    set filetype=gprof

  # Erlang terms
  # (See also: http://www.gnu.org/software/emacs/manual/html_node/emacs/Choosing-Modes.html#Choosing-Modes)
  elseif line1 =~? '-\*-.*erlang.*-\*-'
    set filetype=erlang

  # YAML
  elseif line1 =~ '^%YAML'
    set filetype=yaml

  # MikroTik RouterOS script
  elseif line1 =~ '^#.*by RouterOS.*$'
    set filetype=routeros

  # Sed scripts
  # #ncomment is allowed but most likely a false positive, so require a space
  # before any trailing comment text
  elseif line1 =~ '^#n\%($\|\s\)'
    set filetype=sed

  else
    # Skip over leading lines that start with "? ", but never move past the
    # last line of the buffer.
    var last = line("$")
    var row = 1
    while row < last && getline(row) =~ "^? "
      row += 1
    endwhile

    if getline(row) =~ '^Index:\s\+\f\+$'
      # CVS diff
      set filetype=diff

    # locale input files: Formal Definitions of Cultural Conventions
    # filename must be like en_US, fr_FR@euro or en_US.UTF-8
    elseif expand("%") =~ '\a\a_\a\a\($\|[.@]\)\|i18n$\|POSIX$\|translit_'
      # Look for an LC_ category line in lines 1 to 99, leaving out the last
      # line of the buffer.
      for nr in range(1, min([99, line("$") - 1]))
        if getline(nr) =~ '^LC_\(IDENTIFICATION\|CTYPE\|COLLATE\|MONETARY\|NUMERIC\|TIME\|MESSAGES\|PAPER\|TELEPHONE\|MEASUREMENT\|NAME\|ADDRESS\)$'
          setf fdcc
          break
        endif
      endfor
    endif
  endif
enddef