comparison runtime/autoload/dist/script.vim @ 27692:7346315e8517 v8.2.4372

patch 8.2.4372: filetype detection from file contents is in legacy script Commit: https://github.com/vim/vim/commit/299d8e5eec8f8ae91177f7feb67ad59402dfa8e2 Author: Bram Moolenaar <Bram@vim.org> Date: Sun Feb 13 20:32:02 2022 +0000 patch 8.2.4372: filetype detection from file contents is in legacy script Problem: Filetype detection from file contents is in legacy script. Solution: Use a compiled function for filetype detection.
author Bram Moolenaar <Bram@vim.org>
date Sun, 13 Feb 2022 21:45:02 +0100
parents
children 71759abd2145
comparison
equal deleted inserted replaced
27691:8ecc769fa19b 27692:7346315e8517
1 vim9script
2
3 # Vim function for detecting a filetype from the file contents.
4 # Invoked from "scripts.vim" in 'runtimepath'
5 #
6 # Maintainer: Bram Moolenaar <Bram@vim.org>
7 # Last Change: 2022 Feb 13
8
# Entry point for detecting the filetype from the file contents.
# Reads line 1 of the current buffer and passes it to DetectFromHashBang()
# when it begins with "#!", otherwise to DetectFromText().
export def DetectFiletype()
  var first = getline(1)
  if first =~ '^#!'
    # Interpreter line: decide from the program name.
    DetectFromHashBang(first)
    return
  endif
  # Anything else: decide from the text of the first few lines.
  DetectFromText(first)
enddef
19
# Called for a script that has "#!" in the first line.
# Extracts the interpreter name from that line and sets 'filetype' for the
# interpreters recognized below.  When the name is not recognized 'filetype'
# is left alone.
#   firstline: text of line 1 of the current buffer, starting with "#!"
def DetectFromHashBang(firstline: string)
  var line1 = firstline

  # Check for a line like "#!/usr/bin/env {options} bash".  Turn it into
  # "#!/usr/bin/bash" to make matching easier.
  # Recognize only a few {options} that are commonly used.
  if line1 =~ '^#!\s*\S*\<env\s'
    # Drop "VAR=value" environment assignments.
    line1 = substitute(line1, '\S\+=\S\+', '', 'g')
    # Drop every recognized option, not just the first one, and only when it
    # is a whitespace-delimited word, so that "env -i -S prog" and "env -iS
    # prog" both reduce to "env prog".
    line1 = substitute(line1,
          '\s\zs\%(-[iS]\+\|--ignore-environment\|--split-string\)\ze\s',
          '', 'g')
    line1 = substitute(line1, '\<env\s\+', '', '')
  endif

  # Get the program name.
  # Only accept spaces in PC style paths: "#!c:/program files/perl [args]".
  # If the word env is used, use the first word after the space:
  # "#!/usr/bin/env perl [path/args]"
  # If there is no path use the first word: "#!perl [path/args]".
  # Otherwise get the last word after a slash: "#!/usr/bin/perl [path/args]".
  var name: string
  if line1 =~ '^#!\s*\a:[/\\]'
    name = substitute(line1, '^#!.*[/\\]\(\i\+\).*', '\1', '')
  elseif line1 =~ '^#!.*\<env\>'
    name = substitute(line1, '^#!.*\<env\>\s\+\(\i\+\).*', '\1', '')
  elseif line1 =~ '^#!\s*[^/\\ ]*\>\([^/\\]\|$\)'
    name = substitute(line1, '^#!\s*\([^/\\ ]*\>\).*', '\1', '')
  else
    name = substitute(line1, '^#!\s*\S*[/\\]\(\i\+\).*', '\1', '')
  endif

  # tcl scripts may have #!/bin/sh in the first line and "exec wish" in the
  # third line.  Suggested by Steven Atkinson.
  if getline(3) =~ '^exec wish'
    name = 'wish'
  endif

  # Bourne-like shell scripts: bash bash2 ksh ksh93 sh
  # (the stray empty alternative that used to be in this pattern could never
  # match in front of "\>" and has been removed)
  if name =~ '^\(bash\d*\|ksh\d*\|sh\)\>'
    dist#ft#SetFileTypeSH(line1)

    # csh scripts; g:filetype_csh overrides the shell name when it exists
  elseif name =~ '^csh\>'
    dist#ft#SetFileTypeShell(get(g:, 'filetype_csh', 'csh'))

    # tcsh scripts
  elseif name =~ '^tcsh\>'
    dist#ft#SetFileTypeShell("tcsh")

    # Z shell scripts
  elseif name =~ '^zsh\>'
    set ft=zsh

    # TCL scripts
  elseif name =~ '^\(tclsh\|wish\|expectk\|itclsh\|itkwish\)\>'
    set ft=tcl

    # Expect scripts
  elseif name =~ '^expect\>'
    set ft=expect

    # Gnuplot scripts
  elseif name =~ '^gnuplot\>'
    set ft=gnuplot

    # Makefiles
  elseif name =~ 'make\>'
    set ft=make

    # Pike
  elseif name =~ '^pike\%(\>\|[0-9]\)'
    set ft=pike

    # Lua
  elseif name =~ 'lua'
    set ft=lua

    # Perl
  elseif name =~ 'perl'
    set ft=perl

    # PHP
  elseif name =~ 'php'
    set ft=php

    # Python
  elseif name =~ 'python'
    set ft=python

    # Groovy
  elseif name =~ '^groovy\>'
    set ft=groovy

    # Raku
  elseif name =~ 'raku'
    set ft=raku

    # Ruby
  elseif name =~ 'ruby'
    set ft=ruby

    # JavaScript
  elseif name =~ 'node\(js\)\=\>\|js\>' || name =~ 'rhino\>'
    set ft=javascript

    # BC calculator
  elseif name =~ '^bc\>'
    set ft=bc

    # sed
  elseif name =~ 'sed\>'
    set ft=sed

    # OCaml-scripts
  elseif name =~ 'ocaml'
    set ft=ocaml

    # Awk scripts; also finds "gawk"
  elseif name =~ 'awk\>'
    set ft=awk

    # Website MetaLanguage
  elseif name =~ 'wml'
    set ft=wml

    # Scheme scripts
  elseif name =~ 'scheme'
    set ft=scheme

    # CFEngine scripts
  elseif name =~ 'cfengine'
    set ft=cfengine

    # Erlang scripts
  elseif name =~ 'escript'
    set ft=erlang

    # Haskell
  elseif name =~ 'haskell'
    set ft=haskell

    # Scala
  elseif name =~ 'scala\>'
    set ft=scala

    # Clojure
  elseif name =~ 'clojure'
    set ft=clojure

    # Free Pascal
  elseif name =~ 'instantfpc\>'
    set ft=pascal

    # Fennel
  elseif name =~ 'fennel\>'
    set ft=fennel

    # MikroTik RouterOS script
  elseif name =~ 'rsc\>'
    set ft=routeros

    # Fish shell
  elseif name =~ 'fish\>'
    set ft=fish

    # Gforth
  elseif name =~ 'gforth\>'
    set ft=forth

  endif
enddef
194
195
# Called for a script that does not have "#!" in the first line.
# Compares the first lines of the current buffer against a fixed, ordered list
# of patterns and sets 'filetype' for the first one that matches.  The order
# of the checks matters: an earlier match hides all later ones.
# Lines 2 to 5 are read up front; line 6, line 9 and the last line are only
# read when the checks that need them are reached.
#   line1: text of line 1 of the current buffer
def DetectFromText(line1: string)
  var line2 = getline(2)
  var line3 = getline(3)
  var line4 = getline(4)
  var line5 = getline(5)

  # Bourne-like shell scripts: sh ksh bash bash2
  if line1 =~ '^:$'
    dist#ft#SetFileTypeSH(line1)

    # Z shell scripts
  elseif line1 =~ '^#compdef\>'
        || line1 =~ '^#autoload\>'
        || "\n" .. line1 .. "\n" .. line2 .. "\n" .. line3 ..
           "\n" .. line4 .. "\n" .. line5
           =~ '\n\s*emulate\s\+\%(-[LR]\s\+\)\=[ckz]\=sh\>'
    set ft=zsh

    # ELM Mail files
  elseif line1 =~ '^From \([a-zA-Z][a-zA-Z_0-9\.=-]*\(@[^ ]*\)\=\|-\) .* \(19\|20\)\d\d$'
    set ft=mail

    # Mason
  elseif line1 =~ '^<[%&].*>'
    set ft=mason

    # Vim scripts (must have '" vim' as the first line to trigger this)
  elseif line1 =~ '^" *[vV]im$'
    set ft=vim

    # libcxx and libstdc++ standard library headers like "iostream" do not have
    # an extension, recognize the Emacs file mode.
  elseif line1 =~? '-\*-.*C++.*-\*-'
    set ft=cpp

    # MOO
  elseif line1 =~ '^\*\* LambdaMOO Database, Format Version \%([1-3]\>\)\@!\d\+ \*\*$'
    set ft=moo

    # Diff file:
    # - "diff" in first line (context diff)
    # - "Only in " in first line
    # - "--- " in first line and "+++ " in second line (unified diff).
    # - "*** " in first line and "--- " in second line (context diff).
    # - "# It was generated by makepatch " in the second line (makepatch diff).
    # - "Index: <filename>" in the first line (CVS file)
    # - "=== ", line of "=", "---", "+++ " (SVK diff)
    # - "=== ", "--- ", "+++ " (bzr diff, common case)
    # - "=== (removed|added|renamed|modified)" (bzr diff, alternative)
    # - "# HG changeset patch" in first line (Mercurial export format)
  elseif line1 =~ '^\(diff\>\|Only in \|\d\+\(,\d\+\)\=[cda]\d\+\>\|# It was generated by makepatch \|Index:\s\+\f\+\r\=$\|===== \f\+ \d\+\.\d\+ vs edited\|==== //\f\+#\d\+\|# HG changeset patch\)'
        || (line1 =~ '^--- ' && line2 =~ '^+++ ')
        || (line1 =~ '^\* looking for ' && line2 =~ '^\* comparing to ')
        || (line1 =~ '^\*\*\* ' && line2 =~ '^--- ')
        || (line1 =~ '^=== ' && ((line2 =~ '^=\{66\}' && line3 =~ '^--- ' && line4 =~ '^+++') || (line2 =~ '^--- ' && line3 =~ '^+++ ')))
        || (line1 =~ '^=== \(removed\|added\|renamed\|modified\)')
    set ft=diff

    # PostScript Files (must have %!PS as the first line, like a2ps output)
  elseif line1 =~ '^%![ \t]*PS'
    set ft=postscr

    # M4 scripts: Guess there is a line that starts with "dnl".
  elseif line1 =~ '^\s*dnl\>'
        || line2 =~ '^\s*dnl\>'
        || line3 =~ '^\s*dnl\>'
        || line4 =~ '^\s*dnl\>'
        || line5 =~ '^\s*dnl\>'
    set ft=m4

    # AmigaDos scripts
  elseif $TERM == "amiga" && (line1 =~ "^;" || line1 =~? '^\.bra')
    set ft=amiga

    # SiCAD scripts (must have procn or procd as the first line to trigger this)
  elseif line1 =~? '^ *proc[nd] *$'
    set ft=sicad

    # Purify log files start with "**** Purify"
  elseif line1 =~ '^\*\*\*\* Purify'
    set ft=purifylog

    # XML
  elseif line1 =~ '<?\s*xml.*?>'
    set ft=xml

    # XHTML (e.g.: PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN")
  elseif line1 =~ '\<DTD\s\+XHTML\s'
    set ft=xhtml

    # HTML (e.g.: <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN")
    # Avoid "doctype html", used by slim.
  elseif line1 =~? '<!DOCTYPE\s\+html\>'
    set ft=html

    # PDF
  elseif line1 =~ '^%PDF-'
    set ft=pdf

    # XXD output
  elseif line1 =~ '^\x\{7}: \x\{2} \=\x\{2} \=\x\{2} \=\x\{2} '
    set ft=xxd

    # RCS/CVS log output
  elseif line1 =~ '^RCS file:' || line2 =~ '^RCS file:'
    set ft=rcslog

    # CVS commit
  elseif line2 =~ '^CVS:' || getline("$") =~ '^CVS: '
    set ft=cvs

    # Prescribe
  elseif line1 =~ '^!R!'
    set ft=prescribe

    # Send-pr
  elseif line1 =~ '^SEND-PR:'
    set ft=sendpr

    # SNNS files
  elseif line1 =~ '^SNNS network definition file'
    set ft=snnsnet
  elseif line1 =~ '^SNNS pattern definition file'
    set ft=snnspat
  elseif line1 =~ '^SNNS result file'
    set ft=snnsres

    # Virata
  elseif line1 =~ '^%.\{-}[Vv]irata'
        || line2 =~ '^%.\{-}[Vv]irata'
        || line3 =~ '^%.\{-}[Vv]irata'
        || line4 =~ '^%.\{-}[Vv]irata'
        || line5 =~ '^%.\{-}[Vv]irata'
    set ft=virata

    # Strace
  elseif line1 =~ '[0-9:.]* *execve(' || line1 =~ '^__libc_start_main'
    set ft=strace

    # VSE JCL
  elseif line1 =~ '^\* $$ JOB\>' || line1 =~ '^// *JOB\>'
    set ft=vsejcl

    # TAK and SINDA
    # NOTE(review): runs of spaces inside these literal patterns may have been
    # collapsed when this text was extracted - confirm against upstream.
  elseif line4 =~ 'K & K Associates' || line2 =~ 'TAK 2000'
    set ft=takout
  elseif line3 =~ 'S Y S T E M S I M P R O V E D '
    set ft=sindaout
  elseif getline(6) =~ 'Run Date: '
    set ft=takcmp
  elseif getline(9) =~ 'Node File 1'
    set ft=sindacmp

    # DNS zone files
  elseif line1 .. line2 .. line3 .. line4 =~ '^; <<>> DiG [0-9.]\+.* <<>>\|$ORIGIN\|$TTL\|IN\s\+SOA'
    set ft=bindzone

    # BAAN
  elseif line1 =~ '|\*\{1,80}' && line2 =~ 'VRC '
        || line2 =~ '|\*\{1,80}' && line3 =~ 'VRC '
    set ft=baan

    # Valgrind
  elseif line1 =~ '^==\d\+== valgrind' || line3 =~ '^==\d\+== Using valgrind'
    set ft=valgrind

    # Go docs
  elseif line1 =~ '^PACKAGE DOCUMENTATION$'
    set ft=godoc

    # Renderman Interface Bytestream
  elseif line1 =~ '^##RenderMan'
    set ft=rib

    # Scheme scripts
  elseif line1 =~ 'exec\s\+\S*scheme' || line2 =~ 'exec\s\+\S*scheme'
    set ft=scheme

    # Git output
  elseif line1 =~ '^\(commit\|tree\|object\) \x\{40,\}\>\|^tag \S\+$'
    set ft=git

    # Gprof (gnu profiler)
  elseif line1 == 'Flat profile:'
        && line2 == ''
        && line3 =~ '^Each sample counts as .* seconds.$'
    set ft=gprof

    # Erlang terms
    # (See also: http://www.gnu.org/software/emacs/manual/html_node/emacs/Choosing-Modes.html#Choosing-Modes)
  elseif line1 =~? '-\*-.*erlang.*-\*-'
    set ft=erlang

    # YAML
  elseif line1 =~ '^%YAML'
    set ft=yaml

    # MikroTik RouterOS script
  elseif line1 =~ '^#.*by RouterOS.*$'
    set ft=routeros

    # Sed scripts
    # #ncomment is allowed but most likely a false positive so require a space
    # before any trailing comment text
  elseif line1 =~ '^#n\%($\|\s\)'
    set ft=sed

  else
    # Skip leading "? " lines, stopping at the last line of the buffer.
    var lnum = 1
    while getline(lnum) =~ "^? " && lnum < line("$")
      lnum += 1
    endwhile
    if getline(lnum) =~ '^Index:\s\+\f\+$'
      # CVS diff
      set ft=diff

      # locale input files: Formal Definitions of Cultural Conventions
      # filename must be like en_US, fr_FR@euro or en_US.UTF-8
    elseif expand("%") =~ '\a\a_\a\a\($\|[.@]\)\|i18n$\|POSIX$\|translit_'
      # Look at up to the first 99 lines.  Use "<=" so that the last line of
      # a short buffer is examined as well.
      lnum = 1
      while lnum < 100 && lnum <= line("$")
        if getline(lnum) =~ '^LC_\(IDENTIFICATION\|CTYPE\|COLLATE\|MONETARY\|NUMERIC\|TIME\|MESSAGES\|PAPER\|TELEPHONE\|MEASUREMENT\|NAME\|ADDRESS\)$'
          setf fdcc
          break
        endif
        lnum += 1
      endwhile
    endif
  endif
enddef