forked from github-linguist/linguist
-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_blob.rb
360 lines (290 loc) · 13.8 KB
/
test_blob.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
require_relative "./helper"
class TestBlob < Minitest::Test
include Linguist
# Runs before each test: remembers the current default external encoding and
# then switches it to ASCII-8BIT.
def setup
  # Keep the value we are about to overwrite so it can be restored afterwards.
  @original_external = Encoding.default_external

  # Git blobs are normally loaded as ASCII-8BIT because they may hold data in
  # an arbitrary encoding that is not known ahead of time.
  silence_warnings { Encoding.default_external = Encoding.find("ASCII-8BIT") }
end
# Runs after each test: puts back the default external encoding stored in
# @original_external.
def teardown
  silence_warnings { Encoding.default_external = @original_external }
end
# Loads the sample called +name+ in memory and overwrites its @name instance
# variable with 'script'. Returns the blob.
def script_blob(name)
  sample_blob_memory(name).tap do |blob|
    blob.instance_variable_set(:@name, 'script')
  end
end
# The blob loaded from "Ruby/foo.rb" must report "foo.rb" as its name.
def test_name
  blob = sample_blob_memory("Ruby/foo.rb")
  assert_equal "foo.rb", blob.name
end
# Checks the MIME type reported for a binary fixture, two source samples and
# an extension-less text fixture.
def test_mime_type
  illustrator = fixture_blob_memory("Binary/octocat.ai")
  assert_equal "application/postscript", illustrator.mime_type

  ruby_source = sample_blob_memory("Ruby/grit.rb")
  assert_equal "application/x-ruby", ruby_source.mime_type

  shell_script = sample_blob_memory("Shell/script.sh")
  assert_equal "application/x-sh", shell_script.mime_type

  readme = fixture_blob_memory("Data/README")
  assert_equal "text/plain", readme.mime_type
end
# Checks the content type reported for a PDF, a PNG and a text fixture (the
# text one carries a charset suffix).
def test_content_type
  # Fixture path => expected content type, checked in this order.
  expected_types = {
    "Binary/foo.pdf" => "application/pdf",
    "Binary/foo.png" => "image/png",
    "Data/README" => "text/plain; charset=iso-8859-2"
  }

  expected_types.each do |path, content_type|
    assert_equal content_type, fixture_blob_memory(path).content_type
  end
end
# Checks the disposition string: four binary fixtures are expected to be
# attachments (with a filename), the remaining blobs are expected inline.
def test_disposition
  # Fixture path => expected attachment disposition, checked in this order.
  attachments = {
    "Binary/foo bar.jar" => "attachment; filename=foo+bar.jar",
    "Binary/foo.bin" => "attachment; filename=foo.bin",
    "Binary/linguist.gem" => "attachment; filename=linguist.gem",
    "Binary/octocat.ai" => "attachment; filename=octocat.ai"
  }
  attachments.each do |path, expected|
    assert_equal expected, fixture_blob_memory(path).disposition
  end

  assert_equal "inline", fixture_blob_memory("Data/README").disposition
  ["Text/foo.txt", "Ruby/grit.rb"].each do |path|
    assert_equal "inline", sample_blob_memory(path).disposition
  end
  # A PNG is a binary fixture but is still expected inline.
  assert_equal "inline", fixture_blob_memory("Binary/octocat.png").disposition
end
# The blob's data must be the exact content of the "Ruby/foo.rb" sample.
def test_data
  blob = sample_blob_memory("Ruby/foo.rb")
  assert_equal "module Foo\nend\n", blob.data
end
# Checks how blob content is split into lines: exact lines for two small
# samples and the line count of a larger one.
def test_lines
  ruby_lines = sample_blob_memory("Ruby/foo.rb").lines
  assert_equal ["module Foo", "end"], ruby_lines

  # NOTE(review): presumably this sample uses classic Mac (CR) line endings,
  # going by its name; confirm against the sample file.
  mac_lines = sample_blob_memory("Text/mac.txt").lines
  assert_equal ["line 1", "line 2"], mac_lines

  elisp_lines = sample_blob_memory("Emacs Lisp/ess-julia.el").lines
  assert_equal 474, elisp_lines.length
end
# Each line must keep the encoding the file was read in, whatever encoding is
# detected for the content.
def test_lines_maintains_original_encoding
  # Earlier versions of the gem implicitly guaranteed that every `line` has
  # the encoding the file was originally read with (in practice UTF-8 or
  # ASCII-8BIT), even when the content is detected as e.g. UTF-16LE.
  read_encoding = Encoding.default_external
  first_line = fixture_blob_memory("Data/utf16le").lines.first
  assert_equal read_encoding, first_line.encoding
end
# The "Ruby/foo.rb" sample must report a size of 15.
def test_size
  blob = sample_blob_memory("Ruby/foo.rb")
  assert_equal 15, blob.size
end
# The "Ruby/foo.rb" sample must report 2 lines of code.
def test_loc
  blob = sample_blob_memory("Ruby/foo.rb")
  assert_equal 2, blob.loc
end
# Checks the `sloc` count for a Ruby sample and for two non-UTF-8 fixtures.
def test_sloc
  ruby_sample = sample_blob_memory("Ruby/foo.rb")
  assert_equal 2, ruby_sample.sloc

  # Fixture path => expected sloc, checked in this order.
  { "Data/utf16le-windows" => 3, "Data/iso8859-8-i" => 1 }.each do |path, expected|
    assert_equal expected, fixture_blob_memory(path).sloc
  end
end
# Checks both `encoding` and `ruby_encoding` for several text blobs, and that
# a binary object file has no encoding at all.
def test_encoding
  readme = "Data/README"
  assert_equal "ISO-8859-2", fixture_blob_memory(readme).encoding
  assert_equal "ISO-8859-2", fixture_blob_memory(readme).ruby_encoding

  plain_text = "Text/foo.txt"
  assert_equal "UTF-8", sample_blob_memory(plain_text).encoding
  assert_equal "UTF-8", sample_blob_memory(plain_text).ruby_encoding

  ["Data/utf16le", "Data/utf16le-windows"].each do |path|
    assert_equal "UTF-16LE", fixture_blob_memory(path).encoding
    assert_equal "UTF-16LE", fixture_blob_memory(path).ruby_encoding
  end

  # For this fixture the two accessors are expected to differ:
  # `ruby_encoding` is "binary" while `encoding` is "ISO-2022-KR".
  korean = "Text/ISO-2022-KR.txt"
  assert_equal "ISO-2022-KR", fixture_blob_memory(korean).encoding
  assert_equal "binary", fixture_blob_memory(korean).ruby_encoding

  assert_nil fixture_blob_memory("Binary/dog.o").encoding
end
# Checks `binary?`: the listed binary fixtures must be detected as binary,
# while a text fixture and two source samples must not.
#
# Negative checks use `refute` (as other tests in this file do) instead of
# `assert !...`, and every check carries a message naming the path so a
# failure identifies the offending file.
def test_binary
  binary_fixtures = %w[
    Binary/git.deb
    Binary/hello.pbc
    Binary/linguist.gem
    Binary/octocat.ai
    Binary/octocat.png
    Binary/zip
  ]
  binary_fixtures.each do |path|
    assert fixture_blob_memory(path).binary?, "#{path} should be binary"
  end

  refute fixture_blob_memory("Data/README").binary?, "Data/README should not be binary"
  %w[Ruby/foo.rb Perl/script.pl].each do |path|
    refute sample_blob_memory(path).binary?, "#{path} should not be binary"
  end
end
# Every entry yielded by `Samples.each` must be neither `likely_binary?` nor
# `binary?`.
#
# Uses `refute` (as other tests in this file do) rather than `assert !(...)`;
# the failure message is unchanged.
def test_all_binary
  Samples.each do |sample|
    path = sample[:path]
    blob = sample_blob_memory(path)
    refute(blob.likely_binary? || blob.binary?, "#{path} is a binary file")
  end
end
# Checks `text?` for three extension-less fixtures and one shell sample.
def test_text
  %w[Data/README Data/md].each do |path|
    assert fixture_blob_memory(path).text?
  end

  shell_script = sample_blob_memory("Shell/script.sh")
  assert shell_script.text?

  txt_fixture = fixture_blob_memory("Data/txt")
  assert txt_fixture.text?
end
# Checks `image?`: the PNG fixture is an image, the .ai and .psd fixtures
# are not.
#
# Negative checks use `refute` (as other tests in this file do) instead of
# `assert !...`, and each check names the path in its failure message.
def test_image
  assert fixture_blob_memory("Binary/octocat.png").image?, "Binary/octocat.png should be an image"

  %w[Binary/octocat.ai Binary/octocat.psd].each do |path|
    refute fixture_blob_memory(path).image?, "#{path} should not be an image"
  end
end
# Both cube.stl fixtures (under Binary/ and under Data/) must be `solid?`.
def test_solid
  %w[Binary/cube.stl Data/cube.stl].each do |path|
    assert fixture_blob_memory(path).solid?
  end
end
# The "CSV/cars.csv" sample must be recognised by `csv?`.
def test_csv
  cars = sample_blob_memory("CSV/cars.csv")
  assert cars.csv?
end
# The "Binary/foo.pdf" fixture must be recognised by `pdf?`.
def test_pdf
  document = fixture_blob_memory("Binary/foo.pdf")
  assert document.pdf?
end
# Checks `viewable?`: the text fixture and source samples are viewable, the
# binary fixtures are not.
#
# Negative checks use `refute` (as other tests in this file do) instead of
# `assert !...`, and each check names the path in its failure message.
def test_viewable
  assert fixture_blob_memory("Data/README").viewable?, "Data/README should be viewable"
  %w[Ruby/foo.rb Perl/script.pl].each do |path|
    assert sample_blob_memory(path).viewable?, "#{path} should be viewable"
  end

  %w[Binary/linguist.gem Binary/octocat.ai Binary/octocat.png].each do |path|
    refute fixture_blob_memory(path).viewable?, "#{path} should not be viewable"
  end
end
# Checks `generated?` across many samples and fixtures.
#
# Expectations are kept in path => expected tables so that every failure
# message names the file involved (a bare `assert` only reports "Expected
# false to be truthy"). Negative checks use `refute`, as other tests in this
# file do, instead of `assert !...`.
def test_generated
  # Asserts or refutes `generated?` on +blob+, naming +path+ on failure.
  expect_generated = lambda do |blob, path, expected|
    if expected
      assert blob.generated?, "#{path} should be detected as generated"
    else
      refute blob.generated?, "#{path} should not be detected as generated"
    end
  end

  expect_generated.call(fixture_blob_memory("Data/README"), "Data/README", false)

  # Samples loaded through sample_blob_memory: path => generated? expected.
  memory_samples = {
    # Generated .NET Docfiles
    "XML/net_docfile.xml" => true,
    # Long line
    "JavaScript/uglify.js" => false,
    # Inlined JS, but mostly code
    "JavaScript/json2_backbone.js" => false,
    # Minified JS (the unminified source must not be flagged)
    "JavaScript/jquery-1.6.1.js" => false,
    "JavaScript/jquery-1.6.1.min.js" => true,
    "JavaScript/jquery-1.4.2.min.js" => true,
    # Go lockfiles
    "TOML/filenames/Gopkg.lock" => true,
    "YAML/filenames/glide.lock" => true,
    # Cargo generated Cargo.lock file
    "TOML/filenames/Cargo.lock" => true,
    # Composer generated composer.lock file
    "JSON/filenames/composer.lock" => true,
    # PEG.js-generated parsers
    "JavaScript/parser.js" => true,
    # Generated PostScript (this sample must not be flagged)
    "PostScript/sierpinski.ps" => false,
    # This example is too basic to tell
    "JavaScript/hello.js" => false,
    # Other JavaScript samples expected to be flagged
    "JavaScript/intro-old.js" => true,
    "JavaScript/classes-old.js" => true,
    "JavaScript/intro.js" => true,
    "JavaScript/classes.js" => true,
    "JavaScript/ccalc-lex.js" => true,
    "JavaScript/ccalc-parse.js" => true,
    # Protocol Buffer generated code
    "C++/protocol-buffer.pb.h" => true,
    "C++/protocol-buffer.pb.cc" => true,
    "Java/ProtocolBuffer.java" => true,
    "Python/protocol_buffer_pb2.py" => true,
    "Go/api.pb.go" => true,
    "Go/embedded.go" => true,
    "JavaScript/proto.js" => true,
    # Apache Thrift generated code
    "Python/gen-py-linguist-thrift.py" => true,
    "Go/gen-go-linguist-thrift.go" => true,
    "Java/gen-java-linguist-thrift.java" => true,
    "JavaScript/gen-js-linguist-thrift.js" => true,
    "Ruby/gen-rb-linguist-thrift.rb" => true,
    "Objective-C/gen-cocoa-linguist-thrift.m" => true,
    "PHP/ThriftGenerated.php" => true,
    # Generated JNI
    "C/jni_layer.h" => true,
    # Minified CSS (the unminified source must not be flagged)
    "CSS/bootstrap.css" => false,
    "CSS/bootstrap.min.css" => true,
    # Generated VCR
    "YAML/vcr_cassette.yml" => true,
    # Generated by Zephir (this sample must not be flagged)
    "Zephir/Router.zep" => false,
    # Cython-generated C/C++
    "C/sgd_fast.c" => true,
    "C++/wrapper_inner.cpp" => true,
    # Unity3D-generated metadata
    "Unity3D Asset/Tiles.meta" => true,
    # Racc-generated Ruby
    "Ruby/racc.rb" => true,
    # protobuf/grpc-plugin C++
    "C++/hello.grpc.pb.h" => true,
    "C++/grpc.pb.cc" => true,
    # Generated HTML
    "HTML/pkgdown.html" => true,
    "HTML/pages.html" => true
  }
  memory_samples.each do |path, expected|
    expect_generated.call(sample_blob_memory(path), path, expected)
  end

  # Go vendored dependencies. These go through the `sample_blob` and
  # `fixture_blob` helpers rather than the *_memory variants.
  expect_generated.call(sample_blob("vendor/vendor.json"), "vendor/vendor.json", false)
  expect_generated.call(sample_blob("vendor/github.com/kr/s3/sign.go"), "vendor/github.com/kr/s3/sign.go", true)
  expect_generated.call(fixture_blob("go/food_vendor/candy.go"), "go/food_vendor/candy.go", false)

  # Generated HTML kept under the fixtures.
  %w[HTML/mandoc.html HTML/node78.html].each do |path|
    expect_generated.call(fixture_blob_memory(path), path, true)
  end
end
# Checks `vendored?`: a plain fixture is not vendored, while a file under a
# Go `testdata` directory is.
#
# The negative check uses `refute` (as other tests in this file do) instead
# of `assert !...`, and both checks name the path in their failure message.
def test_vendored
  refute fixture_blob_memory("Data/README").vendored?, "Data/README should not be vendored"

  # Go fixtures
  assert sample_blob("Go/testdata/foo.yml").vendored?, "Go/testdata/foo.yml should be vendored"
end
# Checks language detection twice: once for every entry yielded by
# `Samples.each`, and once for the files under the fixtures directory next to
# this test file.
def test_language
  # Name a language is compared under: its fs_name when set, else its name.
  filed_under = ->(lang) { lang.fs_name || lang.name }

  Samples.each do |sample|
    path = sample[:path]
    blob = sample_blob_memory(path)
    assert blob.language, "No language for #{path}"
    fs_name = filed_under.call(blob.language)
    assert_equal sample[:language], fs_name, blob.name
  end

  # Test language detection for files which shouldn't be used as samples
  fixtures_root = File.expand_path('../fixtures', __FILE__)
  skipped_entries = ['.', '..', 'Binary']
  Dir.entries(fixtures_root).each do |language|
    next if skipped_entries.include?(language)
    next if File.basename(language) == 'ace_modes.json'

    # Each directory contains test files of a language
    language_dir = File.join(fixtures_root, language)
    Dir.entries(language_dir).each do |filename|
      # By default a blob is looked up among the samples, so the fixture has
      # to be addressed by its absolute path.
      filepath = File.join(language_dir, filename)
      next unless File.file?(filepath)

      blob = fixture_blob_memory(filepath)
      case language
      when 'Data'
        # Files under Data must not resolve to any language.
        assert blob.language.nil?, "A language was found for #{filepath}"
      when 'Generated'
        # Files under Generated are only checked for being generated.
        assert blob.generated?, "#{filepath} is not a generated file"
      else
        # Any other directory name is the language its files must resolve to.
        assert blob.language, "No language for #{filepath}"
        fs_name = filed_under.call(blob.language)
        assert_equal language, fs_name, blob.name
      end
    end
  end
end
# A minified JavaScript sample must not be reported as `safe_to_colorize?`.
#
# Uses `refute` (as other tests in this file do) instead of `assert !...`,
# with a failure message naming the sample.
def test_minified_files_not_safe_to_highlight
  path = "JavaScript/jquery-1.6.1.min.js"
  refute sample_blob_memory(path).safe_to_colorize?, "#{path} should not be safe to colorize"
end
# Checks `empty?` from Linguist::BlobHelper on a minimal struct that only
# exposes `data`: "" and nil are empty; whitespace and text are not.
def test_empty
  blob_class = Struct.new(:data) { include Linguist::BlobHelper }

  ["", nil].each do |data|
    assert blob_class.new(data).empty?
  end

  [" ", "nope"].each do |data|
    refute blob_class.new(data).empty?
  end
end
# Checks `include_in_language_stats?` for generated, data and prose blobs,
# and how overriding `detectable?`, `vendored?`, `documentation?` and
# `generated?` on individual blobs affects it.
def test_include_in_language_stats
  minified_css = sample_blob_memory("CSS/bootstrap.min.css")
  assert_predicate minified_css, :generated?
  refute_predicate minified_css, :include_in_language_stats?

  ant_build = sample_blob_memory("Ant Build System/filenames/ant.xml")
  assert_equal :data, ant_build.language.type
  refute_predicate ant_build, :include_in_language_stats?

  markdown = sample_blob_memory("Markdown/tender.md")
  assert_equal :prose, markdown.language.type
  refute_predicate markdown, :include_in_language_stats?

  # NOTE(review): this blob is already excluded before any override is
  # applied, so the refutations on its clones below presumably cannot fail
  # because of the overrides alone. Confirm that is intended.
  html_page = sample_blob_memory("HTML/pages.html")
  refute_predicate html_page, :include_in_language_stats?

  # Test detectable override (i.e by .gitattributes)
  markdown.define_singleton_method(:detectable?) { true }
  assert_predicate markdown, :include_in_language_stats?

  not_detectable = html_page.clone
  not_detectable.define_singleton_method(:detectable?) { false }
  refute_predicate not_detectable, :include_in_language_stats?

  # Test not included if vendored, documentation or generated overridden
  # even if detectable
  %i[vendored? documentation? generated?].each do |exclusion|
    excluded = html_page.clone
    excluded.define_singleton_method(exclusion) { true }
    refute_predicate excluded, :include_in_language_stats?

    excluded.define_singleton_method(:detectable?) { true }
    refute_predicate excluded, :include_in_language_stats?
  end
end
end