@@ -154,7 +154,9 @@ export async function fileTypeStream(webStream, options) {
154
154
155
155
export class FileTypeParser {
156
156
constructor ( options ) {
157
- this . detectors = [ ...( options ?. customDetectors ?? [ ] ) , this . parse ] ;
157
+ this . detectors = [ ...( options ?. customDetectors ?? [ ] ) ,
158
+ { id : 'core' , detect : this . detectConfident } ,
159
+ { id : 'core.imprecise' , detect : this . detectImprecise } ] ;
158
160
this . tokenizerOptions = {
159
161
abortSignal : options ?. signal ,
160
162
} ;
@@ -165,7 +167,7 @@ export class FileTypeParser {
165
167
166
168
// Iterate through all file-type detectors
167
169
for ( const detector of this . detectors ) {
168
- const fileType = await detector ( tokenizer ) ;
170
+ const fileType = await detector . detect ( tokenizer ) ;
169
171
if ( fileType ) {
170
172
return fileType ;
171
173
}
@@ -256,7 +258,8 @@ export class FileTypeParser {
256
258
return this . check ( stringToBytes ( header ) , options ) ;
257
259
}
258
260
259
- parse = async tokenizer => {
261
+ // Detections with a high degree of certainty in identifying the correct file type
262
+ detectConfident = async tokenizer => {
260
263
this . buffer = new Uint8Array ( reasonableDetectionSizeInBytes ) ;
261
264
262
265
// Keep reading until EOF if the file size is unknown.
@@ -346,7 +349,7 @@ export class FileTypeParser {
346
349
if ( this . check ( [ 0xEF , 0xBB , 0xBF ] ) ) { // UTF-8-BOM
347
350
// Strip off UTF-8-BOM
348
351
this . tokenizer . ignore ( 3 ) ;
349
- return this . parse ( tokenizer ) ;
352
+ return this . detectConfident ( tokenizer ) ;
350
353
}
351
354
352
355
if ( this . check ( [ 0x47 , 0x49 , 0x46 ] ) ) {
@@ -1406,39 +1409,6 @@ export class FileTypeParser {
1406
1409
return undefined ; // Some unknown text based format
1407
1410
}
1408
1411
1409
- // -- Unsafe signatures --
1410
-
1411
- if (
1412
- this . check ( [ 0x0 , 0x0 , 0x1 , 0xBA ] )
1413
- || this . check ( [ 0x0 , 0x0 , 0x1 , 0xB3 ] )
1414
- ) {
1415
- return {
1416
- ext : 'mpg' ,
1417
- mime : 'video/mpeg' ,
1418
- } ;
1419
- }
1420
-
1421
- if ( this . check ( [ 0x00 , 0x01 , 0x00 , 0x00 , 0x00 ] ) ) {
1422
- return {
1423
- ext : 'ttf' ,
1424
- mime : 'font/ttf' ,
1425
- } ;
1426
- }
1427
-
1428
- if ( this . check ( [ 0x00 , 0x00 , 0x01 , 0x00 ] ) ) {
1429
- return {
1430
- ext : 'ico' ,
1431
- mime : 'image/x-icon' ,
1432
- } ;
1433
- }
1434
-
1435
- if ( this . check ( [ 0x00 , 0x00 , 0x02 , 0x00 ] ) ) {
1436
- return {
1437
- ext : 'cur' ,
1438
- mime : 'image/x-icon' ,
1439
- } ;
1440
- }
1441
-
1442
1412
if ( this . check ( [ 0xD0 , 0xCF , 0x11 , 0xE0 , 0xA1 , 0xB1 , 0x1A , 0xE1 ] ) ) {
1443
1413
// Detected Microsoft Compound File Binary File (MS-CFB) Format.
1444
1414
return {
@@ -1644,6 +1614,45 @@ export class FileTypeParser {
1644
1614
mime : 'application/pgp-encrypted' ,
1645
1615
} ;
1646
1616
}
1617
+ } ;
1618
+
1619
+ // Detections with limited supporting data, resulting in a higher likelihood of false positives
1620
+ detectImprecise = async tokenizer => {
1621
+ this . buffer = new Uint8Array ( reasonableDetectionSizeInBytes ) ;
1622
+
1623
+ // Read initial sample size of 8 bytes
1624
+ await tokenizer . peekBuffer ( this . buffer , { length : Math . min ( 8 , tokenizer . fileInfo . size ) , mayBeLess : true } ) ;
1625
+
1626
+ if (
1627
+ this . check ( [ 0x0 , 0x0 , 0x1 , 0xBA ] )
1628
+ || this . check ( [ 0x0 , 0x0 , 0x1 , 0xB3 ] )
1629
+ ) {
1630
+ return {
1631
+ ext : 'mpg' ,
1632
+ mime : 'video/mpeg' ,
1633
+ } ;
1634
+ }
1635
+
1636
+ if ( this . check ( [ 0x00 , 0x01 , 0x00 , 0x00 , 0x00 ] ) ) {
1637
+ return {
1638
+ ext : 'ttf' ,
1639
+ mime : 'font/ttf' ,
1640
+ } ;
1641
+ }
1642
+
1643
+ if ( this . check ( [ 0x00 , 0x00 , 0x01 , 0x00 ] ) ) {
1644
+ return {
1645
+ ext : 'ico' ,
1646
+ mime : 'image/x-icon' ,
1647
+ } ;
1648
+ }
1649
+
1650
+ if ( this . check ( [ 0x00 , 0x00 , 0x02 , 0x00 ] ) ) {
1651
+ return {
1652
+ ext : 'cur' ,
1653
+ mime : 'image/x-icon' ,
1654
+ } ;
1655
+ }
1647
1656
1648
1657
// Check MPEG 1 or 2 Layer 3 header, or 'layer 0' for ADTS (MPEG sync-word 0xFFE)
1649
1658
if ( this . buffer . length >= 2 && this . check ( [ 0xFF , 0xE0 ] , { offset : 0 , mask : [ 0xFF , 0xE0 ] } ) ) {
0 commit comments