Skip to content

Commit 793ef33

Browse files
committed
Separate safe from unsafe detectors, introduce detector identification
Extend the `Detector` type with an `id` property, which allows users to understand which detector is doing what. The core detectors are separated into: - `default.safe`: Safe core detectors - `default.unsafe`: Collection of detectors with a higher probability of false positives
1 parent 1fe621a commit 793ef33

File tree

4 files changed

+75
-55
lines changed

4 files changed

+75
-55
lines changed

core.d.ts

+4-1
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,10 @@ console.log(fileType); // {ext: 'unicorn', mime: 'application/unicorn'}
162162
@param fileType - The file type detected by standard or previous custom detectors, or `undefined` if no match is found.
163163
@returns The detected file type, or `undefined` if no match is found.
164164
*/
165-
export type Detector = (tokenizer: ITokenizer, fileType?: FileTypeResult) => Promise<FileTypeResult | undefined>;
165+
export type Detector = {
166+
id: string;
167+
detect: (tokenizer: ITokenizer, fileType?: FileTypeResult) => Promise<FileTypeResult | undefined>;
168+
};
166169

167170
export type FileTypeOptions = {
168171
customDetectors?: Iterable<Detector>;

core.js

+44-37
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,9 @@ export async function fileTypeStream(webStream, options) {
129129

130130
export class FileTypeParser {
131131
constructor(options) {
132-
this.detectors = [...(options?.customDetectors ?? []), this.parse];
132+
this.detectors = [...(options?.customDetectors ?? []),
133+
{id: 'core.safe', detect: this.detectCore},
134+
{id: 'core.unsafe', detect: this.detectUnsafe}];
133135
this.tokenizerOptions = {
134136
abortSignal: options?.signal,
135137
};
@@ -140,7 +142,7 @@ export class FileTypeParser {
140142

141143
// Iterate through all file-type detectors
142144
for (const detector of this.detectors) {
143-
const fileType = await detector(tokenizer);
145+
const fileType = await detector.detect(tokenizer);
144146
if (fileType) {
145147
return fileType;
146148
}
@@ -231,7 +233,7 @@ export class FileTypeParser {
231233
return this.check(stringToBytes(header), options);
232234
}
233235

234-
parse = async tokenizer => {
236+
detectCore = async tokenizer => {
235237
this.buffer = new Uint8Array(reasonableDetectionSizeInBytes);
236238

237239
// Keep reading until EOF if the file size is unknown.
@@ -321,7 +323,7 @@ export class FileTypeParser {
321323
if (this.check([0xEF, 0xBB, 0xBF])) { // UTF-8-BOM
322324
// Strip off UTF-8-BOM
323325
this.tokenizer.ignore(3);
324-
return this.parse(tokenizer);
326+
return this.detectCore(tokenizer);
325327
}
326328

327329
if (this.check([0x47, 0x49, 0x46])) {
@@ -1381,39 +1383,6 @@ export class FileTypeParser {
13811383
return undefined; // Some unknown text based format
13821384
}
13831385

1384-
// -- Unsafe signatures --
1385-
1386-
if (
1387-
this.check([0x0, 0x0, 0x1, 0xBA])
1388-
|| this.check([0x0, 0x0, 0x1, 0xB3])
1389-
) {
1390-
return {
1391-
ext: 'mpg',
1392-
mime: 'video/mpeg',
1393-
};
1394-
}
1395-
1396-
if (this.check([0x00, 0x01, 0x00, 0x00, 0x00])) {
1397-
return {
1398-
ext: 'ttf',
1399-
mime: 'font/ttf',
1400-
};
1401-
}
1402-
1403-
if (this.check([0x00, 0x00, 0x01, 0x00])) {
1404-
return {
1405-
ext: 'ico',
1406-
mime: 'image/x-icon',
1407-
};
1408-
}
1409-
1410-
if (this.check([0x00, 0x00, 0x02, 0x00])) {
1411-
return {
1412-
ext: 'cur',
1413-
mime: 'image/x-icon',
1414-
};
1415-
}
1416-
14171386
if (this.check([0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1])) {
14181387
// Detected Microsoft Compound File Binary File (MS-CFB) Format.
14191388
return {
@@ -1619,6 +1588,44 @@ export class FileTypeParser {
16191588
mime: 'application/pgp-encrypted',
16201589
};
16211590
}
1591+
};
1592+
1593+
detectUnsafe = async tokenizer => {
1594+
this.buffer = new Uint8Array(reasonableDetectionSizeInBytes);
1595+
1596+
// Read initial sample size of 8 bytes
1597+
await tokenizer.peekBuffer(this.buffer, {length: Math.min(8, tokenizer.fileInfo.size), mayBeLess: true});
1598+
1599+
if (
1600+
this.check([0x0, 0x0, 0x1, 0xBA])
1601+
|| this.check([0x0, 0x0, 0x1, 0xB3])
1602+
) {
1603+
return {
1604+
ext: 'mpg',
1605+
mime: 'video/mpeg',
1606+
};
1607+
}
1608+
1609+
if (this.check([0x00, 0x01, 0x00, 0x00, 0x00])) {
1610+
return {
1611+
ext: 'ttf',
1612+
mime: 'font/ttf',
1613+
};
1614+
}
1615+
1616+
if (this.check([0x00, 0x00, 0x01, 0x00])) {
1617+
return {
1618+
ext: 'ico',
1619+
mime: 'image/x-icon',
1620+
};
1621+
}
1622+
1623+
if (this.check([0x00, 0x00, 0x02, 0x00])) {
1624+
return {
1625+
ext: 'cur',
1626+
mime: 'image/x-icon',
1627+
};
1628+
}
16221629

16231630
// Check MPEG 1 or 2 Layer 3 header, or 'layer 0' for ADTS (MPEG sync-word 0xFFE)
16241631
if (this.buffer.length >= 2 && this.check([0xFF, 0xE0], {offset: 0, mask: [0xFF, 0xE0]})) {

readme.md

+6-5
Original file line numberDiff line numberDiff line change
@@ -364,8 +364,9 @@ Below is an example of a custom detector array. This can be passed to the `FileT
364364
```js
365365
import {FileTypeParser} from 'file-type';
366366

367-
const customDetectors = [
368-
async tokenizer => {
367+
const unicornDetector = {
368+
id: 'unicorn',
369+
async detect(tokenizer) {
369370
const unicornHeader = [85, 78, 73, 67, 79, 82, 78]; // "UNICORN" in ASCII decimal
370371

371372
const buffer = new Uint8Array(unicornHeader.length);
@@ -375,11 +376,11 @@ const customDetectors = [
375376
}
376377

377378
return undefined;
378-
},
379-
];
379+
}
380+
}
380381

381382
const buffer = new Uint8Array([85, 78, 73, 67, 79, 82, 78]);
382-
const parser = new FileTypeParser({customDetectors});
383+
const parser = new FileTypeParser({customDetectors: [unicornDetector]});
383384
const fileType = await parser.fromBuffer(buffer);
384385
console.log(fileType); // {ext: 'unicorn', mime: 'application/unicorn'}
385386
```

test.js

+21-12
Original file line numberDiff line numberDiff line change
@@ -688,22 +688,31 @@ test('corrupt MKV throws', async t => {
688688
});
689689

690690
// Create a custom detector for the just made up "unicorn" file type
691-
const unicornDetector = async tokenizer => {
692-
const unicornHeader = [85, 78, 73, 67, 79, 82, 78]; // "UNICORN" as decimal string
693-
const buffer = new Uint8Array(7);
694-
await tokenizer.peekBuffer(buffer, {length: unicornHeader.length, mayBeLess: true});
695-
if (unicornHeader.every((value, index) => value === buffer[index])) {
696-
return {ext: 'unicorn', mime: 'application/unicorn'};
697-
}
691+
const unicornDetector = {
692+
id: 'mock.unicorn',
693+
async detect(tokenizer) {
694+
const unicornHeader = [85, 78, 73, 67, 79, 82, 78]; // "UNICORN" as decimal string
695+
const buffer = new Uint8Array(7);
696+
await tokenizer.peekBuffer(buffer, {length: unicornHeader.length, mayBeLess: true});
697+
if (unicornHeader.every((value, index) => value === buffer[index])) {
698+
return {ext: 'unicorn', mime: 'application/unicorn'};
699+
}
698700

699-
return undefined;
701+
return undefined;
702+
},
700703
};
701704

702-
const mockPngDetector = _tokenizer => ({ext: 'mockPng', mime: 'image/mockPng'});
705+
const mockPngDetector = {
706+
id: 'mock.png',
707+
detect: () => ({ext: 'mockPng', mime: 'image/mockPng'}),
708+
};
703709

704-
const tokenizerPositionChanger = tokenizer => {
705-
const buffer = new Uint8Array(1);
706-
tokenizer.readBuffer(buffer, {length: 1, mayBeLess: true});
710+
const tokenizerPositionChanger = {
711+
id: 'mock.dirtyTokenizer',
712+
detect(tokenizer) {
713+
const buffer = new Uint8Array(1);
714+
tokenizer.readBuffer(buffer, {length: 1, mayBeLess: true});
715+
},
707716
};
708717

709718
if (nodeMajorVersion >= nodeVersionSupportingByteBlobStream) {

0 commit comments

Comments
 (0)