From 1ece1f1ccea3a2fdd698b648a0b322930cc00006 Mon Sep 17 00:00:00 2001 From: yoannchb-pro <113145565+yoannets@users.noreply.github.com> Date: Sat, 21 Jan 2023 16:55:59 -0500 Subject: [PATCH 1/2] init --- .gitignore | 1 - .npmignore | 1 - CHANGELOG.md | 8 + README.md | 159 +++++-------- dist/constant/GOOGLE_CONSTANT.d.ts | 11 + dist/constant/GOOGLE_CONSTANT.js | 14 ++ dist/constant/GOOGLE_CONSTANT.js.map | 1 + .../extensions/IMAGES_EXTENSIONS.json | 1 + dist/constant/query/GOOGLE_COLORS.d.ts | 2 + dist/constant/query/GOOGLE_COLORS.js | 18 ++ dist/constant/query/GOOGLE_COLORS.js.map | 1 + dist/constant/query/GOOGLE_PARAMS.d.ts | 10 + dist/constant/query/GOOGLE_PARAMS.js | 13 ++ dist/constant/query/GOOGLE_PARAMS.js.map | 1 + dist/constant/query/GOOGLE_QUERY.d.ts | 3 + dist/constant/query/GOOGLE_QUERY.js | 50 +++++ dist/constant/query/GOOGLE_QUERY.js.map | 1 + dist/core/buildGoogleDork.d.ts | 8 + dist/core/buildGoogleDork.js | 97 ++++++++ dist/core/buildGoogleDork.js.map | 1 + dist/core/constructGoogleUrl.d.ts | 8 + dist/core/constructGoogleUrl.js | 22 ++ dist/core/constructGoogleUrl.js.map | 1 + dist/core/limitResultSize.d.ts | 9 + dist/core/limitResultSize.js | 17 ++ dist/core/limitResultSize.js.map | 1 + dist/core/parseGoogleImages.d.ts | 8 + dist/core/parseGoogleImages.js | 69 ++++++ dist/core/parseGoogleImages.js.map | 1 + dist/core/verifyGoogleQuery.d.ts | 7 + dist/core/verifyGoogleQuery.js | 27 +++ dist/core/verifyGoogleQuery.js.map | 1 + dist/index.d.ts | 5 + dist/index.js | 32 +++ dist/index.js.map | 1 + dist/utils/utils.d.ts | 19 ++ dist/utils/utils.js | 40 ++++ dist/utils/utils.js.map | 1 + jest.config.ts | 11 + package.json | 23 +- src/back/google-img-scrap-1.0.7-.js | 184 --------------- ...{GOOGLE_CONSTANT.js => GOOGLE_CONSTANT.ts} | 2 +- .../{GOOGLE_COLORS.js => GOOGLE_COLORS.ts} | 2 +- src/constant/query/GOOGLE_PARAMS.js | 17 -- src/constant/query/GOOGLE_PARAMS.ts | 11 + src/constant/query/GOOGLE_QUERY.js | 59 ----- src/constant/query/GOOGLE_QUERY.ts | 57 +++++ src/core/buildGoogleDork.ts | 103 +++++++++ src/core/constructGoogleUrl.ts | 24 ++ src/core/limitResultSize.ts | 22 ++ src/core/parseGoogleImages.ts | 71 ++++++ src/core/verifyGoogleQuery.ts | 33 +++ src/google-img-scrap.js | 212 ------------------ src/index.ts | 22 ++ src/utils/UTILS.js | 20 -- src/utils/utils.ts | 40 ++++ {test => test-back}/test-filter-titles.js | 5 +- {test => test-back}/test-last-version.js | 2 +- {test => test-back}/test-result-limit.js | 0 {test => test-back}/test-simple.js | 0 {test => test-back}/test-url-match.js | 0 {test => test-back}/test-wallpaper.js | 0 {test => test-back}/test.js | 6 +- tsconfig.build.json | 4 + tsconfig.json | 16 ++ types/config.d.ts | 21 ++ types/googleQuery.d.ts | 55 +++++ types/imageResultItem.d.ts | 16 ++ types/index.d.ts | 108 --------- types/results.d.ts | 8 + 70 files changed, 1107 insertions(+), 717 deletions(-) delete mode 100644 .npmignore create mode 100644 dist/constant/GOOGLE_CONSTANT.d.ts create mode 100644 dist/constant/GOOGLE_CONSTANT.js create mode 100644 dist/constant/GOOGLE_CONSTANT.js.map create mode 100644 dist/constant/extensions/IMAGES_EXTENSIONS.json create mode 100644 dist/constant/query/GOOGLE_COLORS.d.ts create mode 100644 dist/constant/query/GOOGLE_COLORS.js create mode 100644 dist/constant/query/GOOGLE_COLORS.js.map create mode 100644 dist/constant/query/GOOGLE_PARAMS.d.ts create mode 100644 dist/constant/query/GOOGLE_PARAMS.js create mode 100644 dist/constant/query/GOOGLE_PARAMS.js.map create mode 100644 dist/constant/query/GOOGLE_QUERY.d.ts create mode 100644 dist/constant/query/GOOGLE_QUERY.js create mode 100644 dist/constant/query/GOOGLE_QUERY.js.map create mode 100644 dist/core/buildGoogleDork.d.ts create mode 100644 dist/core/buildGoogleDork.js create mode 100644 dist/core/buildGoogleDork.js.map create mode 100644 dist/core/constructGoogleUrl.d.ts create mode 100644 dist/core/constructGoogleUrl.js create mode 100644 dist/core/constructGoogleUrl.js.map create mode 100644 dist/core/limitResultSize.d.ts create mode 100644 dist/core/limitResultSize.js create mode 100644 dist/core/limitResultSize.js.map create mode 100644 dist/core/parseGoogleImages.d.ts create mode 100644 dist/core/parseGoogleImages.js create mode 100644 dist/core/parseGoogleImages.js.map create mode 100644 dist/core/verifyGoogleQuery.d.ts create mode 100644 dist/core/verifyGoogleQuery.js create mode 100644 dist/core/verifyGoogleQuery.js.map create mode 100644 dist/index.d.ts create mode 100644 dist/index.js create mode 100644 dist/index.js.map create mode 100644 dist/utils/utils.d.ts create mode 100644 dist/utils/utils.js create mode 100644 dist/utils/utils.js.map create mode 100644 jest.config.ts delete mode 100644 src/back/google-img-scrap-1.0.7-.js rename src/constant/{GOOGLE_CONSTANT.js => GOOGLE_CONSTANT.ts} (90%) rename src/constant/query/{GOOGLE_COLORS.js => GOOGLE_COLORS.ts} (83%) delete mode 100644 src/constant/query/GOOGLE_PARAMS.js create mode 100644 src/constant/query/GOOGLE_PARAMS.ts delete mode 100644 src/constant/query/GOOGLE_QUERY.js create mode 100644 src/constant/query/GOOGLE_QUERY.ts create mode 100644 src/core/buildGoogleDork.ts create mode 100644 src/core/constructGoogleUrl.ts create mode 100644 src/core/limitResultSize.ts create mode 100644 src/core/parseGoogleImages.ts create mode 100644 src/core/verifyGoogleQuery.ts delete mode 100644 src/google-img-scrap.js create mode 100644 src/index.ts delete mode 100644 src/utils/UTILS.js create mode 100644 src/utils/utils.ts rename {test => test-back}/test-filter-titles.js (56%) rename {test => test-back}/test-last-version.js (86%) rename {test => test-back}/test-result-limit.js (100%) rename {test => test-back}/test-simple.js (100%) rename {test => test-back}/test-url-match.js (100%) rename {test => test-back}/test-wallpaper.js (100%) rename {test => test-back}/test.js (78%) create mode 100644 tsconfig.build.json create mode 100644 tsconfig.json create mode 100644 types/config.d.ts create mode 100644 types/googleQuery.d.ts create mode 100644 types/imageResultItem.d.ts delete mode 100644 types/index.d.ts create mode 100644 types/results.d.ts diff --git a/.gitignore b/.gitignore index 6704566..adb2c19 100644 --- a/.gitignore +++ b/.gitignore @@ -80,7 +80,6 @@ typings/ # Nuxt.js build / generate output .nuxt -dist # Gatsby files .cache/ diff --git a/.npmignore b/.npmignore deleted file mode 100644 index 191381e..0000000 --- a/.npmignore +++ /dev/null @@ -1 +0,0 @@ -.git \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index f9f6550..c86bd6e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +### 1.0.9 + +- Fixed many bugs +- `filterByTitles` is now working +- `urlMatch` added in types +- All the code have been write back in typescript with a new structure +- Removed `execute` + ### 1.0.8 - Fixed "ERROR: Cannot assign to "queryName" because it is a constant" (by GaspardCulis) diff --git a/README.md b/README.md index 8bf1a3b..9ad2978 100644 --- a/README.md +++ b/README.md @@ -20,12 +20,13 @@ npm i google-img-scrap ```js const { GOOGLE_IMG_SCRAP, GOOGLE_QUERY } = require("google-img-scrap"); +// OR +import { GOOGLE_IMG_SCRAP, GOOGLE_QUERY } from "google-img-scrap"; ``` ## Query Params - "search" `string` what you want to search -- "execute" `(element: FinalResult) => FinalResult | undefined` allow you to execute a function to filter results - "excludeWords" `string[]` exclude some words from the search - "domains" `string[]` filter by domains - "excludeDomains" `string[]` exclude some domains @@ -76,63 +77,39 @@ const { GOOGLE_IMG_SCRAP, GOOGLE_QUERY } = require("google-img-scrap"); Search cats images ```js -(async function () { - const test = await GOOGLE_IMG_SCRAP({ - search: "cats", - }); +const test = await GOOGLE_IMG_SCRAP({ + search: "cats", +}); - console.log(test); -})(); -``` - -## Filtering - -```js -(async function () { - const test = await GOOGLE_IMG_SCRAP({ - search: "demon slayer background hd", - execute: function (element) { - if (element.url.length < 20) return element; - }, - }); - - console.log(test); -})(); +console.log(test); ``` ## Custom query -All query options are optional (see below for all the options) +All query options are optional (see below for all the options). You can combine as much as you want. ```js -(async function () { - const test = await GOOGLE_IMG_SCRAP({ - search: "cats", - query: { - TYPE: GOOGLE_QUERY.TYPE.CLIPART, - DATE: GOOGLE_QUERY.DATE.YEAR, - COLOR: GOOGLE_QUERY.COLOR.BLACK_AND_WHITE, - SIZE: GOOGLE_QUERY.SIZE.LARGE, - LICENCE: GOOGLE_QUERY.LICENCE.COMMERCIAL_AND_OTHER, - EXTENSION: GOOGLE_QUERY.EXTENSION.JPG, - }, - }); +const test = await GOOGLE_IMG_SCRAP({ + search: "cats", + query: { + TYPE: GOOGLE_QUERY.TYPE.CLIPART, + LICENCE: GOOGLE_QUERY.LICENCE.COMMERCIAL_AND_OTHER, + EXTENSION: GOOGLE_QUERY.EXTENSION.JPG, + }, +}); - console.log(test); -})(); +console.log(test); ``` ## Limit result size ```js -(async function () { - const test = await GOOGLE_IMG_SCRAP({ - search: "cats", - limit: 5, - }); - - console.log(test); -})(); +const test = await GOOGLE_IMG_SCRAP({ + search: "cats", + limit: 5, +}); + +console.log(test); ``` ## Domains @@ -140,27 +117,23 @@ All query options are optional (see below for all the options) Only scrap from a specific domain ```js -(async function () { - const test = await GOOGLE_IMG_SCRAP({ - search: "cats", - domains: ["alamy.com", "istockphoto.com", "vecteezy.com"], - }); - - console.log(test); -})(); +const test = await GOOGLE_IMG_SCRAP({ + search: "cats", + domains: ["alamy.com", "istockphoto.com", "vecteezy.com"], +}); + +console.log(test); ``` ## Exclude domains ```js -(async function () { - const test = await GOOGLE_IMG_SCRAP({ - search: "cats", - excludeDomains: ["istockphoto.com", "alamy.com"], - }); - - console.log(test); -})(); +const test = await GOOGLE_IMG_SCRAP({ + search: "cats", + excludeDomains: ["istockphoto.com", "alamy.com"], +}); + +console.log(test); ``` ## Exclude words @@ -168,40 +141,34 @@ Only scrap from a specific domain If you don' like black cats and white cats ```js -(async function () { - const test = await GOOGLE_IMG_SCRAP({ - search: "cats", - excludeWords: ["black", "white"], //If you don't like black cats and white cats - }); - - console.log(test); -})(); +const test = await GOOGLE_IMG_SCRAP({ + search: "cats", + excludeWords: ["black", "white"], //If you don't like black cats and white cats +}); + +console.log(test); ``` ## Safe search (no nsfw) ```js -(async function () { - const test = await GOOGLE_IMG_SCRAP({ - search: "cats", - safeSearch: false, - }); - - console.log(test); -})(); +const test = await GOOGLE_IMG_SCRAP({ + search: "cats", + safeSearch: false, +}); + +console.log(test); ``` ## Custom query params ```js -(async function () { - const test = await GOOGLE_IMG_SCRAP({ - search: "cats", - custom: "name=content&name2=content2", - }); - - console.log(test); -})(); +const test = await GOOGLE_IMG_SCRAP({ + search: "cats", + custom: "name=content&name2=content2", +}); + +console.log(test); ``` ## How urlMatch and filterByTitles work ? @@ -209,18 +176,16 @@ If you don' like black cats and white cats - urlMatch work like filterByTiles ```js -(async function () { - const test = await GOOGLE_IMG_SCRAP({ - search: "cats", - //will build something like this "(draw and white) or (albino and white)" - filterByTitles: [ - ["draw", "white"], - ["albino", "white"], - ], - }); - - console.log(test); -})(); +const test = await GOOGLE_IMG_SCRAP({ + search: "cats", + //will build something like this "(draw and white) or (albino and white)" + filterByTitles: [ + ["draw", "white"], + ["albino", "white"], + ], +}); + +console.log(test); ``` ## Google query diff --git a/dist/constant/GOOGLE_CONSTANT.d.ts b/dist/constant/GOOGLE_CONSTANT.d.ts new file mode 100644 index 0000000..261782b --- /dev/null +++ b/dist/constant/GOOGLE_CONSTANT.d.ts @@ -0,0 +1,11 @@ +declare const GOOGLE_CONSTANT: { + url: string; + queryParam: string; + forceGoogleImage: { + tbm: string; + }; + headers: { + "User-Agent": string; + }; +}; +export default GOOGLE_CONSTANT; diff --git a/dist/constant/GOOGLE_CONSTANT.js b/dist/constant/GOOGLE_CONSTANT.js new file mode 100644 index 0000000..17918f9 --- /dev/null +++ b/dist/constant/GOOGLE_CONSTANT.js @@ -0,0 +1,14 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +const GOOGLE_CONSTANT = { + url: "https://images.google.com/search", + queryParam: "tbs", + forceGoogleImage: { + tbm: "isch", //needed to search on google image instead of google + }, + headers: { + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36", + }, +}; +exports.default = GOOGLE_CONSTANT; +//# sourceMappingURL=GOOGLE_CONSTANT.js.map \ No newline at end of file diff --git a/dist/constant/GOOGLE_CONSTANT.js.map b/dist/constant/GOOGLE_CONSTANT.js.map new file mode 100644 index 0000000..344646b --- /dev/null +++ b/dist/constant/GOOGLE_CONSTANT.js.map @@ -0,0 +1 @@ +{"version":3,"file":"GOOGLE_CONSTANT.js","sourceRoot":"","sources":["../../src/constant/GOOGLE_CONSTANT.ts"],"names":[],"mappings":";;AAAA,MAAM,eAAe,GAAG;IACtB,GAAG,EAAE,kCAAkC;IACvC,UAAU,EAAE,KAAK;IACjB,gBAAgB,EAAE;QAChB,GAAG,EAAE,MAAM,EAAE,oDAAoD;KAClE;IACD,OAAO,EAAE;QACP,YAAY,EACV,2HAA2H;KAC9H;CACF,CAAC;AAEF,kBAAe,eAAe,CAAC"} \ No newline at end of file diff --git a/dist/constant/extensions/IMAGES_EXTENSIONS.json b/dist/constant/extensions/IMAGES_EXTENSIONS.json new file mode 100644 index 0000000..72e36c7 --- /dev/null +++ b/dist/constant/extensions/IMAGES_EXTENSIONS.json @@ -0,0 +1 @@ +["jpg", "gif", "bmp", "png", "svg", "webp", "ico", "raw"] diff --git a/dist/constant/query/GOOGLE_COLORS.d.ts b/dist/constant/query/GOOGLE_COLORS.d.ts new file mode 100644 index 0000000..903e935 --- /dev/null +++ b/dist/constant/query/GOOGLE_COLORS.d.ts @@ -0,0 +1,2 @@ +declare const COLORS: string[]; +export default COLORS; diff --git a/dist/constant/query/GOOGLE_COLORS.js b/dist/constant/query/GOOGLE_COLORS.js new file mode 100644 index 0000000..85941c2 --- /dev/null +++ b/dist/constant/query/GOOGLE_COLORS.js @@ -0,0 +1,18 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +const COLORS = [ + "red", + "blue", + "purple", + "orange", + "yellow", + "green", + "teal", + "pink", + "white", + "gray", + "black", + "brown", +]; +exports.default = COLORS; +//# sourceMappingURL=GOOGLE_COLORS.js.map \ No newline at end of file diff --git a/dist/constant/query/GOOGLE_COLORS.js.map b/dist/constant/query/GOOGLE_COLORS.js.map new file mode 100644 index 0000000..a02497c --- /dev/null +++ b/dist/constant/query/GOOGLE_COLORS.js.map @@ -0,0 +1 @@ +{"version":3,"file":"GOOGLE_COLORS.js","sourceRoot":"","sources":["../../../src/constant/query/GOOGLE_COLORS.ts"],"names":[],"mappings":";;AAAA,MAAM,MAAM,GAAG;IACb,KAAK;IACL,MAAM;IACN,QAAQ;IACR,QAAQ;IACR,QAAQ;IACR,OAAO;IACP,MAAM;IACN,MAAM;IACN,OAAO;IACP,MAAM;IACN,OAAO;IACP,OAAO;CACR,CAAC;AAEF,kBAAe,MAAM,CAAC"} \ No newline at end of file diff --git a/dist/constant/query/GOOGLE_PARAMS.d.ts b/dist/constant/query/GOOGLE_PARAMS.d.ts new file mode 100644 index 0000000..e72544d --- /dev/null +++ b/dist/constant/query/GOOGLE_PARAMS.d.ts @@ -0,0 +1,10 @@ +declare const GOOGLE_PARAMS: { + SIZE: string; + COLOR: string; + SPECIFIC_COLOR: string; + TYPE: string; + DATE: string; + LICENCE: string; + IMAGE_EXTENSION: string; +}; +export default GOOGLE_PARAMS; diff --git a/dist/constant/query/GOOGLE_PARAMS.js b/dist/constant/query/GOOGLE_PARAMS.js new file mode 100644 index 0000000..e73371c --- /dev/null +++ b/dist/constant/query/GOOGLE_PARAMS.js @@ -0,0 +1,13 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +const GOOGLE_PARAMS = { + SIZE: "isz", + COLOR: "ic", + SPECIFIC_COLOR: "Cisc", + TYPE: "itp", + DATE: "qdr", + LICENCE: "il", + IMAGE_EXTENSION: "ift", +}; +exports.default = GOOGLE_PARAMS; +//# sourceMappingURL=GOOGLE_PARAMS.js.map \ No newline at end of file diff --git a/dist/constant/query/GOOGLE_PARAMS.js.map b/dist/constant/query/GOOGLE_PARAMS.js.map new file mode 100644 index 0000000..b311702 --- /dev/null +++ b/dist/constant/query/GOOGLE_PARAMS.js.map @@ -0,0 +1 @@ +{"version":3,"file":"GOOGLE_PARAMS.js","sourceRoot":"","sources":["../../../src/constant/query/GOOGLE_PARAMS.ts"],"names":[],"mappings":";;AAAA,MAAM,aAAa,GAAG;IACpB,IAAI,EAAE,KAAK;IACX,KAAK,EAAE,IAAI;IACX,cAAc,EAAE,MAAM;IACtB,IAAI,EAAE,KAAK;IACX,IAAI,EAAE,KAAK;IACX,OAAO,EAAE,IAAI;IACb,eAAe,EAAE,KAAK;CACvB,CAAC;AAEF,kBAAe,aAAa,CAAC"} \ No newline at end of file diff --git a/dist/constant/query/GOOGLE_QUERY.d.ts b/dist/constant/query/GOOGLE_QUERY.d.ts new file mode 100644 index 0000000..f4a2827 --- /dev/null +++ b/dist/constant/query/GOOGLE_QUERY.d.ts @@ -0,0 +1,3 @@ +import GoogleQuery from "../../../types/googleQuery"; +declare const _default: GoogleQuery; +export default _default; diff --git a/dist/constant/query/GOOGLE_QUERY.js b/dist/constant/query/GOOGLE_QUERY.js new file mode 100644 index 0000000..416fe76 --- /dev/null +++ b/dist/constant/query/GOOGLE_QUERY.js @@ -0,0 +1,50 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +const GOOGLE_PARAMS_1 = require("./GOOGLE_PARAMS"); +const GOOGLE_COLORS_1 = require("./GOOGLE_COLORS"); +const IMAGES_EXTENSIONS_json_1 = require("../extensions/IMAGES_EXTENSIONS.json"); +const GOOGLE_QUERY = { + SIZE: { + LARGE: GOOGLE_PARAMS_1.default.SIZE + ":l", + MEDIUM: GOOGLE_PARAMS_1.default.SIZE + ":m", + ICON: GOOGLE_PARAMS_1.default.SIZE + ":i", + }, + COLOR: { + BLACK_AND_WHITE: GOOGLE_PARAMS_1.default.COLOR + ":gray", + TRANSPARENT: GOOGLE_PARAMS_1.default.COLOR + ":trans", + }, + TYPE: { + CLIPART: GOOGLE_PARAMS_1.default.TYPE + ":clipart", + DRAW: GOOGLE_PARAMS_1.default.TYPE + ":lineart", + GIF: GOOGLE_PARAMS_1.default.TYPE + ":animated", + }, + EXTENSION: {}, + DATE: { + DAY: GOOGLE_PARAMS_1.default.DATE + ":d", + WEEK: GOOGLE_PARAMS_1.default.DATE + ":w", + MONTH: GOOGLE_PARAMS_1.default.DATE + ":m", + YEAR: GOOGLE_PARAMS_1.default.DATE + ":y", + }, + LICENCE: { + CREATIVE_COMMONS: GOOGLE_PARAMS_1.default.LICENCE + ":cl", + COMMERCIAL_AND_OTHER: GOOGLE_PARAMS_1.default.LICENCE + ":ol", + }, +}; +//build extension +for (const EXTENSION of IMAGES_EXTENSIONS_json_1.default) { + const queryName = EXTENSION.toUpperCase(); + GOOGLE_QUERY.EXTENSION[queryName] = + GOOGLE_PARAMS_1.default.IMAGE_EXTENSION + ":" + EXTENSION; +} +//build colors +for (const COLOR of GOOGLE_COLORS_1.default) { + const queryName = COLOR.toUpperCase(); + GOOGLE_QUERY.COLOR[queryName] = + GOOGLE_PARAMS_1.default.COLOR + + ":specific," + + GOOGLE_PARAMS_1.default.SPECIFIC_COLOR + + ":" + + COLOR; +} +exports.default = GOOGLE_QUERY; +//# sourceMappingURL=GOOGLE_QUERY.js.map \ No newline at end of file diff --git a/dist/constant/query/GOOGLE_QUERY.js.map b/dist/constant/query/GOOGLE_QUERY.js.map new file mode 100644 index 0000000..9625c2f --- /dev/null +++ b/dist/constant/query/GOOGLE_QUERY.js.map @@ -0,0 +1 @@ +{"version":3,"file":"GOOGLE_QUERY.js","sourceRoot":"","sources":["../../../src/constant/query/GOOGLE_QUERY.ts"],"names":[],"mappings":";;AAAA,mDAA4C;AAC5C,mDAAqC;AACrC,iFAA8D;AAG9D,MAAM,YAAY,GAAG;IACnB,IAAI,EAAE;QACJ,KAAK,EAAE,uBAAa,CAAC,IAAI,GAAG,IAAI;QAChC,MAAM,EAAE,uBAAa,CAAC,IAAI,GAAG,IAAI;QACjC,IAAI,EAAE,uBAAa,CAAC,IAAI,GAAG,IAAI;KAChC;IAED,KAAK,EAAE;QACL,eAAe,EAAE,uBAAa,CAAC,KAAK,GAAG,OAAO;QAC9C,WAAW,EAAE,uBAAa,CAAC,KAAK,GAAG,QAAQ;KAC5C;IAED,IAAI,EAAE;QACJ,OAAO,EAAE,uBAAa,CAAC,IAAI,GAAG,UAAU;QACxC,IAAI,EAAE,uBAAa,CAAC,IAAI,GAAG,UAAU;QACrC,GAAG,EAAE,uBAAa,CAAC,IAAI,GAAG,WAAW;KACtC;IAED,SAAS,EAAE,EAAE;IAEb,IAAI,EAAE;QACJ,GAAG,EAAE,uBAAa,CAAC,IAAI,GAAG,IAAI;QAC9B,IAAI,EAAE,uBAAa,CAAC,IAAI,GAAG,IAAI;QAC/B,KAAK,EAAE,uBAAa,CAAC,IAAI,GAAG,IAAI;QAChC,IAAI,EAAE,uBAAa,CAAC,IAAI,GAAG,IAAI;KAChC;IAED,OAAO,EAAE;QACP,gBAAgB,EAAE,uBAAa,CAAC,OAAO,GAAG,KAAK;QAC/C,oBAAoB,EAAE,uBAAa,CAAC,OAAO,GAAG,KAAK;KACpD;CACF,CAAC;AAEF,iBAAiB;AACjB,KAAK,MAAM,SAAS,IAAI,gCAAU,EAAE;IAClC,MAAM,SAAS,GAAG,SAAS,CAAC,WAAW,EAAE,CAAC;IACzC,YAAoB,CAAC,SAAS,CAAC,SAAS,CAAC;QACxC,uBAAa,CAAC,eAAe,GAAG,GAAG,GAAG,SAAS,CAAC;CACnD;AAED,cAAc;AACd,KAAK,MAAM,KAAK,IAAI,uBAAM,EAAE;IAC1B,MAAM,SAAS,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;IACrC,YAAoB,CAAC,KAAK,CAAC,SAAS,CAAC;QACpC,uBAAa,CAAC,KAAK;YACnB,YAAY;YACZ,uBAAa,CAAC,cAAc;YAC5B,GAAG;YACH,KAAK,CAAC;CACT;AAED,kBAAe,YAA2B,CAAC"} \ No newline at end of file diff --git a/dist/core/buildGoogleDork.d.ts b/dist/core/buildGoogleDork.d.ts new file mode 100644 index 0000000..417a3f2 --- /dev/null +++ b/dist/core/buildGoogleDork.d.ts @@ -0,0 +1,8 @@ +import Config from "../../types/config"; +/** + * Build google dork string based on the config query + * @param config + * @returns + */ +declare function buildGoogleDorks(config: Config): string; +export default buildGoogleDorks; diff --git a/dist/core/buildGoogleDork.js b/dist/core/buildGoogleDork.js new file mode 100644 index 0000000..cee115e --- /dev/null +++ b/dist/core/buildGoogleDork.js @@ -0,0 +1,97 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +/** + * Show only images with a particular title + * @param config + * @returns + */ +function filterByTitlesBuilder(config) { + const FILTER_TITLE = []; + if (config.filterByTitles) { + for (const titleFilter of config.filterByTitles) { + const value = titleFilter.map((title) => { + return `intitle:"${title}"`; + }); + FILTER_TITLE.push(`(${value.join(" AND ")})`); + } + } + return FILTER_TITLE.join(" "); +} +/** + * Show only images without some specific words + * @param config + * @returns + */ +function excludeWordsBuilder(config) { + const EXCLUDE_WORDS = []; + if (config.excludeWords) { + for (const excludeWord of config.excludeWords) { + EXCLUDE_WORDS.push(`-"${excludeWord}"`); + } + } + return EXCLUDE_WORDS.join(" "); +} +/** + * Show only images of some particular domains + * @param config + * @returns + */ +function onlyDomainsBuilder(config) { + const DOMAINS = []; + if (config.domains) { + for (const domain of config.domains) { + DOMAINS.push(`site:"${domain}"`); + } + } + return DOMAINS.join(" OR "); +} +/** + * Don't show images from particular domains + * @param config + * @returns + */ +function excludeDomainsBuilder(config) { + const EXCLUDE_DOMAINS = []; + if (config.excludeDomains) { + for (const excludeDomain of config.excludeDomains) { + EXCLUDE_DOMAINS.push(`-site:"${excludeDomain}"`); + } + } + return EXCLUDE_DOMAINS.join(" "); +} +/** + * Only show images with a domain that match a particular regex + * @param config + * @returns + */ +function urlMatchBuilder(config) { + const URL_MATCH = []; + if (config.urlMatch) { + for (const urlMatch of config.urlMatch) { + const value = urlMatch.map((content) => { + return `inurl:${content}`; + }); + URL_MATCH.push(`(${value.join(" AND ")})`); + } + } + return URL_MATCH.join(" OR "); +} +/** + * Build google dork string based on the config query + * @param config + * @returns + */ +function buildGoogleDorks(config) { + return [ + config.search, + urlMatchBuilder(config), + excludeWordsBuilder(config), + excludeDomainsBuilder(config), + onlyDomainsBuilder(config), + filterByTitlesBuilder(config), + ] + .join(" ") + .trim(); +} +exports.default = buildGoogleDorks; +//# sourceMappingURL=buildGoogleDork.js.map \ No newline at end of file diff --git a/dist/core/buildGoogleDork.js.map b/dist/core/buildGoogleDork.js.map new file mode 100644 index 0000000..7a79fbd --- /dev/null +++ b/dist/core/buildGoogleDork.js.map @@ -0,0 +1 @@ +{"version":3,"file":"buildGoogleDork.js","sourceRoot":"","sources":["../../src/core/buildGoogleDork.ts"],"names":[],"mappings":";;AAEA;;;;GAIG;AACH,SAAS,qBAAqB,CAAC,MAAc;IAC3C,MAAM,YAAY,GAAG,EAAE,CAAC;IACxB,IAAI,MAAM,CAAC,cAAc,EAAE;QACzB,KAAK,MAAM,WAAW,IAAI,MAAM,CAAC,cAAc,EAAE;YAC/C,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE;gBACtC,OAAO,YAAY,KAAK,GAAG,CAAC;YAC9B,CAAC,CAAC,CAAC;YAEH,YAAY,CAAC,IAAI,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;SAC/C;KACF;IACD,OAAO,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAChC,CAAC;AAED;;;;GAIG;AACH,SAAS,mBAAmB,CAAC,MAAc;IACzC,MAAM,aAAa,GAAG,EAAE,CAAC;IACzB,IAAI,MAAM,CAAC,YAAY,EAAE;QACvB,KAAK,MAAM,WAAW,IAAI,MAAM,CAAC,YAAY,EAAE;YAC7C,aAAa,CAAC,IAAI,CAAC,KAAK,WAAW,GAAG,CAAC,CAAC;SACzC;KACF;IACD,OAAO,aAAa,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACjC,CAAC;AAED;;;;GAIG;AACH,SAAS,kBAAkB,CAAC,MAAc;IACxC,MAAM,OAAO,GAAG,EAAE,CAAC;IACnB,IAAI,MAAM,CAAC,OAAO,EAAE;QAClB,KAAK,MAAM,MAAM,IAAI,MAAM,CAAC,OAAO,EAAE;YACnC,OAAO,CAAC,IAAI,CAAC,SAAS,MAAM,GAAG,CAAC,CAAC;SAClC;KACF;IACD,OAAO,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAC9B,CAAC;AAED;;;;GAIG;AACH,SAAS,qBAAqB,CAAC,MAAc;IAC3C,MAAM,eAAe,GAAG,EAAE,CAAC;IAC3B,IAAI,MAAM,CAAC,cAAc,EAAE;QACzB,KAAK,MAAM,aAAa,IAAI,MAAM,CAAC,cAAc,EAAE;YACjD,eAAe,CAAC,IAAI,CAAC,UAAU,aAAa,GAAG,CAAC,CAAC;SAClD;KACF;IACD,OAAO,eAAe,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACnC,CAAC;AAED;;;;GAIG;AACH,SAAS,eAAe,CAAC,MAAc;IACrC,MAAM,SAAS,GAAG,EAAE,CAAC;IACrB,IAAI,MAAM,CAAC,QAAQ,EAAE;QACnB,KAAK,MAAM,QAAQ,IAAI,MAAM,CAAC,QAAQ,EAAE;YACtC,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE;gBACrC,OAAO,SAAS,OAAO,EAAE,CAAC;YAC5B,CAAC,CAAC,CAAC;YACH,SAAS,CAAC,IAAI,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;SAC5C;KACF;IACD,OAAO,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAChC,CAAC;AAED;;;;GAIG;AACH,SAAS,gBAAgB,CAAC,MAAc;IACtC,OAAO;QACL,MAAM,CAAC,MAAM;QACb,eAAe,CAAC,MAAM,CAAC;QACvB,mBAAmB,CAAC,MAAM,CAAC;QAC3B,qBAAqB,CAAC,MAAM,CAAC;QAC7B,kBAAkB,CAAC,MAAM,CAAC;QAC1B,qBAAqB,CAAC,MAAM,CAAC;KAC9B;SACE,IAAI,CAAC,GAAG,CAAC;SACT,IAAI,EAAE,CAAC;AACZ,CAAC;AAED,kBAAe,gBAAgB,CAAC"} \ No newline at end of file diff --git a/dist/core/constructGoogleUrl.d.ts b/dist/core/constructGoogleUrl.d.ts new file mode 100644 index 0000000..803a237 --- /dev/null +++ b/dist/core/constructGoogleUrl.d.ts @@ -0,0 +1,8 @@ +import Config from "../../types/config"; +/** + * Construct google url for scrapping + * @param config + * @returns + */ +declare function constructGoogleUrl(config: Config): string; +export default constructGoogleUrl; diff --git a/dist/core/constructGoogleUrl.js b/dist/core/constructGoogleUrl.js new file mode 100644 index 0000000..77b75e3 --- /dev/null +++ b/dist/core/constructGoogleUrl.js @@ -0,0 +1,22 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +const GOOGLE_CONSTANT_1 = require("../constant/GOOGLE_CONSTANT"); +const utils_1 = require("../utils/utils"); +const buildGoogleDork_1 = require("./buildGoogleDork"); +/** + * Construct google url for scrapping + * @param config + * @returns + */ +function constructGoogleUrl(config) { + const GOOGLE_DORK = (0, buildGoogleDork_1.default)(config); + const CUSTOM_PARAM = config.custom ? `&${config.custom}` : ""; + const SAFE_SEARCH = config.safeSearch ? `&safe=active` : ""; + const QUERY = Object.assign(GOOGLE_CONSTANT_1.default.forceGoogleImage, { + [GOOGLE_CONSTANT_1.default.queryParam]: Object.values(config.query || {}).join(","), + q: GOOGLE_DORK, + }); + return GOOGLE_CONSTANT_1.default.url + (0, utils_1.buildQuery)(QUERY) + CUSTOM_PARAM + SAFE_SEARCH; +} +exports.default = constructGoogleUrl; +//# sourceMappingURL=constructGoogleUrl.js.map \ No newline at end of file diff --git a/dist/core/constructGoogleUrl.js.map b/dist/core/constructGoogleUrl.js.map new file mode 100644 index 0000000..7b107a1 --- /dev/null +++ b/dist/core/constructGoogleUrl.js.map @@ -0,0 +1 @@ +{"version":3,"file":"constructGoogleUrl.js","sourceRoot":"","sources":["../../src/core/constructGoogleUrl.ts"],"names":[],"mappings":";;AACA,iEAA0D;AAC1D,0CAA4C;AAC5C,uDAAiD;AAEjD;;;;GAIG;AACH,SAAS,kBAAkB,CAAC,MAAc;IACxC,MAAM,WAAW,GAAG,IAAA,yBAAgB,EAAC,MAAM,CAAC,CAAC;IAC7C,MAAM,YAAY,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IAC9D,MAAM,WAAW,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,EAAE,CAAC;IAE5D,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,yBAAe,CAAC,gBAAgB,EAAE;QAC5D,CAAC,yBAAe,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;QACzE,CAAC,EAAE,WAAW;KACf,CAAC,CAAC;IAEH,OAAO,yBAAe,CAAC,GAAG,GAAG,IAAA,kBAAU,EAAC,KAAK,CAAC,GAAG,YAAY,GAAG,WAAW,CAAC;AAC9E,CAAC;AAED,kBAAe,kBAAkB,CAAC"} \ No newline at end of file diff --git a/dist/core/limitResultSize.d.ts b/dist/core/limitResultSize.d.ts new file mode 100644 index 0000000..c51bbfa --- /dev/null +++ b/dist/core/limitResultSize.d.ts @@ -0,0 +1,9 @@ +import ImageResultItem from "../../types/imageResultItem"; +/** + * Limit the result size + * @param config + * @param imagesItems + * @returns + */ +declare function limitResultSize(limit: number, imagesItems: ImageResultItem[]): ImageResultItem[]; +export default limitResultSize; diff --git a/dist/core/limitResultSize.js b/dist/core/limitResultSize.js new file mode 100644 index 0000000..e5edfc3 --- /dev/null +++ b/dist/core/limitResultSize.js @@ -0,0 +1,17 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +/** + * Limit the result size + * @param config + * @param imagesItems + * @returns + */ +function limitResultSize(limit, imagesItems) { + let slicedResult = []; + if (limit && limit > 0 && imagesItems.length > limit) { + slicedResult = imagesItems.slice(0, limit); + } + return slicedResult.length > 0 ? slicedResult : imagesItems; +} +exports.default = limitResultSize; +//# sourceMappingURL=limitResultSize.js.map \ No newline at end of file diff --git a/dist/core/limitResultSize.js.map b/dist/core/limitResultSize.js.map new file mode 100644 index 0000000..be8bb41 --- /dev/null +++ b/dist/core/limitResultSize.js.map @@ -0,0 +1 @@ +{"version":3,"file":"limitResultSize.js","sourceRoot":"","sources":["../../src/core/limitResultSize.ts"],"names":[],"mappings":";;AAEA;;;;;GAKG;AACH,SAAS,eAAe,CACtB,KAAa,EACb,WAA8B;IAE9B,IAAI,YAAY,GAAsB,EAAE,CAAC;IAEzC,IAAI,KAAK,IAAI,KAAK,GAAG,CAAC,IAAI,WAAW,CAAC,MAAM,GAAG,KAAK,EAAE;QACpD,YAAY,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;KAC5C;IAED,OAAO,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,WAAW,CAAC;AAC9D,CAAC;AAED,kBAAe,eAAe,CAAC"} \ No newline at end of file diff --git a/dist/core/parseGoogleImages.d.ts b/dist/core/parseGoogleImages.d.ts new file mode 100644 index 0000000..2183abe --- /dev/null +++ b/dist/core/parseGoogleImages.d.ts @@ -0,0 +1,8 @@ +import ImageResultItem from "../../types/imageResultItem"; +/** + * Parse the html from google image to get the images links + * @param url + * @returns + */ +declare function parseGoogleImages(url: string): Promise; +export default parseGoogleImages; diff --git a/dist/core/parseGoogleImages.js b/dist/core/parseGoogleImages.js new file mode 100644 index 0000000..a964622 --- /dev/null +++ b/dist/core/parseGoogleImages.js @@ -0,0 +1,69 @@ +"use strict"; +var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { + function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } + return new (P || (P = Promise))(function (resolve, reject) { + function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } + function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } + function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } + step((generator = generator.apply(thisArg, _arguments || [])).next()); + }); +}; +Object.defineProperty(exports, "__esModule", { value: true }); +const utils_1 = require("../utils/utils"); +const GOOGLE_CONSTANT_1 = require("../constant/GOOGLE_CONSTANT"); +const axios_1 = require("axios"); +const { FastHTMLParser } = require("fast-html-dom-parser"); +/** + * Parse the html from google image to get the images links + * @param url + * @returns + */ +function parseGoogleImages(url) { + return __awaiter(this, void 0, void 0, function* () { + const { data } = yield (0, axios_1.default)(url, { + headers: GOOGLE_CONSTANT_1.default.headers, + }); + const parser = new FastHTMLParser(data); + const scripts = parser.getElementsByTagName("script"); + const result = []; + if (!scripts) + return result; + for (const script of scripts) { + const body = script.innerHTML; + if (!(0, utils_1.isImage)(body)) + continue; + //getting image url, height, width, average + const regex = /\["(http[^"]+?)",(\d+),(\d+)\],[\w\d]+?,[\w\d]+?,"rgb\((\d+),(\d+),(\d+)\)"/gi; + //getting originalUrl, title, id + const secondRegex = /\[[\w\d]+?,"([^"]+?)","(http[^"]+?)","([^"]+?)"/gi; + let res = null; + let secondRes = null; + while ((res = regex.exec(body)) != null && + (secondRes = secondRegex.exec(body)) != null) { + if (res.length >= 4 && + res[1].match(/http/gi).length < 2 && + secondRes.length === 4 && + secondRes[2].match(/http/gi).length < 2) { + const [r, g, b] = [res[4], res[5], res[6]].map((e) => parseInt(e, 10)); + result.push({ + id: secondRes[1], + title: secondRes[3], + url: res[1], + originalUrl: secondRes[2], + averageColor: `rgb(${r}, ${g}, ${b})`, + averageColorObject: { + r, + g, + b, + }, + height: parseInt(res[2], 10), + width: parseInt(res[3], 10), + }); + } + } + } + return result; + }); +} +exports.default = parseGoogleImages; +//# sourceMappingURL=parseGoogleImages.js.map \ No newline at end of file diff --git a/dist/core/parseGoogleImages.js.map b/dist/core/parseGoogleImages.js.map new file mode 100644 index 0000000..b7fab10 --- /dev/null +++ b/dist/core/parseGoogleImages.js.map @@ -0,0 +1 @@ +{"version":3,"file":"parseGoogleImages.js","sourceRoot":"","sources":["../../src/core/parseGoogleImages.ts"],"names":[],"mappings":";;;;;;;;;;;AAAA,0CAAyC;AACzC,iEAA0D;AAC1D,iCAA0B;AAG1B,MAAM,EAAE,cAAc,EAAE,GAAG,OAAO,CAAC,sBAAsB,CAAC,CAAC;AAE3D;;;;GAIG;AACH,SAAe,iBAAiB,CAAC,GAAW;;QAC1C,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,IAAA,eAAK,EAAC,GAAG,EAAE;YAChC,OAAO,EAAE,yBAAe,CAAC,OAAO;SACjC,CAAC,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,cAAc,CAAC,IAAI,CAAC,CAAC;QACxC,MAAM,OAAO,GAAG,MAAM,CAAC,oBAAoB,CAAC,QAAQ,CAAC,CAAC;QAEtD,MAAM,MAAM,GAAsB,EAAE,CAAC;QAErC,IAAI,CAAC,OAAO;YAAE,OAAO,MAAM,CAAC;QAE5B,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE;YAC5B,MAAM,IAAI,GAAG,MAAM,CAAC,SAAS,CAAC;YAE9B,IAAI,CAAC,IAAA,eAAO,EAAC,IAAI,CAAC;gBAAE,SAAS;YAE7B,2CAA2C;YAC3C,MAAM,KAAK,GACT,+EAA+E,CAAC;YAClF,gCAAgC;YAChC,MAAM,WAAW,GAAG,mDAAmD,CAAC;YAExE,IAAI,GAAG,GAAG,IAAI,CAAC;YACf,IAAI,SAAS,GAAG,IAAI,CAAC;YAErB,OACE,CAAC,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,IAAI;gBAChC,CAAC,SAAS,GAAG,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,IAAI,EAC5C;gBACA,IACE,GAAG,CAAC,MAAM,IAAI,CAAC;oBACf,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,MAAM,GAAG,CAAC;oBACjC,SAAS,CAAC,MAAM,KAAK,CAAC;oBACtB,SAAS,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,MAAM,GAAG,CAAC,EACvC;oBACA,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;oBAEvE,MAAM,CAAC,IAAI,CAAC;wBACV,EAAE,EAAE,SAAS,CAAC,CAAC,CAAC;wBAChB,KAAK,EAAE,SAAS,CAAC,CAAC,CAAC;wBACnB,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;wBACX,WAAW,EAAE,SAAS,CAAC,CAAC,CAAC;wBACzB,YAAY,EAAE,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG;wBACrC,kBAAkB,EAAE;4BAClB,CAAC;4BACD,CAAC;4BACD,CAAC;yBACF;wBACD,MAAM,EAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;wBAC5B,KAAK,EAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;qBAC5B,CAAC,CAAC;iBACJ;aACF;SACF;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;CAAA;AAED,kBAAe,iBAAiB,CAAC"} \ No newline at end of file diff --git a/dist/core/verifyGoogleQuery.d.ts b/dist/core/verifyGoogleQuery.d.ts new file mode 100644 index 0000000..6afe98a --- /dev/null +++ b/dist/core/verifyGoogleQuery.d.ts @@ -0,0 +1,7 @@ +import Config from "../../types/config"; +/** + * Validation of the query passed as argument + * @param config + */ +declare function verifyGoogleQuery(config: Config): void; +export default verifyGoogleQuery; diff --git a/dist/core/verifyGoogleQuery.js b/dist/core/verifyGoogleQuery.js new file mode 100644 index 0000000..4d7183e --- /dev/null +++ b/dist/core/verifyGoogleQuery.js @@ -0,0 +1,27 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +const GOOGLE_QUERY_1 = require("../constant/query/GOOGLE_QUERY"); +/** + * Validation of the query passed as argument + * @param config + */ +function verifyGoogleQuery(config) { + var _a; + if (config.excludeDomains && config.domains) + throw new Error("Can not set 'excludeDomains' and 'domains' as same times"); + if (!config.search || config.search.trim() == "") + throw new Error("'search' can not be empty"); + if (config.query) { + const queryToVerify = Object.keys(GOOGLE_QUERY_1.default); + for (const key of Object.keys(config.query)) { + if (!queryToVerify.includes(key)) + throw new Error(`Invalide query name '${key}'`); + const VALUES = Object.values(GOOGLE_QUERY_1.default[key]); + const ACTUAL_VALUE = (_a = config.query[key]) !== null && _a !== void 0 ? _a : ""; + if (!VALUES.includes(ACTUAL_VALUE)) + throw new Error(`'${ACTUAL_VALUE}' is not a valide argument for the query : '${key}'`); + } + } +} +exports.default = verifyGoogleQuery; +//# sourceMappingURL=verifyGoogleQuery.js.map \ No newline at end of file diff --git a/dist/core/verifyGoogleQuery.js.map b/dist/core/verifyGoogleQuery.js.map new file mode 100644 index 0000000..63c698a --- /dev/null +++ b/dist/core/verifyGoogleQuery.js.map @@ -0,0 +1 @@ +{"version":3,"file":"verifyGoogleQuery.js","sourceRoot":"","sources":["../../src/core/verifyGoogleQuery.ts"],"names":[],"mappings":";;AAEA,iEAA0D;AAE1D;;;GAGG;AACH,SAAS,iBAAiB,CAAC,MAAc;;IACvC,IAAI,MAAM,CAAC,cAAc,IAAI,MAAM,CAAC,OAAO;QACzC,MAAM,IAAI,KAAK,CAAC,0DAA0D,CAAC,CAAC;IAE9E,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,IAAI,EAAE;QAC9C,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;IAE/C,IAAI,MAAM,CAAC,KAAK,EAAE;QAChB,MAAM,aAAa,GAAG,MAAM,CAAC,IAAI,CAAC,sBAAY,CAAC,CAAC;QAEhD,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAA0B,EAAE;YACpE,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,GAAG,CAAC;gBAC9B,MAAM,IAAI,KAAK,CAAC,wBAAwB,GAAG,GAAG,CAAC,CAAC;YAElD,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,sBAAY,CAAC,GAAG,CAAC,CAAC,CAAC;YAChD,MAAM,YAAY,GAAG,MAAA,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,mCAAI,EAAE,CAAC;YAC7C,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,YAAY,CAAC;gBAChC,MAAM,IAAI,KAAK,CACb,IAAI,YAAY,+CAA+C,GAAG,GAAG,CACtE,CAAC;SACL;KACF;AACH,CAAC;AAED,kBAAe,iBAAiB,CAAC"} \ No newline at end of file diff --git a/dist/index.d.ts b/dist/index.d.ts new file mode 100644 index 0000000..4b4a030 --- /dev/null +++ b/dist/index.d.ts @@ -0,0 +1,5 @@ +import GOOGLE_QUERY from "./constant/query/GOOGLE_QUERY"; +import Config from "../types/config"; +import Results from "../types/results"; +declare function GOOGLE_IMG_SCRAP(config: Config): Promise; +export { GOOGLE_IMG_SCRAP, GOOGLE_QUERY }; diff --git a/dist/index.js b/dist/index.js new file mode 100644 index 0000000..f03cb9d --- /dev/null +++ b/dist/index.js @@ -0,0 +1,32 @@ +"use strict"; +var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { + function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } + return new (P || (P = Promise))(function (resolve, reject) { + function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } + function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } + function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } + step((generator = generator.apply(thisArg, _arguments || [])).next()); + }); +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.GOOGLE_QUERY = exports.GOOGLE_IMG_SCRAP = void 0; +const parseGoogleImages_1 = require("./core/parseGoogleImages"); +const GOOGLE_QUERY_1 = require("./constant/query/GOOGLE_QUERY"); +exports.GOOGLE_QUERY = GOOGLE_QUERY_1.default; +const verifyGoogleQuery_1 = require("./core/verifyGoogleQuery"); +const constructGoogleUrl_1 = require("./core/constructGoogleUrl"); +const limitResultSize_1 = require("./core/limitResultSize"); +function GOOGLE_IMG_SCRAP(config) { + return __awaiter(this, void 0, void 0, function* () { + (0, verifyGoogleQuery_1.default)(config); + const URL = (0, constructGoogleUrl_1.default)(config); + const result = yield (0, parseGoogleImages_1.default)(URL); + const slicedResult = (0, limitResultSize_1.default)(config === null || config === void 0 ? void 0 : config.limit, result); + return { + url: URL, + result: slicedResult, + }; + }); +} +exports.GOOGLE_IMG_SCRAP = GOOGLE_IMG_SCRAP; +//# sourceMappingURL=index.js.map \ No newline at end of file diff --git a/dist/index.js.map b/dist/index.js.map new file mode 100644 index 0000000..41bd12e --- /dev/null +++ b/dist/index.js.map @@ -0,0 +1 @@ +{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,gEAAyD;AACzD,gEAAyD;AAoB9B,uBApBpB,sBAAY,CAoBoB;AAnBvC,gEAAyD;AACzD,kEAA2D;AAC3D,4DAAqD;AAIrD,SAAe,gBAAgB,CAAC,MAAc;;QAC5C,IAAA,2BAAiB,EAAC,MAAM,CAAC,CAAC;QAE1B,MAAM,GAAG,GAAG,IAAA,4BAAkB,EAAC,MAAM,CAAC,CAAC;QACvC,MAAM,MAAM,GAAG,MAAM,IAAA,2BAAiB,EAAC,GAAG,CAAC,CAAC;QAC5C,MAAM,YAAY,GAAG,IAAA,yBAAe,EAAC,MAAM,aAAN,MAAM,uBAAN,MAAM,CAAE,KAAK,EAAE,MAAM,CAAC,CAAC;QAE5D,OAAO;YACL,GAAG,EAAE,GAAG;YACR,MAAM,EAAE,YAAY;SACrB,CAAC;IACJ,CAAC;CAAA;AAEQ,4CAAgB"} \ No newline at end of file diff --git a/dist/utils/utils.d.ts b/dist/utils/utils.d.ts new file mode 100644 index 0000000..db581d4 --- /dev/null +++ b/dist/utils/utils.d.ts @@ -0,0 +1,19 @@ +/** + * Build the query for url + * @param query + * @returns + */ +declare function buildQuery(query: Record): string; +/** + * Transform unicode to char for more visibility + * @param text + * @returns + */ +declare function unicodeToChar(text: string): string; +/** + * Verify the url is an image + * @param content + * @returns + */ +declare function isImage(content?: string): boolean; +export { buildQuery, unicodeToChar, isImage }; diff --git a/dist/utils/utils.js b/dist/utils/utils.js new file mode 100644 index 0000000..39526cd --- /dev/null +++ b/dist/utils/utils.js @@ -0,0 +1,40 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.isImage = exports.unicodeToChar = exports.buildQuery = void 0; +const IMAGES_EXTENSIONS_json_1 = require("../constant/extensions/IMAGES_EXTENSIONS.json"); +/** + * Build the query for url + * @param query + * @returns + */ +function buildQuery(query) { + const result = []; + const params = Object.keys(query); + for (const param of params) { + const queryName = param; + result.push(`${queryName}=${encodeURIComponent(query[param])}`); + } + return "?" + result.join("&"); +} +exports.buildQuery = buildQuery; +/** + * Transform unicode to char for more visibility + * @param text + * @returns + */ +function unicodeToChar(text) { + return text.replace(/\\u[\dA-F]{4}/gi, function (match) { + return String.fromCharCode(parseInt(match.replace(/\\u/g, ""), 16)); + }); +} +exports.unicodeToChar = unicodeToChar; +/** + * Verify the url is an image + * @param content + * @returns + */ +function isImage(content = "") { + return IMAGES_EXTENSIONS_json_1.default.some((extension) => content.includes(extension)); +} +exports.isImage = isImage; +//# sourceMappingURL=utils.js.map \ No newline at end of file diff --git a/dist/utils/utils.js.map b/dist/utils/utils.js.map new file mode 100644 index 0000000..e4df97c --- /dev/null +++ b/dist/utils/utils.js.map @@ -0,0 +1 @@ +{"version":3,"file":"utils.js","sourceRoot":"","sources":["../../src/utils/utils.ts"],"names":[],"mappings":";;;AAAA,0FAAuE;AAEvE;;;;GAIG;AACH,SAAS,UAAU,CAAC,KAA6B;IAC/C,MAAM,MAAM,GAAG,EAAE,CAAC;IAClB,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAElC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE;QAC1B,MAAM,SAAS,GAAG,KAAK,CAAC;QACxB,MAAM,CAAC,IAAI,CAAC,GAAG,SAAS,IAAI,kBAAkB,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;KACjE;IAED,OAAO,GAAG,GAAG,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAChC,CAAC;AAsBQ,gCAAU;AApBnB;;;;GAIG;AACH,SAAS,aAAa,CAAC,IAAY;IACjC,OAAO,IAAI,CAAC,OAAO,CAAC,iBAAiB,EAAE,UAAU,KAAK;QACpD,OAAO,MAAM,CAAC,YAAY,CAAC,QAAQ,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;IACtE,CAAC,CAAC,CAAC;AACL,CAAC;AAWoB,sCAAa;AATlC;;;;GAIG;AACH,SAAS,OAAO,CAAC,OAAO,GAAG,EAAE;IAC3B,OAAO,gCAAU,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;AACrE,CAAC;AAEmC,0BAAO"} \ No newline at end of file diff --git a/jest.config.ts b/jest.config.ts new file mode 100644 index 0000000..ab8b552 --- /dev/null +++ b/jest.config.ts @@ -0,0 +1,11 @@ +import type { Config } from "@jest/types"; + +const config: Config.InitialOptions = { + preset: "ts-jest", + rootDir: "test", + testEnvironment: "node", + verbose: true, + automock: true, +}; + +export default config; diff --git a/package.json b/package.json index 855cdd6..e93234a 100644 --- a/package.json +++ b/package.json @@ -1,14 +1,15 @@ { "name": "google-img-scrap", - "version": "1.0.8", - "description": "Scrap images from google images with customs pre filled dorking options", - "main": "./src/google-img-scrap.js", - "types": "./types/index.d.ts", + "version": "1.0.9", + "description": "Scrap images from google images with customs pre filled google dork options", + "main": "./dist/index.js", + "types": "./dist/index.d.ts", "directories": { "test": "test" }, "scripts": { - "test": "node ./test/test.js" + "build": "tsc", + "test": "jest" }, "repository": { "type": "git", @@ -46,7 +47,15 @@ }, "homepage": "https://github.com/yoannchb-pro/google-img-scrap#readme", "dependencies": { - "fast-html-dom-parser": "^1.0.5", - "got": "^11.0.0" + "axios": "^1.2.3", + "fast-html-dom-parser": "^1.0.5" + }, + "devDependencies": { + "@types/jest": "^29.2.6", + "@types/node": "^18.11.18", + "jest": "^29.3.1", + "ts-jest": "^29.0.5", + "ts-node": "^10.9.1", + "typescript": "^4.7.4" } } diff --git a/src/back/google-img-scrap-1.0.7-.js b/src/back/google-img-scrap-1.0.7-.js deleted file mode 100644 index 763e37f..0000000 --- a/src/back/google-img-scrap-1.0.7-.js +++ /dev/null @@ -1,184 +0,0 @@ -const got = require("got"); -const { FastHTMLParser } = require("fast-html-dom-parser"); - -const { GOOGLE_CONSTANT } = require("../constant/GOOGLE_CONSTANT"); -const { GOOGLE_QUERY } = require("../constant/query/GOOGLE_QUERY"); -const EXTENSIONS = require("../constant/extensions/IMAGES_EXTENSIONS.json"); - -const { buildQuery, unicodeToChar } = require("../utils/UTILS"); - -/** - * Validation of the arguments passed - * @param {import("../../types").Config} config - */ -function verify(config) { - if (config.excludeDomains && config.domains) - throw "Can not set 'excludeDomains' and 'domains' as same times"; - - if (!config.search || config.search.trim() == "") - throw "'search' can not be empty"; - - if (config.query) { - const queryToVerify = Object.keys(GOOGLE_QUERY); - - for (const key of Object.keys(config.query)) { - if (!queryToVerify.includes(key)) throw `Invalide query name '${key}'`; - - const VALUES = Object.values(GOOGLE_QUERY[key]); - const ACTUAL_VALUE = config.query[key]; - if (!VALUES.includes(ACTUAL_VALUE)) - throw `'${ACTUAL_VALUE}' is not a valide argument for the query : '${key}'`; - } - } -} - -/** - * Verifify the url is an image - * @param {string} content - * @returns {boolean} - */ -function containImage(content = "") { - return EXTENSIONS.some((extension) => content.includes(extension)); -} - -/** - *Parse the html from google image to get the images links - * @param {string} url - * @returns {import("../../types").FinalResult[]} - */ -async function parse(url) { - const result = []; - - const response = await got(url, { - headers: GOOGLE_CONSTANT.headers, - }); - const parser = new FastHTMLParser(response.body); - - const scripts = parser.getElementsByTagName("script"); - - if (!scripts) return result; - - for (const script of scripts) { - const body = script.innerHTML; - - const valide = containImage(body); - - if (valide) { - const regex = /\["(http.+?)",(\d+),(\d+)\]/gi; - - let res = null; - - while ((res = regex.exec(body)) != null) { - if (res.length >= 4 && res[1].match(/http/gi).length < 2) - result.push({ - url: unicodeToChar(res[1]), - height: res[2], - width: res[3], - }); - } - } - } - - return result; -} - -/** - * Main function to build google image dork URL - * @param {import("../../types").Config} config - * @returns {import("../../types").Results} - */ -async function GOOGLE_IMG_SCRAP(config = {}) { - verify(config); - - //exclude domains - const EXCLUDE_DOMAINS = []; - if (config.excludeDomains) - config.excludeDomains.forEach((domain) => - EXCLUDE_DOMAINS.push(`-site:"${domain}"`) - ); - - //domains - const DOMAINS = []; - if (config.domains) - config.domains.forEach((domain) => DOMAINS.push(`site:"${domain}"`)); - - //exclude words - const EXCLUDE_WORDS = []; - if (config.excludeWords) - config.excludeWords.forEach((word) => EXCLUDE_WORDS.push(`-"${word}"`)); - - //filter by titles - const FILTER_TITLE = []; - if (config.filterByTitles) - config.filterByTitles.forEach((titleFilter) => { - const value = titleFilter.map((title) => { - return `intitle:"${title}"`; - }); - - FILTER_TITLE.push(`(${value.join(" AND ")})`); - }); - - //url match words - const URL_MATCH = []; - if (config.urlMatch) - config.urlMatch.forEach((urlMatch) => { - const value = urlMatch.map((content) => { - return `inurl:${content}`; - }); - - URL_MATCH.push(`(${value.join(" AND ")})`); - }); - - //building url - const SEARCH_TERM = - config.search + - " " + - URL_MATCH.join(" OR ") + - " " + - FILTER_TITLE.join(" OR ") + - " " + - EXCLUDE_WORDS.join(" ") + - " " + - EXCLUDE_DOMAINS.join(" ") + - " " + - DOMAINS.join(" OR "); - - const SEARCH = encodeURIComponent(SEARCH_TERM.trim()); - const QUERY = Object.assign(GOOGLE_CONSTANT.forceGoogleImage, { - [GOOGLE_CONSTANT.queryParam]: Object.values(config.query || {}).join(","), - q: SEARCH, - }); - - const CUSTOM_PARAM = config.custom ? `&${config.custom}` : ""; - const SAFE_SEARCH = config.safeSearch ? `&safe=active` : ""; - - const URL = - GOOGLE_CONSTANT.url + buildQuery(QUERY) + CUSTOM_PARAM + SAFE_SEARCH; - - //parsing - const result = await parse(URL); - - //excute function - let finalResult = []; - if (config.execute) - result.forEach((element) => { - const value = config.execute(element); - if (value) finalResult.push(value); - }); - else finalResult = result; - - //limit result - let slicedResult = []; - const { limit } = config; - - if (limit && limit > 0 && finalResult.length > limit) { - slicedResult = finalResult.slice(0, limit); - } - //result - return { - url: URL, - result: slicedResult.length > 0 ? slicedResult : finalResult, - }; -} - -module.exports = { GOOGLE_IMG_SCRAP, GOOGLE_QUERY }; diff --git a/src/constant/GOOGLE_CONSTANT.js b/src/constant/GOOGLE_CONSTANT.ts similarity index 90% rename from src/constant/GOOGLE_CONSTANT.js rename to src/constant/GOOGLE_CONSTANT.ts index da1bb6d..a2a3caa 100644 --- a/src/constant/GOOGLE_CONSTANT.js +++ b/src/constant/GOOGLE_CONSTANT.ts @@ -10,4 +10,4 @@ const GOOGLE_CONSTANT = { }, }; -module.exports = { GOOGLE_CONSTANT }; +export default GOOGLE_CONSTANT; diff --git a/src/constant/query/GOOGLE_COLORS.js b/src/constant/query/GOOGLE_COLORS.ts similarity index 83% rename from src/constant/query/GOOGLE_COLORS.js rename to src/constant/query/GOOGLE_COLORS.ts index 27fb4a9..f3648bc 100644 --- a/src/constant/query/GOOGLE_COLORS.js +++ b/src/constant/query/GOOGLE_COLORS.ts @@ -13,4 +13,4 @@ const COLORS = [ "brown", ]; -module.exports = { COLORS }; +export default COLORS; diff --git a/src/constant/query/GOOGLE_PARAMS.js b/src/constant/query/GOOGLE_PARAMS.js deleted file mode 100644 index b12c5a2..0000000 --- a/src/constant/query/GOOGLE_PARAMS.js +++ /dev/null @@ -1,17 +0,0 @@ -const SIZE_PARAM = "isz", - COLOR_PARAM = "ic", - SPECIFIC_COLOR_PARAM = "Cisc", - TYPE_PARAM = "itp", - DATE_PARAM = "qdr", - LICENCE_PARAM = "il", - IMAGE_EXTENSION_PARAM = "ift"; - -module.exports = { - SIZE_PARAM, - COLOR_PARAM, - SPECIFIC_COLOR_PARAM, - TYPE_PARAM, - DATE_PARAM, - LICENCE_PARAM, - IMAGE_EXTENSION_PARAM, -}; diff --git a/src/constant/query/GOOGLE_PARAMS.ts b/src/constant/query/GOOGLE_PARAMS.ts new file mode 100644 index 0000000..2152ff6 --- /dev/null +++ b/src/constant/query/GOOGLE_PARAMS.ts @@ -0,0 +1,11 @@ +const GOOGLE_PARAMS = { + SIZE: "isz", + COLOR: "ic", + SPECIFIC_COLOR: "Cisc", + TYPE: "itp", + DATE: "qdr", + LICENCE: "il", + IMAGE_EXTENSION: "ift", +}; + +export default GOOGLE_PARAMS; diff --git a/src/constant/query/GOOGLE_QUERY.js b/src/constant/query/GOOGLE_QUERY.js deleted file mode 100644 index d52f57f..0000000 --- a/src/constant/query/GOOGLE_QUERY.js +++ /dev/null @@ -1,59 +0,0 @@ -const { - SIZE_PARAM, - COLOR_PARAM, - SPECIFIC_COLOR_PARAM, - TYPE_PARAM, - DATE_PARAM, - LICENCE_PARAM, - IMAGE_EXTENSION_PARAM, -} = require("./GOOGLE_PARAMS"); -const { COLORS } = require("./GOOGLE_COLORS"); -const EXTENSIONS = require("../extensions/IMAGES_EXTENSIONS.json"); - -const GOOGLE_QUERY = { - SIZE: { - LARGE: SIZE_PARAM + ":l", - MEDIUM: SIZE_PARAM + ":m", - ICON: SIZE_PARAM + ":i", - }, - - COLOR: { - BLACK_AND_WHITE: COLOR_PARAM + ":gray", - TRANSPARENT: COLOR_PARAM + ":trans", - }, - - TYPE: { - CLIPART: TYPE_PARAM + ":clipart", - DRAW: TYPE_PARAM + ":lineart", - GIF: TYPE_PARAM + ":animated", - }, - - EXTENSION: {}, - - DATE: { - DAY: DATE_PARAM + ":d", - WEEK: DATE_PARAM + ":w", - MONTH: DATE_PARAM + ":m", - YEAR: DATE_PARAM + ":y", - }, - - LICENCE: { - CREATIVE_COMMONS: LICENCE_PARAM + ":cl", - COMMERCIAL_AND_OTHER: LICENCE_PARAM + ":ol", - }, -}; - -//build extension -EXTENSIONS.forEach((EXTENSION) => { - const queryName = EXTENSION.toUpperCase(); - GOOGLE_QUERY.EXTENSION[queryName] = IMAGE_EXTENSION_PARAM + ":" + EXTENSION; -}); - -//build colors -COLORS.forEach((COLOR) => { - const queryName = COLOR.toUpperCase(); - GOOGLE_QUERY.COLOR[queryName] = - COLOR_PARAM + ":specific," + SPECIFIC_COLOR_PARAM + ":" + COLOR; -}); - -module.exports = { GOOGLE_QUERY }; diff --git a/src/constant/query/GOOGLE_QUERY.ts b/src/constant/query/GOOGLE_QUERY.ts new file mode 100644 index 0000000..e0fe03d --- /dev/null +++ b/src/constant/query/GOOGLE_QUERY.ts @@ -0,0 +1,57 @@ +import GOOGLE_PARAMS from "./GOOGLE_PARAMS"; +import COLORS from "./GOOGLE_COLORS"; +import EXTENSIONS from "../extensions/IMAGES_EXTENSIONS.json"; +import GoogleQuery from "../../../types/googleQuery"; + +const GOOGLE_QUERY = { + SIZE: { + LARGE: GOOGLE_PARAMS.SIZE + ":l", + MEDIUM: GOOGLE_PARAMS.SIZE + ":m", + ICON: GOOGLE_PARAMS.SIZE + ":i", + }, + + COLOR: { + BLACK_AND_WHITE: GOOGLE_PARAMS.COLOR + ":gray", + TRANSPARENT: GOOGLE_PARAMS.COLOR + ":trans", + }, + + TYPE: { + CLIPART: GOOGLE_PARAMS.TYPE + ":clipart", + DRAW: GOOGLE_PARAMS.TYPE + ":lineart", + GIF: GOOGLE_PARAMS.TYPE + ":animated", + }, + + EXTENSION: {}, + + DATE: { + DAY: GOOGLE_PARAMS.DATE + ":d", + WEEK: GOOGLE_PARAMS.DATE + ":w", + MONTH: GOOGLE_PARAMS.DATE + ":m", + YEAR: GOOGLE_PARAMS.DATE + ":y", + }, + + LICENCE: { + CREATIVE_COMMONS: GOOGLE_PARAMS.LICENCE + ":cl", + COMMERCIAL_AND_OTHER: GOOGLE_PARAMS.LICENCE + ":ol", + }, +}; + +//build extension +for (const EXTENSION of EXTENSIONS) { + const queryName = EXTENSION.toUpperCase(); + (GOOGLE_QUERY as any).EXTENSION[queryName] = + GOOGLE_PARAMS.IMAGE_EXTENSION + ":" + EXTENSION; +} + +//build colors +for (const COLOR of COLORS) { + const queryName = COLOR.toUpperCase(); + (GOOGLE_QUERY as any).COLOR[queryName] = + GOOGLE_PARAMS.COLOR + + ":specific," + + GOOGLE_PARAMS.SPECIFIC_COLOR + + ":" + + COLOR; +} + +export default GOOGLE_QUERY as GoogleQuery; diff --git a/src/core/buildGoogleDork.ts b/src/core/buildGoogleDork.ts new file mode 100644 index 0000000..58e04b9 --- /dev/null +++ b/src/core/buildGoogleDork.ts @@ -0,0 +1,103 @@ +import Config from "../../types/config"; + +/** + * Show only images with a particular title + * @param config + * @returns + */ +function filterByTitlesBuilder(config: Config): string { + const FILTER_TITLE = []; + if (config.filterByTitles) { + for (const titleFilter of config.filterByTitles) { + const value = titleFilter.map((title) => { + return `intitle:"${title}"`; + }); + + FILTER_TITLE.push(`(${value.join(" AND ")})`); + } + } + return FILTER_TITLE.join(" "); +} + +/** + * Show only images without some specific words + * @param config + * @returns + */ +function excludeWordsBuilder(config: Config): string { + const EXCLUDE_WORDS = []; + if (config.excludeWords) { + for (const excludeWord of config.excludeWords) { + EXCLUDE_WORDS.push(`-"${excludeWord}"`); + } + } + return EXCLUDE_WORDS.join(" "); +} + +/** + * Show only images of some particular domains + * @param config + * @returns + */ +function onlyDomainsBuilder(config: Config): string { + const DOMAINS = []; + if (config.domains) { + for (const domain of config.domains) { + DOMAINS.push(`site:"${domain}"`); + } + } + return DOMAINS.join(" OR "); +} + +/** + * Don't show images from particular domains + * @param config + * @returns + */ +function excludeDomainsBuilder(config: Config): string { + const EXCLUDE_DOMAINS = []; + if (config.excludeDomains) { + for (const excludeDomain of config.excludeDomains) { + EXCLUDE_DOMAINS.push(`-site:"${excludeDomain}"`); + } + } + return EXCLUDE_DOMAINS.join(" "); +} + +/** + * Only show images with a domain that match a particular regex + * @param config + * @returns + */ +function urlMatchBuilder(config: Config): string { + const URL_MATCH = []; + if (config.urlMatch) { + for (const urlMatch of config.urlMatch) { + const value = urlMatch.map((content) => { + return `inurl:${content}`; + }); + URL_MATCH.push(`(${value.join(" AND ")})`); + } + } + return URL_MATCH.join(" OR "); +} + +/** + * Build google dork string based on the config query + * @param config + * @returns + */ +function buildGoogleDorks(config: Config): string { + return [ + config.search, + urlMatchBuilder(config), + excludeWordsBuilder(config), + excludeDomainsBuilder(config), + onlyDomainsBuilder(config), + filterByTitlesBuilder(config), + ] + .join(" ") + .trim(); +} + +export default buildGoogleDorks; diff --git a/src/core/constructGoogleUrl.ts b/src/core/constructGoogleUrl.ts new file mode 100644 index 0000000..f295196 --- /dev/null +++ b/src/core/constructGoogleUrl.ts @@ -0,0 +1,24 @@ +import Config from "../../types/config"; +import GOOGLE_CONSTANT from "../constant/GOOGLE_CONSTANT"; +import { buildQuery } from "../utils/utils"; +import buildGoogleDorks from "./buildGoogleDork"; + +/** + * Construct google url for scrapping + * @param config + * @returns + */ +function constructGoogleUrl(config: Config): string { + const GOOGLE_DORK = buildGoogleDorks(config); + const CUSTOM_PARAM = config.custom ? `&${config.custom}` : ""; + const SAFE_SEARCH = config.safeSearch ? `&safe=active` : ""; + + const QUERY = Object.assign(GOOGLE_CONSTANT.forceGoogleImage, { + [GOOGLE_CONSTANT.queryParam]: Object.values(config.query || {}).join(","), + q: GOOGLE_DORK, + }); + + return GOOGLE_CONSTANT.url + buildQuery(QUERY) + CUSTOM_PARAM + SAFE_SEARCH; +} + +export default constructGoogleUrl; diff --git a/src/core/limitResultSize.ts b/src/core/limitResultSize.ts new file mode 100644 index 0000000..263d354 --- /dev/null +++ b/src/core/limitResultSize.ts @@ -0,0 +1,22 @@ +import ImageResultItem from "../../types/imageResultItem"; + +/** + * Limit the result size + * @param config + * @param imagesItems + * @returns + */ +function limitResultSize( + limit: number, + imagesItems: ImageResultItem[] +): ImageResultItem[] { + let slicedResult: ImageResultItem[] = []; + + if (limit && limit > 0 && imagesItems.length > limit) { + slicedResult = imagesItems.slice(0, limit); + } + + return slicedResult.length > 0 ? slicedResult : imagesItems; +} + +export default limitResultSize; diff --git a/src/core/parseGoogleImages.ts b/src/core/parseGoogleImages.ts new file mode 100644 index 0000000..6bef934 --- /dev/null +++ b/src/core/parseGoogleImages.ts @@ -0,0 +1,71 @@ +import { isImage } from "../utils/utils"; +import GOOGLE_CONSTANT from "../constant/GOOGLE_CONSTANT"; +import axios from "axios"; +import ImageResultItem from "../../types/imageResultItem"; + +const { FastHTMLParser } = require("fast-html-dom-parser"); + +/** + * Parse the html from google image to get the images links + * @param url + * @returns + */ +async function parseGoogleImages(url: string): Promise { + const { data } = await axios(url, { + headers: GOOGLE_CONSTANT.headers, + }); + const parser = new FastHTMLParser(data); + const scripts = parser.getElementsByTagName("script"); + + const result: ImageResultItem[] = []; + + if (!scripts) return result; + + for (const script of scripts) { + const body = script.innerHTML; + + if (!isImage(body)) continue; + + //getting image url, height, width, average + const regex = + /\["(http[^"]+?)",(\d+),(\d+)\],[\w\d]+?,[\w\d]+?,"rgb\((\d+),(\d+),(\d+)\)"/gi; + //getting originalUrl, title, id + const secondRegex = /\[[\w\d]+?,"([^"]+?)","(http[^"]+?)","([^"]+?)"/gi; + + let res = null; + let secondRes = null; + + while ( + (res = regex.exec(body)) != null && + (secondRes = secondRegex.exec(body)) != null + ) { + if ( + res.length >= 4 && + res[1].match(/http/gi).length < 2 && + secondRes.length === 4 && + secondRes[2].match(/http/gi).length < 2 + ) { + const [r, g, b] = [res[4], res[5], res[6]].map((e) => parseInt(e, 10)); + + result.push({ + id: secondRes[1], + title: secondRes[3], + url: res[1], + originalUrl: secondRes[2], + averageColor: `rgb(${r}, ${g}, ${b})`, + averageColorObject: { + r, + g, + b, + }, + height: parseInt(res[2], 10), + width: parseInt(res[3], 10), + }); + } + } + } + + return result; +} + +export default parseGoogleImages; diff --git a/src/core/verifyGoogleQuery.ts b/src/core/verifyGoogleQuery.ts new file mode 100644 index 0000000..48b175b --- /dev/null +++ b/src/core/verifyGoogleQuery.ts @@ -0,0 +1,33 @@ +import Config from "../../types/config"; +import GoogleQuery from "../../types/googleQuery"; +import GOOGLE_QUERY from "../constant/query/GOOGLE_QUERY"; + +/** + * Validation of the query passed as argument + * @param config + */ +function verifyGoogleQuery(config: Config) { + if (config.excludeDomains && config.domains) + throw new Error("Can not set 'excludeDomains' and 'domains' as same times"); + + if (!config.search || config.search.trim() == "") + throw new Error("'search' can not be empty"); + + if (config.query) { + const queryToVerify = Object.keys(GOOGLE_QUERY); + + for (const key of Object.keys(config.query) as (keyof GoogleQuery)[]) { + if (!queryToVerify.includes(key)) + throw new Error(`Invalide query name '${key}'`); + + const VALUES = Object.values(GOOGLE_QUERY[key]); + const ACTUAL_VALUE = config.query[key] ?? ""; + if (!VALUES.includes(ACTUAL_VALUE)) + throw new Error( + `'${ACTUAL_VALUE}' is not a valide argument for the query : '${key}'` + ); + } + } +} + +export default verifyGoogleQuery; diff --git a/src/google-img-scrap.js b/src/google-img-scrap.js deleted file mode 100644 index 9fe32fb..0000000 --- a/src/google-img-scrap.js +++ /dev/null @@ -1,212 +0,0 @@ -const got = require("got"); -const { FastHTMLParser } = require("fast-html-dom-parser"); - -const { GOOGLE_CONSTANT } = require("./constant/GOOGLE_CONSTANT"); -const { GOOGLE_QUERY } = require("./constant/query/GOOGLE_QUERY"); -const EXTENSIONS = require("./constant/extensions/IMAGES_EXTENSIONS.json"); - -const { buildQuery, unicodeToChar } = require("./utils/UTILS"); - -/** - * Validation of the arguments passed - * @param {import("../types").Config} config - */ -function verify(config) { - if (config.excludeDomains && config.domains) - throw "Can not set 'excludeDomains' and 'domains' as same times"; - - if (!config.search || config.search.trim() == "") - throw "'search' can not be empty"; - - if (config.query) { - const queryToVerify = Object.keys(GOOGLE_QUERY); - - for (const key of Object.keys(config.query)) { - if (!queryToVerify.includes(key)) throw `Invalide query name '${key}'`; - - const VALUES = Object.values(GOOGLE_QUERY[key]); - const ACTUAL_VALUE = config.query[key]; - if (!VALUES.includes(ACTUAL_VALUE)) - throw `'${ACTUAL_VALUE}' is not a valide argument for the query : '${key}'`; - } - } -} - -/** - * Verifify the url is an image - * @param {string} content - * @returns {boolean} - */ -function containImage(content = "") { - return EXTENSIONS.some((extension) => content.includes(extension)); -} - -/** - *Parse the html from google image to get the images links - * @param {string} url - * @returns {import("../types").FinalResult[]} - */ -async function parse(url) { - const result = []; - - const response = await got(url, { - headers: GOOGLE_CONSTANT.headers, - }); - const parser = new FastHTMLParser(response.body); - - const scripts = parser.getElementsByTagName("script"); - - if (!scripts) return result; - - for (const script of scripts) { - const body = script.innerHTML; - - const valide = containImage(body); - - if (valide) { - //getting image url, height, width, average - const regex = - /\["(http[^"]+?)",(\d+),(\d+)\],[\w\d]+?,[\w\d]+?,"rgb\((\d+),(\d+),(\d+)\)"/gi; - - //getting originalUrl, title, id - const secondRegex = /\[[\w\d]+?,"([^"]+?)","(http[^"]+?)","([^"]+?)"/gi; - - let res = null; - let secondRes = null; - - while ( - (res = regex.exec(body)) != null && - (secondRes = secondRegex.exec(body)) != null - ) { - if ( - res.length >= 4 && - res[1].match(/http/gi).length < 2 && - secondRes.length === 4 && - secondRes[2].match(/http/gi).length < 2 - ) { - const [r, g, b] = [res[4], res[5], res[6]].map((e) => - parseInt(e, 10) - ); - - result.push({ - id: secondRes[1], - title: secondRes[3], - url: unicodeToChar(res[1]), - originalUrl: unicodeToChar(secondRes[2]), - averageColor: `rgb(${r}, ${g}, ${b})`, - averageColorObject: { - r, - g, - b, - }, - height: parseInt(res[2], 10), - width: parseInt(res[3], 10), - }); - } - } - } - } - - return result; -} - -/** - * Main function to build google image dork URL - * @param {import("../types").Config} config - * @returns {import("../types").Results} - */ -async function GOOGLE_IMG_SCRAP(config = {}) { - verify(config); - - //exclude domains - const EXCLUDE_DOMAINS = []; - if (config.excludeDomains) - config.excludeDomains.forEach((domain) => - EXCLUDE_DOMAINS.push(`-site:"${domain}"`) - ); - - //domains - const DOMAINS = []; - if (config.domains) - config.domains.forEach((domain) => DOMAINS.push(`site:"${domain}"`)); - - //exclude words - const EXCLUDE_WORDS = []; - if (config.excludeWords) - config.excludeWords.forEach((word) => EXCLUDE_WORDS.push(`-"${word}"`)); - - //filter by titles - const FILTER_TITLE = []; - if (config.filterByTitles) - config.filterByTitles.forEach((titleFilter) => { - const value = titleFilter.map((title) => { - return `intitle:"${title}"`; - }); - - FILTER_TITLE.push(`(${value.join(" AND ")})`); - }); - - //url match words - const URL_MATCH = []; - if (config.urlMatch) - config.urlMatch.forEach((urlMatch) => { - const value = urlMatch.map((content) => { - return `inurl:${content}`; - }); - - URL_MATCH.push(`(${value.join(" AND ")})`); - }); - - //building url - const SEARCH_TERM = - config.search + - " " + - URL_MATCH.join(" OR ") + - " " + - FILTER_TITLE.join(" OR ") + - " " + - EXCLUDE_WORDS.join(" ") + - " " + - EXCLUDE_DOMAINS.join(" ") + - " " + - DOMAINS.join(" OR "); - - const SEARCH = encodeURIComponent(SEARCH_TERM.trim()); - const QUERY = Object.assign(GOOGLE_CONSTANT.forceGoogleImage, { - [GOOGLE_CONSTANT.queryParam]: Object.values(config.query || {}).join(","), - q: SEARCH, - }); - - const CUSTOM_PARAM = config.custom ? `&${config.custom}` : ""; - const SAFE_SEARCH = config.safeSearch ? `&safe=active` : ""; - - const URL = - GOOGLE_CONSTANT.url + buildQuery(QUERY) + CUSTOM_PARAM + SAFE_SEARCH; - - //parsing - const result = await parse(URL); - - //excute function - let finalResult = []; - if (config.execute) - result.forEach((element) => { - const value = config.execute(element); - if (value) finalResult.push(value); - }); - else finalResult = result; - - //limit result - let slicedResult = []; - const { limit } = config; - - if (limit && limit > 0 && finalResult.length > limit) { - slicedResult = finalResult.slice(0, limit); - } - //result - return { - url: URL, - result: slicedResult.length > 0 ? slicedResult : finalResult, - }; -} - -module.exports = { GOOGLE_IMG_SCRAP, GOOGLE_QUERY }; diff --git a/src/index.ts b/src/index.ts new file mode 100644 index 0000000..32099da --- /dev/null +++ b/src/index.ts @@ -0,0 +1,22 @@ +import parseGoogleImages from "./core/parseGoogleImages"; +import GOOGLE_QUERY from "./constant/query/GOOGLE_QUERY"; +import verifyGoogleQuery from "./core/verifyGoogleQuery"; +import constructGoogleUrl from "./core/constructGoogleUrl"; +import limitResultSize from "./core/limitResultSize"; +import Config from "../types/config"; +import Results from "../types/results"; + +async function GOOGLE_IMG_SCRAP(config: Config): Promise { + verifyGoogleQuery(config); + + const URL = constructGoogleUrl(config); + const result = await parseGoogleImages(URL); + const slicedResult = limitResultSize(config?.limit, result); + + return { + url: URL, + result: slicedResult, + }; +} + +export { GOOGLE_IMG_SCRAP, GOOGLE_QUERY }; diff --git a/src/utils/UTILS.js b/src/utils/UTILS.js deleted file mode 100644 index b97524b..0000000 --- a/src/utils/UTILS.js +++ /dev/null @@ -1,20 +0,0 @@ -function buildQuery(query) { - const result = []; - - const params = Object.keys(query); - - for (const param of params) { - const queryName = param; - result.push(`${queryName}=${query[param]}`); - } - - return "?" + result.join("&"); -} - -function unicodeToChar(text) { - return text.replace(/\\u[\dA-F]{4}/gi, function (match) { - return String.fromCharCode(parseInt(match.replace(/\\u/g, ""), 16)); - }); -} - -module.exports = { buildQuery, unicodeToChar }; diff --git a/src/utils/utils.ts b/src/utils/utils.ts new file mode 100644 index 0000000..3b4f691 --- /dev/null +++ b/src/utils/utils.ts @@ -0,0 +1,40 @@ +import EXTENSIONS from "../constant/extensions/IMAGES_EXTENSIONS.json"; + +/** + * Build the query for url + * @param query + * @returns + */ +function buildQuery(query: Record) { + const result = []; + const params = Object.keys(query); + + for (const param of params) { + const queryName = param; + result.push(`${queryName}=${encodeURIComponent(query[param])}`); + } + + return "?" + result.join("&"); +} + +/** + * Transform unicode to char for more visibility + * @param text + * @returns + */ +function unicodeToChar(text: string) { + return text.replace(/\\u[\dA-F]{4}/gi, function (match) { + return String.fromCharCode(parseInt(match.replace(/\\u/g, ""), 16)); + }); +} + +/** + * Verify the url is an image + * @param content + * @returns + */ +function isImage(content = "") { + return EXTENSIONS.some((extension) => content.includes(extension)); +} + +export { buildQuery, unicodeToChar, isImage }; diff --git a/test/test-filter-titles.js b/test-back/test-filter-titles.js similarity index 56% rename from test/test-filter-titles.js rename to test-back/test-filter-titles.js index f00416b..5d8cf14 100644 --- a/test/test-filter-titles.js +++ b/test-back/test-filter-titles.js @@ -1,4 +1,4 @@ -const { GOOGLE_IMG_SCRAP } = require("../src/google-img-scrap"); +const { GOOGLE_IMG_SCRAP } = require("../dist"); (async function () { const test = await GOOGLE_IMG_SCRAP({ @@ -7,9 +7,6 @@ const { GOOGLE_IMG_SCRAP } = require("../src/google-img-scrap"); ["draw", "white"], ["albino", "white"], ], - execute: function (element) { - if (!element.url.match("gstatic.com")) return element; - }, }); console.log(test, test.result.length); diff --git a/test/test-last-version.js b/test-back/test-last-version.js similarity index 86% rename from test/test-last-version.js rename to test-back/test-last-version.js index cb8fcf9..36f7ef0 100644 --- a/test/test-last-version.js +++ b/test-back/test-last-version.js @@ -1,4 +1,4 @@ -const { GOOGLE_IMG_SCRAP } = require("../src/google-img-scrap"); +const { GOOGLE_IMG_SCRAP } = require("../src"); const V107 = require("../src/back/google-img-scrap-1.0.7-.js"); // console.log(GOOGLE_QUERY); diff --git a/test/test-result-limit.js b/test-back/test-result-limit.js similarity index 100% rename from test/test-result-limit.js rename to test-back/test-result-limit.js diff --git a/test/test-simple.js b/test-back/test-simple.js similarity index 100% rename from test/test-simple.js rename to test-back/test-simple.js diff --git a/test/test-url-match.js b/test-back/test-url-match.js similarity index 100% rename from test/test-url-match.js rename to test-back/test-url-match.js diff --git a/test/test-wallpaper.js b/test-back/test-wallpaper.js similarity index 100% rename from test/test-wallpaper.js rename to test-back/test-wallpaper.js diff --git a/test/test.js b/test-back/test.js similarity index 78% rename from test/test.js rename to test-back/test.js index 3a55ee1..6805ec6 100644 --- a/test/test.js +++ b/test-back/test.js @@ -1,4 +1,4 @@ -const { GOOGLE_IMG_SCRAP, GOOGLE_QUERY } = require("../src/google-img-scrap"); +const { GOOGLE_IMG_SCRAP, GOOGLE_QUERY } = require("../dist/index.js"); // console.log(GOOGLE_QUERY); @@ -16,9 +16,7 @@ const { GOOGLE_IMG_SCRAP, GOOGLE_QUERY } = require("../src/google-img-scrap"); excludeWords: ["black", "white"], //If you don't like black and white cats custom: "name=content&name2=content2", safeSearch: false, - execute: function (element) { - if (!element.url.match("gstatic.com")) return element; - }, + // excludeDomains: ["istockphoto.com", "alamy.com"] }); diff --git a/tsconfig.build.json b/tsconfig.build.json new file mode 100644 index 0000000..d6af876 --- /dev/null +++ b/tsconfig.build.json @@ -0,0 +1,4 @@ +{ + "extends": "./tsconfig", + "exclude": ["**/*.test.*", "**/__mocks__/*", "**/__tests__/*"] +} diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..fba79ac --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,16 @@ +{ + "compilerOptions": { + "module": "CommonJS", + "allowSyntheticDefaultImports": true, + "target": "ES6", + "noImplicitAny": true, + "moduleResolution": "node", + "sourceMap": true, + "outDir": "dist", + "resolveJsonModule": true, + "types": ["node"], + "declaration": true, + "typeRoots": ["types"] + }, + "include": ["src/**/*", "types/**/*"] +} diff --git a/types/config.d.ts b/types/config.d.ts new file mode 100644 index 0000000..95912aa --- /dev/null +++ b/types/config.d.ts @@ -0,0 +1,21 @@ +type Config = { + search: string; + limit?: number; + query?: { + TYPE?: string; + DATE?: string; + COLOR?: string; + SIZE?: string; + LICENCE?: string; + EXTENSION?: string; + }; + urlMatch?: string[][]; + domains?: string[]; + excludeWords?: string[]; + custom?: string; + safeSearch?: boolean; + excludeDomains?: string[]; + filterByTitles?: string[][]; +}; + +export default Config; diff --git a/types/googleQuery.d.ts b/types/googleQuery.d.ts new file mode 100644 index 0000000..762d32e --- /dev/null +++ b/types/googleQuery.d.ts @@ -0,0 +1,55 @@ +type GoogleQuery = { + SIZE: { + LARGE: string; + MEDIUM: string; + ICON: string; + }; + + COLOR: { + BLACK_AND_WHITE: string; + TRANSPARENT: string; + RED: string; + BLUE: string; + PURPLE: string; + ORANGE: string; + YELLOW: string; + GREEN: string; + TEAL: string; + PINK: string; + WHITE: string; + GRAY: string; + BLACK: string; + BROWN: string; + }; + + TYPE: { + CLIPART: string; + DRAW: string; + GIF: string; + }; + + EXTENSION: { + JPG: "jpg"; + GIF: "gif"; + BMP: "bmp"; + PNG: "png"; + SVG: "svg"; + WEBP: "webp"; + ICO: "ico"; + RAW: "raw"; + }; + + DATE: { + DAY: string; + WEEK: string; + MONTH: string; + YEAR: string; + }; + + LICENCE: { + CREATIVE_COMMONS: string; + COMMERCIAL_AND_OTHER: string; + }; +}; + +export default GoogleQuery; diff --git a/types/imageResultItem.d.ts b/types/imageResultItem.d.ts new file mode 100644 index 0000000..bb8ce8b --- /dev/null +++ b/types/imageResultItem.d.ts @@ -0,0 +1,16 @@ +type ImageResultItem = { + id: string; + title: string; + originalUrl: string; + url: string; + averageColor: string; + averageColorObject: { + r: number; + g: number; + b: number; + }; + height: number; + width: number; +}; + +export default ImageResultItem; diff --git a/types/index.d.ts b/types/index.d.ts deleted file mode 100644 index 3516a74..0000000 --- a/types/index.d.ts +++ /dev/null @@ -1,108 +0,0 @@ -type Config = { - search: string; - limit?: number; - query?: { - TYPE?: string; - DATE?: string; - COLOR?: string; - SIZE?: string; - LICENCE?: string; - EXTENSION?: string; - }; - domains?: string[]; - excludeWords?: string[]; - custom?: string; - safeSearch?: boolean; - excludeDomains?: string[]; - execute?: (element: FinalResult) => FinalResult | undefined; - filterByTitles?: [string[]]; -}; - -type FinalResult = { - id: string; - title: string; - originalUrl: string; - url: string; - averageColor: string; - averageColorObject: { - r: number; - g: number; - b: number; - }; - height: number; - width: number; -}; - -type Results = { - url: string; - result: FinalResult[]; -}; - -type GoogleQuery = { - SIZE: { - LARGE: string; - MEDIUM: string; - ICON: string; - }; - - COLOR: { - BLACK_AND_WHITE: string; - TRANSPARENT: string; - RED: string; - BLUE: string; - PURPLE: string; - ORANGE: string; - YELLOW: string; - GREEN: string; - TEAL: string; - PINK: string; - WHITE: string; - GRAY: string; - BLACK: string; - BROWN: string; - }; - - TYPE: { - CLIPART: string; - DRAW: string; - GIF: string; - }; - - EXTENSION: { - JPG: "jpg"; - GIF: "gif"; - BMP: "bmp"; - PNG: "png"; - SVG: "svg"; - WEBP: "webp"; - ICO: "ico"; - RAW: "raw"; - }; - - DATE: { - DAY: string; - WEEK: string; - MONTH: string; - YEAR: string; - }; - - LICENCE: { - CREATIVE_COMMONS: string; - COMMERCIAL_AND_OTHER: string; - }; -}; - -/** - * GOOGLE_IMG_SCRAP - * - * @param {Config} config - * @returns {Results} - */ -export declare function GOOGLE_IMG_SCRAP(config: Config): Results; - -/** - * GOOGLE_QUERY - * - * @returns {GoogleQuery} - */ -export declare const GOOGLE_QUERY: GoogleQuery; diff --git a/types/results.d.ts b/types/results.d.ts new file mode 100644 index 0000000..e133523 --- /dev/null +++ b/types/results.d.ts @@ -0,0 +1,8 @@ +import ImageResultItem from "./imageResultItem"; + +type Results = { + url: string; + result: ImageResultItem[]; +}; + +export default Results; From faaf616fd9d0211b18475314ffd90151f76bc75c Mon Sep 17 00:00:00 2001 From: yoannchb-pro <71560747+yoannchb-pro@users.noreply.github.com> Date: Sun, 22 Jan 2023 14:48:23 -0500 Subject: [PATCH 2/2] v1.0.9 --- CHANGELOG.md | 2 + README.md | 74 +++++++++---- dist/constant/query/GOOGLE_QUERY.js | 9 +- dist/constant/query/GOOGLE_QUERY.js.map | 2 +- dist/core/constructGoogleUrl.js | 7 +- dist/core/constructGoogleUrl.js.map | 2 +- dist/core/parseGoogleImages.d.ts | 3 +- dist/core/parseGoogleImages.js | 90 ++++++++++------ dist/core/parseGoogleImages.js.map | 2 +- dist/core/verifyGoogleQuery.js | 5 +- dist/core/verifyGoogleQuery.js.map | 2 +- dist/index.js | 15 +-- dist/index.js.map | 2 +- dist/utils/utils.js | 5 +- dist/utils/utils.js.map | 2 +- .../test-filter-titles.js => example/index.js | 6 +- jest.config.ts | 1 - package.json | 4 +- src/core/parseGoogleImages.ts | 102 ++++++++++++------ src/index.ts | 2 +- test-back/test-last-version.js | 19 ---- test-back/test-result-limit.js | 25 ----- test-back/test-simple.js | 11 -- test-back/test-url-match.js | 13 --- test-back/test-wallpaper.js | 21 ---- test-back/test.js | 24 ----- test/domains.test.ts | 17 +++ test/excludeDomains.test.ts | 19 ++++ test/excludeWords.test.ts | 14 +++ test/filterByTitles.test.ts | 20 ++++ test/limit.test.ts | 18 ++++ test/proxy.test.ts | 16 +++ test/query.test.ts | 18 ++++ test/urlMatch.test.ts | 21 ++++ tsconfig.json | 5 +- types/config.d.ts | 3 + 36 files changed, 368 insertions(+), 233 deletions(-) rename test-back/test-filter-titles.js => example/index.js (69%) delete mode 100644 test-back/test-last-version.js delete mode 100644 test-back/test-result-limit.js delete mode 100644 test-back/test-simple.js delete mode 100644 test-back/test-url-match.js delete mode 100644 test-back/test-wallpaper.js delete mode 100644 test-back/test.js create mode 100644 test/domains.test.ts create mode 100644 test/excludeDomains.test.ts create mode 100644 test/excludeWords.test.ts create mode 100644 test/filterByTitles.test.ts create mode 100644 test/limit.test.ts create mode 100644 test/proxy.test.ts create mode 100644 test/query.test.ts create mode 100644 test/urlMatch.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index c86bd6e..82198e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ - `urlMatch` added in types - All the code have been write back in typescript with a new structure - Removed `execute` +- Added `proxy` configuration +- Writed back all test with jest ### 1.0.8 diff --git a/README.md b/README.md index 9ad2978..ad0a5be 100644 --- a/README.md +++ b/README.md @@ -24,9 +24,10 @@ const { GOOGLE_IMG_SCRAP, GOOGLE_QUERY } = require("google-img-scrap"); import { GOOGLE_IMG_SCRAP, GOOGLE_QUERY } from "google-img-scrap"; ``` -## Query Params +## Options definition - "search" `string` what you want to search +- "proxy" `AxiosProxyConfig` configure a proxy with axios proxy - "excludeWords" `string[]` exclude some words from the search - "domains" `string[]` filter by domains - "excludeDomains" `string[]` exclude some domains @@ -41,27 +42,35 @@ import { GOOGLE_IMG_SCRAP, GOOGLE_QUERY } from "google-img-scrap"; ```js { - url: 'https://images.google.com/search?tbm=isch&tbs=itp:clipart,qdr:y,ic:gray,isz:l,il:ol,ift:jpg&q=cats%20%20%20-%22black%22%20-%22white%22&name=content&name2=content2', + url: 'https://images.google.com/search?tbm=isch&tbs=&q=cats', result: [ { - id: "HA6fW6faerBfPM", - title: "CAT eating a fish", - originalUrl: "https://media.gettyimages.com/vectors/cat-article.html", - url: 'https://media.gettyimages.com/vectors/cat-eating-fish-vector-id1216628506', - averageColor: "rgb(241, 25, 60)", - averageColorObject: { r: 241, g: 25, b: 60}, - height: 1024, - width: 1024 + id: 'K6Qd9XWnQFQCoM', + title: 'Domestic cat', + url: 'https://i.natgeofe.com/n/548467d8-c5f1-4551-9f58-6817a8d2c45e/NationalGeographic_2572187_2x1.jpg', + originalUrl: 'https://www.nationalgeographic.com/animals/mammals/facts/domestic-cat', + averageColor: 'rgb(208, 189, 170)', + averageColorObject: { + r: 208, + g: 189, + b: 170 + }, + height: 1536, + width: 3072 }, { - id: "OPSfyUtrsrYUI", - title: "Cat", - originalUrl: "https://www.ariatrade.gr/images/products/2021/10/article.html", - url: 'https://www.ariatrade.gr/images/products/2021/10/110294_1.jpg', - averageColor: "rgb(201, 250, 65)", - averageColorObject: { r: 201, g: 250, b: 65}, - height: 768, - width: 1024 + id: 'HkevFQZ5DYu7oM', + title: 'Cat - Wikipedia', + url: 'https://upload.wikimedia.org/wikipedia/commons/1/15/Cat_August_2010-4.jpg', + originalUrl: 'https://en.wikipedia.org/wiki/Cat', + averageColor: 'rgb(128, 115, 96)', + averageColorObject: { + r: 128, + g: 115, + b: 96 + }, + height: 2226, + width: 3640 }, ... ] @@ -70,8 +79,6 @@ import { GOOGLE_IMG_SCRAP, GOOGLE_QUERY } from "google-img-scrap"; ## How to use ? -**NOTE**: For the query parameter you need to set the name in upper case ! - ## Simple example Search cats images @@ -86,7 +93,8 @@ console.log(test); ## Custom query -All query options are optional (see below for all the options). You can combine as much as you want. +All query options are optional (see below for all the options) and need to be in uppercase. You can combine as much as you want. +Find all possible query options below. ```js const test = await GOOGLE_IMG_SCRAP({ @@ -112,6 +120,23 @@ const test = await GOOGLE_IMG_SCRAP({ console.log(test); ``` +## Proxy + +See axios documentation to setup the proxy + +```js +const test = await GOOGLE_IMG_SCRAP({ + search: "cats", + proxy: { + protocol: "https", + host: "example.com", + port: 8080, + }, +}); + +console.log(test); +``` + ## Domains Only scrap from a specific domain @@ -173,8 +198,6 @@ console.log(test); ## How urlMatch and filterByTitles work ? -- urlMatch work like filterByTiles - ```js const test = await GOOGLE_IMG_SCRAP({ search: "cats", @@ -183,6 +206,11 @@ const test = await GOOGLE_IMG_SCRAP({ ["draw", "white"], ["albino", "white"], ], + //will build something like this "(cdn and wikipedia) or (cdn istockphoto)" + urlMatch: [ + ["cdn", "wikipedia"], + ["cdn", "istockphoto"], + ], }); console.log(test); diff --git a/dist/constant/query/GOOGLE_QUERY.js b/dist/constant/query/GOOGLE_QUERY.js index 416fe76..bf8606a 100644 --- a/dist/constant/query/GOOGLE_QUERY.js +++ b/dist/constant/query/GOOGLE_QUERY.js @@ -1,8 +1,11 @@ "use strict"; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; Object.defineProperty(exports, "__esModule", { value: true }); -const GOOGLE_PARAMS_1 = require("./GOOGLE_PARAMS"); -const GOOGLE_COLORS_1 = require("./GOOGLE_COLORS"); -const IMAGES_EXTENSIONS_json_1 = require("../extensions/IMAGES_EXTENSIONS.json"); +const GOOGLE_PARAMS_1 = __importDefault(require("./GOOGLE_PARAMS")); +const GOOGLE_COLORS_1 = __importDefault(require("./GOOGLE_COLORS")); +const IMAGES_EXTENSIONS_json_1 = __importDefault(require("../extensions/IMAGES_EXTENSIONS.json")); const GOOGLE_QUERY = { SIZE: { LARGE: GOOGLE_PARAMS_1.default.SIZE + ":l", diff --git a/dist/constant/query/GOOGLE_QUERY.js.map b/dist/constant/query/GOOGLE_QUERY.js.map index 9625c2f..3e35d77 100644 --- a/dist/constant/query/GOOGLE_QUERY.js.map +++ b/dist/constant/query/GOOGLE_QUERY.js.map @@ -1 +1 @@ -{"version":3,"file":"GOOGLE_QUERY.js","sourceRoot":"","sources":["../../../src/constant/query/GOOGLE_QUERY.ts"],"names":[],"mappings":";;AAAA,mDAA4C;AAC5C,mDAAqC;AACrC,iFAA8D;AAG9D,MAAM,YAAY,GAAG;IACnB,IAAI,EAAE;QACJ,KAAK,EAAE,uBAAa,CAAC,IAAI,GAAG,IAAI;QAChC,MAAM,EAAE,uBAAa,CAAC,IAAI,GAAG,IAAI;QACjC,IAAI,EAAE,uBAAa,CAAC,IAAI,GAAG,IAAI;KAChC;IAED,KAAK,EAAE;QACL,eAAe,EAAE,uBAAa,CAAC,KAAK,GAAG,OAAO;QAC9C,WAAW,EAAE,uBAAa,CAAC,KAAK,GAAG,QAAQ;KAC5C;IAED,IAAI,EAAE;QACJ,OAAO,EAAE,uBAAa,CAAC,IAAI,GAAG,UAAU;QACxC,IAAI,EAAE,uBAAa,CAAC,IAAI,GAAG,UAAU;QACrC,GAAG,EAAE,uBAAa,CAAC,IAAI,GAAG,WAAW;KACtC;IAED,SAAS,EAAE,EAAE;IAEb,IAAI,EAAE;QACJ,GAAG,EAAE,uBAAa,CAAC,IAAI,GAAG,IAAI;QAC9B,IAAI,EAAE,uBAAa,CAAC,IAAI,GAAG,IAAI;QAC/B,KAAK,EAAE,uBAAa,CAAC,IAAI,GAAG,IAAI;QAChC,IAAI,EAAE,uBAAa,CAAC,IAAI,GAAG,IAAI;KAChC;IAED,OAAO,EAAE;QACP,gBAAgB,EAAE,uBAAa,CAAC,OAAO,GAAG,KAAK;QAC/C,oBAAoB,EAAE,uBAAa,CAAC,OAAO,GAAG,KAAK;KACpD;CACF,CAAC;AAEF,iBAAiB;AACjB,KAAK,MAAM,SAAS,IAAI,gCAAU,EAAE;IAClC,MAAM,SAAS,GAAG,SAAS,CAAC,WAAW,EAAE,CAAC;IACzC,YAAoB,CAAC,SAAS,CAAC,SAAS,CAAC;QACxC,uBAAa,CAAC,eAAe,GAAG,GAAG,GAAG,SAAS,CAAC;CACnD;AAED,cAAc;AACd,KAAK,MAAM,KAAK,IAAI,uBAAM,EAAE;IAC1B,MAAM,SAAS,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;IACrC,YAAoB,CAAC,KAAK,CAAC,SAAS,CAAC;QACpC,uBAAa,CAAC,KAAK;YACnB,YAAY;YACZ,uBAAa,CAAC,cAAc;YAC5B,GAAG;YACH,KAAK,CAAC;CACT;AAED,kBAAe,YAA2B,CAAC"} \ No newline at end of file +{"version":3,"file":"GOOGLE_QUERY.js","sourceRoot":"","sources":["../../../src/constant/query/GOOGLE_QUERY.ts"],"names":[],"mappings":";;;;;AAAA,oEAA4C;AAC5C,oEAAqC;AACrC,kGAA8D;AAG9D,MAAM,YAAY,GAAG;IACnB,IAAI,EAAE;QACJ,KAAK,EAAE,uBAAa,CAAC,IAAI,GAAG,IAAI;QAChC,MAAM,EAAE,uBAAa,CAAC,IAAI,GAAG,IAAI;QACjC,IAAI,EAAE,uBAAa,CAAC,IAAI,GAAG,IAAI;KAChC;IAED,KAAK,EAAE;QACL,eAAe,EAAE,uBAAa,CAAC,KAAK,GAAG,OAAO;QAC9C,WAAW,EAAE,uBAAa,CAAC,KAAK,GAAG,QAAQ;KAC5C;IAED,IAAI,EAAE;QACJ,OAAO,EAAE,uBAAa,CAAC,IAAI,GAAG,UAAU;QACxC,IAAI,EAAE,uBAAa,CAAC,IAAI,GAAG,UAAU;QACrC,GAAG,EAAE,uBAAa,CAAC,IAAI,GAAG,WAAW;KACtC;IAED,SAAS,EAAE,EAAE;IAEb,IAAI,EAAE;QACJ,GAAG,EAAE,uBAAa,CAAC,IAAI,GAAG,IAAI;QAC9B,IAAI,EAAE,uBAAa,CAAC,IAAI,GAAG,IAAI;QAC/B,KAAK,EAAE,uBAAa,CAAC,IAAI,GAAG,IAAI;QAChC,IAAI,EAAE,uBAAa,CAAC,IAAI,GAAG,IAAI;KAChC;IAED,OAAO,EAAE;QACP,gBAAgB,EAAE,uBAAa,CAAC,OAAO,GAAG,KAAK;QAC/C,oBAAoB,EAAE,uBAAa,CAAC,OAAO,GAAG,KAAK;KACpD;CACF,CAAC;AAEF,iBAAiB;AACjB,KAAK,MAAM,SAAS,IAAI,gCAAU,EAAE;IAClC,MAAM,SAAS,GAAG,SAAS,CAAC,WAAW,EAAE,CAAC;IACzC,YAAoB,CAAC,SAAS,CAAC,SAAS,CAAC;QACxC,uBAAa,CAAC,eAAe,GAAG,GAAG,GAAG,SAAS,CAAC;CACnD;AAED,cAAc;AACd,KAAK,MAAM,KAAK,IAAI,uBAAM,EAAE;IAC1B,MAAM,SAAS,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;IACrC,YAAoB,CAAC,KAAK,CAAC,SAAS,CAAC;QACpC,uBAAa,CAAC,KAAK;YACnB,YAAY;YACZ,uBAAa,CAAC,cAAc;YAC5B,GAAG;YACH,KAAK,CAAC;CACT;AAED,kBAAe,YAA2B,CAAC"} \ No newline at end of file diff --git a/dist/core/constructGoogleUrl.js b/dist/core/constructGoogleUrl.js index 77b75e3..8a60c9c 100644 --- a/dist/core/constructGoogleUrl.js +++ b/dist/core/constructGoogleUrl.js @@ -1,8 +1,11 @@ "use strict"; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; Object.defineProperty(exports, "__esModule", { value: true }); -const GOOGLE_CONSTANT_1 = require("../constant/GOOGLE_CONSTANT"); +const GOOGLE_CONSTANT_1 = __importDefault(require("../constant/GOOGLE_CONSTANT")); const utils_1 = require("../utils/utils"); -const buildGoogleDork_1 = require("./buildGoogleDork"); +const buildGoogleDork_1 = __importDefault(require("./buildGoogleDork")); /** * Construct google url for scrapping * @param config diff --git a/dist/core/constructGoogleUrl.js.map b/dist/core/constructGoogleUrl.js.map index 7b107a1..556edc2 100644 --- a/dist/core/constructGoogleUrl.js.map +++ b/dist/core/constructGoogleUrl.js.map @@ -1 +1 @@ -{"version":3,"file":"constructGoogleUrl.js","sourceRoot":"","sources":["../../src/core/constructGoogleUrl.ts"],"names":[],"mappings":";;AACA,iEAA0D;AAC1D,0CAA4C;AAC5C,uDAAiD;AAEjD;;;;GAIG;AACH,SAAS,kBAAkB,CAAC,MAAc;IACxC,MAAM,WAAW,GAAG,IAAA,yBAAgB,EAAC,MAAM,CAAC,CAAC;IAC7C,MAAM,YAAY,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IAC9D,MAAM,WAAW,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,EAAE,CAAC;IAE5D,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,yBAAe,CAAC,gBAAgB,EAAE;QAC5D,CAAC,yBAAe,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;QACzE,CAAC,EAAE,WAAW;KACf,CAAC,CAAC;IAEH,OAAO,yBAAe,CAAC,GAAG,GAAG,IAAA,kBAAU,EAAC,KAAK,CAAC,GAAG,YAAY,GAAG,WAAW,CAAC;AAC9E,CAAC;AAED,kBAAe,kBAAkB,CAAC"} \ No newline at end of file +{"version":3,"file":"constructGoogleUrl.js","sourceRoot":"","sources":["../../src/core/constructGoogleUrl.ts"],"names":[],"mappings":";;;;;AACA,kFAA0D;AAC1D,0CAA4C;AAC5C,wEAAiD;AAEjD;;;;GAIG;AACH,SAAS,kBAAkB,CAAC,MAAc;IACxC,MAAM,WAAW,GAAG,IAAA,yBAAgB,EAAC,MAAM,CAAC,CAAC;IAC7C,MAAM,YAAY,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IAC9D,MAAM,WAAW,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,EAAE,CAAC;IAE5D,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,yBAAe,CAAC,gBAAgB,EAAE;QAC5D,CAAC,yBAAe,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;QACzE,CAAC,EAAE,WAAW;KACf,CAAC,CAAC;IAEH,OAAO,yBAAe,CAAC,GAAG,GAAG,IAAA,kBAAU,EAAC,KAAK,CAAC,GAAG,YAAY,GAAG,WAAW,CAAC;AAC9E,CAAC;AAED,kBAAe,kBAAkB,CAAC"} \ No newline at end of file diff --git a/dist/core/parseGoogleImages.d.ts b/dist/core/parseGoogleImages.d.ts index 2183abe..753b677 100644 --- a/dist/core/parseGoogleImages.d.ts +++ b/dist/core/parseGoogleImages.d.ts @@ -1,8 +1,9 @@ +import { AxiosProxyConfig } from "axios"; import ImageResultItem from "../../types/imageResultItem"; /** * Parse the html from google image to get the images links * @param url * @returns */ -declare function parseGoogleImages(url: string): Promise; +declare function parseGoogleImages(url: string, proxy?: AxiosProxyConfig): Promise; export default parseGoogleImages; diff --git a/dist/core/parseGoogleImages.js b/dist/core/parseGoogleImages.js index a964622..89946fa 100644 --- a/dist/core/parseGoogleImages.js +++ b/dist/core/parseGoogleImages.js @@ -8,59 +8,81 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; Object.defineProperty(exports, "__esModule", { value: true }); const utils_1 = require("../utils/utils"); -const GOOGLE_CONSTANT_1 = require("../constant/GOOGLE_CONSTANT"); -const axios_1 = require("axios"); +const GOOGLE_CONSTANT_1 = __importDefault(require("../constant/GOOGLE_CONSTANT")); +const axios_1 = __importDefault(require("axios")); const { FastHTMLParser } = require("fast-html-dom-parser"); /** - * Parse the html from google image to get the images links + * Scrap google images scripts tag * @param url * @returns */ -function parseGoogleImages(url) { +function scrapGoogleImagesScriptsTag(url, proxy) { return __awaiter(this, void 0, void 0, function* () { - const { data } = yield (0, axios_1.default)(url, { - headers: GOOGLE_CONSTANT_1.default.headers, - }); + const { data } = yield (0, axios_1.default)(url, Object.assign({ headers: GOOGLE_CONSTANT_1.default.headers }, (proxy !== null && proxy !== void 0 ? proxy : {}))); const parser = new FastHTMLParser(data); const scripts = parser.getElementsByTagName("script"); + return scripts; + }); +} +function getGoogleImageObject(informationsMatch, otherInformationsMatch) { + const [r, g, b] = [ + informationsMatch[4], + informationsMatch[5], + informationsMatch[6], + ].map((e) => parseInt(e, 10)); + return { + id: otherInformationsMatch[1], + title: otherInformationsMatch[3], + url: informationsMatch[1], + originalUrl: otherInformationsMatch[2], + averageColor: `rgb(${r}, ${g}, ${b})`, + averageColorObject: { + r, + g, + b, + }, + height: parseInt(informationsMatch[2], 10), + width: parseInt(informationsMatch[3], 10), + }; +} +/** + * Parse the html from google image to get the images links + * @param url + * @returns + */ +function parseGoogleImages(url, proxy) { + return __awaiter(this, void 0, void 0, function* () { const result = []; + const scripts = yield scrapGoogleImagesScriptsTag(url, proxy); if (!scripts) return result; for (const script of scripts) { const body = script.innerHTML; + // if we dont find any image extension we can skip if (!(0, utils_1.isImage)(body)) continue; - //getting image url, height, width, average - const regex = /\["(http[^"]+?)",(\d+),(\d+)\],[\w\d]+?,[\w\d]+?,"rgb\((\d+),(\d+),(\d+)\)"/gi; + //getting image url, height, width, color average + const informationsRegex = /\["(http[^"]+?)",(\d+),(\d+)\],[\w\d]+?,[\w\d]+?,"rgb\((\d+),(\d+),(\d+)\)"/gi; //getting originalUrl, title, id - const secondRegex = /\[[\w\d]+?,"([^"]+?)","(http[^"]+?)","([^"]+?)"/gi; - let res = null; - let secondRes = null; - while ((res = regex.exec(body)) != null && - (secondRes = secondRegex.exec(body)) != null) { - if (res.length >= 4 && - res[1].match(/http/gi).length < 2 && - secondRes.length === 4 && - secondRes[2].match(/http/gi).length < 2) { - const [r, g, b] = [res[4], res[5], res[6]].map((e) => parseInt(e, 10)); - result.push({ - id: secondRes[1], - title: secondRes[3], - url: res[1], - originalUrl: secondRes[2], - averageColor: `rgb(${r}, ${g}, ${b})`, - averageColorObject: { - r, - g, - b, - }, - height: parseInt(res[2], 10), - width: parseInt(res[3], 10), - }); - } + const otherInformationsRegex = /\[[\w\d]+?,"([^"]+?)","(http[^"]+?)","([^"]+?)"/gi; + let informationsMatch, otherInformationsMatch; + while ((informationsMatch = informationsRegex.exec(body)) !== null && + (otherInformationsMatch = otherInformationsRegex.exec(body)) !== null) { + if (informationsMatch.length < 4 || otherInformationsMatch.length < 4) + continue; + if (informationsMatch[1].match(/http/gi).length > 2 || + otherInformationsMatch[2].match(/http/gi).length > 2) + continue; + result.push(getGoogleImageObject(informationsMatch, otherInformationsMatch)); } + //if we get the correct scripts with all images we can exit + if (result.length > 0) + return result; } return result; }); diff --git a/dist/core/parseGoogleImages.js.map b/dist/core/parseGoogleImages.js.map index b7fab10..3f2a5ea 100644 --- a/dist/core/parseGoogleImages.js.map +++ b/dist/core/parseGoogleImages.js.map @@ -1 +1 @@ -{"version":3,"file":"parseGoogleImages.js","sourceRoot":"","sources":["../../src/core/parseGoogleImages.ts"],"names":[],"mappings":";;;;;;;;;;;AAAA,0CAAyC;AACzC,iEAA0D;AAC1D,iCAA0B;AAG1B,MAAM,EAAE,cAAc,EAAE,GAAG,OAAO,CAAC,sBAAsB,CAAC,CAAC;AAE3D;;;;GAIG;AACH,SAAe,iBAAiB,CAAC,GAAW;;QAC1C,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,IAAA,eAAK,EAAC,GAAG,EAAE;YAChC,OAAO,EAAE,yBAAe,CAAC,OAAO;SACjC,CAAC,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,cAAc,CAAC,IAAI,CAAC,CAAC;QACxC,MAAM,OAAO,GAAG,MAAM,CAAC,oBAAoB,CAAC,QAAQ,CAAC,CAAC;QAEtD,MAAM,MAAM,GAAsB,EAAE,CAAC;QAErC,IAAI,CAAC,OAAO;YAAE,OAAO,MAAM,CAAC;QAE5B,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE;YAC5B,MAAM,IAAI,GAAG,MAAM,CAAC,SAAS,CAAC;YAE9B,IAAI,CAAC,IAAA,eAAO,EAAC,IAAI,CAAC;gBAAE,SAAS;YAE7B,2CAA2C;YAC3C,MAAM,KAAK,GACT,+EAA+E,CAAC;YAClF,gCAAgC;YAChC,MAAM,WAAW,GAAG,mDAAmD,CAAC;YAExE,IAAI,GAAG,GAAG,IAAI,CAAC;YACf,IAAI,SAAS,GAAG,IAAI,CAAC;YAErB,OACE,CAAC,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,IAAI;gBAChC,CAAC,SAAS,GAAG,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,IAAI,EAC5C;gBACA,IACE,GAAG,CAAC,MAAM,IAAI,CAAC;oBACf,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,MAAM,GAAG,CAAC;oBACjC,SAAS,CAAC,MAAM,KAAK,CAAC;oBACtB,SAAS,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,MAAM,GAAG,CAAC,EACvC;oBACA,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;oBAEvE,MAAM,CAAC,IAAI,CAAC;wBACV,EAAE,EAAE,SAAS,CAAC,CAAC,CAAC;wBAChB,KAAK,EAAE,SAAS,CAAC,CAAC,CAAC;wBACnB,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;wBACX,WAAW,EAAE,SAAS,CAAC,CAAC,CAAC;wBACzB,YAAY,EAAE,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG;wBACrC,kBAAkB,EAAE;4BAClB,CAAC;4BACD,CAAC;4BACD,CAAC;yBACF;wBACD,MAAM,EAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;wBAC5B,KAAK,EAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;qBAC5B,CAAC,CAAC;iBACJ;aACF;SACF;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;CAAA;AAED,kBAAe,iBAAiB,CAAC"} \ No newline at end of file +{"version":3,"file":"parseGoogleImages.js","sourceRoot":"","sources":["../../src/core/parseGoogleImages.ts"],"names":[],"mappings":";;;;;;;;;;;;;;AAAA,0CAAyC;AACzC,kFAA0D;AAC1D,kDAAgD;AAGhD,MAAM,EAAE,cAAc,EAAE,GAAG,OAAO,CAAC,sBAAsB,CAAC,CAAC;AAE3D;;;;GAIG;AACH,SAAe,2BAA2B,CACxC,GAAW,EACX,KAAwB;;QAExB,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,IAAA,eAAK,EAAC,GAAG,kBAC9B,OAAO,EAAE,yBAAe,CAAC,OAAO,IAC7B,CAAC,KAAK,aAAL,KAAK,cAAL,KAAK,GAAI,EAAE,CAAC,EAChB,CAAC;QAEH,MAAM,MAAM,GAAG,IAAI,cAAc,CAAC,IAAI,CAAC,CAAC;QACxC,MAAM,OAAO,GAAG,MAAM,CAAC,oBAAoB,CAAC,QAAQ,CAAC,CAAC;QAEtD,OAAO,OAAO,CAAC;IACjB,CAAC;CAAA;AAED,SAAS,oBAAoB,CAC3B,iBAAkC,EAClC,sBAAuC;IAEvC,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,GAAG;QAChB,iBAAiB,CAAC,CAAC,CAAC;QACpB,iBAAiB,CAAC,CAAC,CAAC;QACpB,iBAAiB,CAAC,CAAC,CAAC;KACrB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;IAE9B,OAAO;QACL,EAAE,EAAE,sBAAsB,CAAC,CAAC,CAAC;QAC7B,KAAK,EAAE,sBAAsB,CAAC,CAAC,CAAC;QAChC,GAAG,EAAE,iBAAiB,CAAC,CAAC,CAAC;QACzB,WAAW,EAAE,sBAAsB,CAAC,CAAC,CAAC;QACtC,YAAY,EAAE,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG;QACrC,kBAAkB,EAAE;YAClB,CAAC;YACD,CAAC;YACD,CAAC;SACF;QACD,MAAM,EAAE,QAAQ,CAAC,iBAAiB,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;QAC1C,KAAK,EAAE,QAAQ,CAAC,iBAAiB,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;KAC1C,CAAC;AACJ,CAAC;AAED;;;;GAIG;AACH,SAAe,iBAAiB,CAC9B,GAAW,EACX,KAAwB;;QAExB,MAAM,MAAM,GAAsB,EAAE,CAAC;QAErC,MAAM,OAAO,GAAG,MAAM,2BAA2B,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QAE9D,IAAI,CAAC,OAAO;YAAE,OAAO,MAAM,CAAC;QAE5B,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE;YAC5B,MAAM,IAAI,GAAG,MAAM,CAAC,SAAS,CAAC;YAE9B,kDAAkD;YAClD,IAAI,CAAC,IAAA,eAAO,EAAC,IAAI,CAAC;gBAAE,SAAS;YAE7B,iDAAiD;YACjD,MAAM,iBAAiB,GACrB,+EAA+E,CAAC;YAClF,gCAAgC;YAChC,MAAM,sBAAsB,GAC1B,mDAAmD,CAAC;YAEtD,IAAI,iBAAkC,EACpC,sBAAuC,CAAC;YAE1C,OACE,CAAC,iBAAiB,GAAG,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI;gBAC3D,CAAC,sBAAsB,GAAG,sBAAsB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EACrE;gBACA,IAAI,iBAAiB,CAAC,MAAM,GAAG,CAAC,IAAI,sBAAsB,CAAC,MAAM,GAAG,CAAC;oBACnE,SAAS;gBACX,IACE,iBAAiB,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,MAAM,GAAG,CAAC;oBAC/C,sBAAsB,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,MAAM,GAAG,CAAC;oBAEpD,SAAS;gBAEX,MAAM,CAAC,IAAI,CACT,oBAAoB,CAAC,iBAAiB,EAAE,sBAAsB,CAAC,CAChE,CAAC;aACH;YAED,2DAA2D;YAC3D,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;gBAAE,OAAO,MAAM,CAAC;SACtC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;CAAA;AAED,kBAAe,iBAAiB,CAAC"} \ No newline at end of file diff --git a/dist/core/verifyGoogleQuery.js b/dist/core/verifyGoogleQuery.js index 4d7183e..0262579 100644 --- a/dist/core/verifyGoogleQuery.js +++ b/dist/core/verifyGoogleQuery.js @@ -1,6 +1,9 @@ "use strict"; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; Object.defineProperty(exports, "__esModule", { value: true }); -const GOOGLE_QUERY_1 = require("../constant/query/GOOGLE_QUERY"); +const GOOGLE_QUERY_1 = __importDefault(require("../constant/query/GOOGLE_QUERY")); /** * Validation of the query passed as argument * @param config diff --git a/dist/core/verifyGoogleQuery.js.map b/dist/core/verifyGoogleQuery.js.map index 63c698a..a627a56 100644 --- a/dist/core/verifyGoogleQuery.js.map +++ b/dist/core/verifyGoogleQuery.js.map @@ -1 +1 @@ -{"version":3,"file":"verifyGoogleQuery.js","sourceRoot":"","sources":["../../src/core/verifyGoogleQuery.ts"],"names":[],"mappings":";;AAEA,iEAA0D;AAE1D;;;GAGG;AACH,SAAS,iBAAiB,CAAC,MAAc;;IACvC,IAAI,MAAM,CAAC,cAAc,IAAI,MAAM,CAAC,OAAO;QACzC,MAAM,IAAI,KAAK,CAAC,0DAA0D,CAAC,CAAC;IAE9E,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,IAAI,EAAE;QAC9C,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;IAE/C,IAAI,MAAM,CAAC,KAAK,EAAE;QAChB,MAAM,aAAa,GAAG,MAAM,CAAC,IAAI,CAAC,sBAAY,CAAC,CAAC;QAEhD,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAA0B,EAAE;YACpE,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,GAAG,CAAC;gBAC9B,MAAM,IAAI,KAAK,CAAC,wBAAwB,GAAG,GAAG,CAAC,CAAC;YAElD,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,sBAAY,CAAC,GAAG,CAAC,CAAC,CAAC;YAChD,MAAM,YAAY,GAAG,MAAA,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,mCAAI,EAAE,CAAC;YAC7C,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,YAAY,CAAC;gBAChC,MAAM,IAAI,KAAK,CACb,IAAI,YAAY,+CAA+C,GAAG,GAAG,CACtE,CAAC;SACL;KACF;AACH,CAAC;AAED,kBAAe,iBAAiB,CAAC"} \ No newline at end of file +{"version":3,"file":"verifyGoogleQuery.js","sourceRoot":"","sources":["../../src/core/verifyGoogleQuery.ts"],"names":[],"mappings":";;;;;AAEA,kFAA0D;AAE1D;;;GAGG;AACH,SAAS,iBAAiB,CAAC,MAAc;;IACvC,IAAI,MAAM,CAAC,cAAc,IAAI,MAAM,CAAC,OAAO;QACzC,MAAM,IAAI,KAAK,CAAC,0DAA0D,CAAC,CAAC;IAE9E,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,IAAI,EAAE;QAC9C,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;IAE/C,IAAI,MAAM,CAAC,KAAK,EAAE;QAChB,MAAM,aAAa,GAAG,MAAM,CAAC,IAAI,CAAC,sBAAY,CAAC,CAAC;QAEhD,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAA0B,EAAE;YACpE,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,GAAG,CAAC;gBAC9B,MAAM,IAAI,KAAK,CAAC,wBAAwB,GAAG,GAAG,CAAC,CAAC;YAElD,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,sBAAY,CAAC,GAAG,CAAC,CAAC,CAAC;YAChD,MAAM,YAAY,GAAG,MAAA,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,mCAAI,EAAE,CAAC;YAC7C,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,YAAY,CAAC;gBAChC,MAAM,IAAI,KAAK,CACb,IAAI,YAAY,+CAA+C,GAAG,GAAG,CACtE,CAAC;SACL;KACF;AACH,CAAC;AAED,kBAAe,iBAAiB,CAAC"} \ No newline at end of file diff --git a/dist/index.js b/dist/index.js index f03cb9d..7cfcb72 100644 --- a/dist/index.js +++ b/dist/index.js @@ -8,19 +8,22 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; Object.defineProperty(exports, "__esModule", { value: true }); exports.GOOGLE_QUERY = exports.GOOGLE_IMG_SCRAP = void 0; -const parseGoogleImages_1 = require("./core/parseGoogleImages"); -const GOOGLE_QUERY_1 = require("./constant/query/GOOGLE_QUERY"); +const parseGoogleImages_1 = __importDefault(require("./core/parseGoogleImages")); +const GOOGLE_QUERY_1 = __importDefault(require("./constant/query/GOOGLE_QUERY")); exports.GOOGLE_QUERY = GOOGLE_QUERY_1.default; -const verifyGoogleQuery_1 = require("./core/verifyGoogleQuery"); -const constructGoogleUrl_1 = require("./core/constructGoogleUrl"); -const limitResultSize_1 = require("./core/limitResultSize"); +const verifyGoogleQuery_1 = __importDefault(require("./core/verifyGoogleQuery")); +const constructGoogleUrl_1 = __importDefault(require("./core/constructGoogleUrl")); +const limitResultSize_1 = __importDefault(require("./core/limitResultSize")); function GOOGLE_IMG_SCRAP(config) { return __awaiter(this, void 0, void 0, function* () { (0, verifyGoogleQuery_1.default)(config); const URL = (0, constructGoogleUrl_1.default)(config); - const result = yield (0, parseGoogleImages_1.default)(URL); + const result = yield (0, parseGoogleImages_1.default)(URL, config.proxy); const slicedResult = (0, limitResultSize_1.default)(config === null || config === void 0 ? void 0 : config.limit, result); return { url: URL, diff --git a/dist/index.js.map b/dist/index.js.map index 41bd12e..2b89a6b 100644 --- a/dist/index.js.map +++ b/dist/index.js.map @@ -1 +1 @@ -{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,gEAAyD;AACzD,gEAAyD;AAoB9B,uBApBpB,sBAAY,CAoBoB;AAnBvC,gEAAyD;AACzD,kEAA2D;AAC3D,4DAAqD;AAIrD,SAAe,gBAAgB,CAAC,MAAc;;QAC5C,IAAA,2BAAiB,EAAC,MAAM,CAAC,CAAC;QAE1B,MAAM,GAAG,GAAG,IAAA,4BAAkB,EAAC,MAAM,CAAC,CAAC;QACvC,MAAM,MAAM,GAAG,MAAM,IAAA,2BAAiB,EAAC,GAAG,CAAC,CAAC;QAC5C,MAAM,YAAY,GAAG,IAAA,yBAAe,EAAC,MAAM,aAAN,MAAM,uBAAN,MAAM,CAAE,KAAK,EAAE,MAAM,CAAC,CAAC;QAE5D,OAAO;YACL,GAAG,EAAE,GAAG;YACR,MAAM,EAAE,YAAY;SACrB,CAAC;IACJ,CAAC;CAAA;AAEQ,4CAAgB"} \ No newline at end of file +{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;AAAA,iFAAyD;AACzD,iFAAyD;AAoB9B,uBApBpB,sBAAY,CAoBoB;AAnBvC,iFAAyD;AACzD,mFAA2D;AAC3D,6EAAqD;AAIrD,SAAe,gBAAgB,CAAC,MAAc;;QAC5C,IAAA,2BAAiB,EAAC,MAAM,CAAC,CAAC;QAE1B,MAAM,GAAG,GAAG,IAAA,4BAAkB,EAAC,MAAM,CAAC,CAAC;QACvC,MAAM,MAAM,GAAG,MAAM,IAAA,2BAAiB,EAAC,GAAG,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;QAC1D,MAAM,YAAY,GAAG,IAAA,yBAAe,EAAC,MAAM,aAAN,MAAM,uBAAN,MAAM,CAAE,KAAK,EAAE,MAAM,CAAC,CAAC;QAE5D,OAAO;YACL,GAAG,EAAE,GAAG;YACR,MAAM,EAAE,YAAY;SACrB,CAAC;IACJ,CAAC;CAAA;AAEQ,4CAAgB"} \ No newline at end of file diff --git a/dist/utils/utils.js b/dist/utils/utils.js index 39526cd..4e30390 100644 --- a/dist/utils/utils.js +++ b/dist/utils/utils.js @@ -1,7 +1,10 @@ "use strict"; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; Object.defineProperty(exports, "__esModule", { value: true }); exports.isImage = exports.unicodeToChar = exports.buildQuery = void 0; -const IMAGES_EXTENSIONS_json_1 = require("../constant/extensions/IMAGES_EXTENSIONS.json"); +const IMAGES_EXTENSIONS_json_1 = __importDefault(require("../constant/extensions/IMAGES_EXTENSIONS.json")); /** * Build the query for url * @param query diff --git a/dist/utils/utils.js.map b/dist/utils/utils.js.map index e4df97c..4520606 100644 --- a/dist/utils/utils.js.map +++ b/dist/utils/utils.js.map @@ -1 +1 @@ -{"version":3,"file":"utils.js","sourceRoot":"","sources":["../../src/utils/utils.ts"],"names":[],"mappings":";;;AAAA,0FAAuE;AAEvE;;;;GAIG;AACH,SAAS,UAAU,CAAC,KAA6B;IAC/C,MAAM,MAAM,GAAG,EAAE,CAAC;IAClB,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAElC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE;QAC1B,MAAM,SAAS,GAAG,KAAK,CAAC;QACxB,MAAM,CAAC,IAAI,CAAC,GAAG,SAAS,IAAI,kBAAkB,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;KACjE;IAED,OAAO,GAAG,GAAG,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAChC,CAAC;AAsBQ,gCAAU;AApBnB;;;;GAIG;AACH,SAAS,aAAa,CAAC,IAAY;IACjC,OAAO,IAAI,CAAC,OAAO,CAAC,iBAAiB,EAAE,UAAU,KAAK;QACpD,OAAO,MAAM,CAAC,YAAY,CAAC,QAAQ,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;IACtE,CAAC,CAAC,CAAC;AACL,CAAC;AAWoB,sCAAa;AATlC;;;;GAIG;AACH,SAAS,OAAO,CAAC,OAAO,GAAG,EAAE;IAC3B,OAAO,gCAAU,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;AACrE,CAAC;AAEmC,0BAAO"} \ No newline at end of file +{"version":3,"file":"utils.js","sourceRoot":"","sources":["../../src/utils/utils.ts"],"names":[],"mappings":";;;;;;AAAA,2GAAuE;AAEvE;;;;GAIG;AACH,SAAS,UAAU,CAAC,KAA6B;IAC/C,MAAM,MAAM,GAAG,EAAE,CAAC;IAClB,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAElC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE;QAC1B,MAAM,SAAS,GAAG,KAAK,CAAC;QACxB,MAAM,CAAC,IAAI,CAAC,GAAG,SAAS,IAAI,kBAAkB,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;KACjE;IAED,OAAO,GAAG,GAAG,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAChC,CAAC;AAsBQ,gCAAU;AApBnB;;;;GAIG;AACH,SAAS,aAAa,CAAC,IAAY;IACjC,OAAO,IAAI,CAAC,OAAO,CAAC,iBAAiB,EAAE,UAAU,KAAK;QACpD,OAAO,MAAM,CAAC,YAAY,CAAC,QAAQ,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;IACtE,CAAC,CAAC,CAAC;AACL,CAAC;AAWoB,sCAAa;AATlC;;;;GAIG;AACH,SAAS,OAAO,CAAC,OAAO,GAAG,EAAE;IAC3B,OAAO,gCAAU,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;AACrE,CAAC;AAEmC,0BAAO"} \ No newline at end of file diff --git a/test-back/test-filter-titles.js b/example/index.js similarity index 69% rename from test-back/test-filter-titles.js rename to example/index.js index 5d8cf14..97c0dad 100644 --- a/test-back/test-filter-titles.js +++ b/example/index.js @@ -1,12 +1,10 @@ const { GOOGLE_IMG_SCRAP } = require("../dist"); +//simple example to see if it's working without launching tests (async function () { const test = await GOOGLE_IMG_SCRAP({ search: "cats", - filterByTitles: [ - ["draw", "white"], - ["albino", "white"], - ], + limit: 5, }); console.log(test, test.result.length); diff --git a/jest.config.ts b/jest.config.ts index ab8b552..2b06509 100644 --- a/jest.config.ts +++ b/jest.config.ts @@ -5,7 +5,6 @@ const config: Config.InitialOptions = { rootDir: "test", testEnvironment: "node", verbose: true, - automock: true, }; export default config; diff --git a/package.json b/package.json index e93234a..652e9a2 100644 --- a/package.json +++ b/package.json @@ -4,10 +4,8 @@ "description": "Scrap images from google images with customs pre filled google dork options", "main": "./dist/index.js", "types": "./dist/index.d.ts", - "directories": { - "test": "test" - }, "scripts": { + "example": "node ./example/index.js", "build": "tsc", "test": "jest" }, diff --git a/src/core/parseGoogleImages.ts b/src/core/parseGoogleImages.ts index 6bef934..c537cb7 100644 --- a/src/core/parseGoogleImages.ts +++ b/src/core/parseGoogleImages.ts @@ -1,68 +1,106 @@ import { isImage } from "../utils/utils"; import GOOGLE_CONSTANT from "../constant/GOOGLE_CONSTANT"; -import axios from "axios"; +import axios, { AxiosProxyConfig } from "axios"; import ImageResultItem from "../../types/imageResultItem"; const { FastHTMLParser } = require("fast-html-dom-parser"); /** - * Parse the html from google image to get the images links + * Scrap google images scripts tag * @param url * @returns */ -async function parseGoogleImages(url: string): Promise { +async function scrapGoogleImagesScriptsTag( + url: string, + proxy?: AxiosProxyConfig +) { const { data } = await axios(url, { headers: GOOGLE_CONSTANT.headers, + ...(proxy ?? {}), }); + const parser = new FastHTMLParser(data); const scripts = parser.getElementsByTagName("script"); + return scripts; +} + +function getGoogleImageObject( + informationsMatch: RegExpExecArray, + otherInformationsMatch: RegExpExecArray +) { + const [r, g, b] = [ + informationsMatch[4], + informationsMatch[5], + informationsMatch[6], + ].map((e) => parseInt(e, 10)); + + return { + id: otherInformationsMatch[1], + title: otherInformationsMatch[3], + url: informationsMatch[1], + originalUrl: otherInformationsMatch[2], + averageColor: `rgb(${r}, ${g}, ${b})`, + averageColorObject: { + r, + g, + b, + }, + height: parseInt(informationsMatch[2], 10), + width: parseInt(informationsMatch[3], 10), + }; +} + +/** + * Parse the html from google image to get the images links + * @param url + * @returns + */ +async function parseGoogleImages( + url: string, + proxy?: AxiosProxyConfig +): Promise { const result: ImageResultItem[] = []; + const scripts = await scrapGoogleImagesScriptsTag(url, proxy); + if (!scripts) return result; for (const script of scripts) { const body = script.innerHTML; + // if we dont find any image extension we can skip if (!isImage(body)) continue; - //getting image url, height, width, average - const regex = + //getting image url, height, width, color average + const informationsRegex = /\["(http[^"]+?)",(\d+),(\d+)\],[\w\d]+?,[\w\d]+?,"rgb\((\d+),(\d+),(\d+)\)"/gi; //getting originalUrl, title, id - const secondRegex = /\[[\w\d]+?,"([^"]+?)","(http[^"]+?)","([^"]+?)"/gi; + const otherInformationsRegex = + /\[[\w\d]+?,"([^"]+?)","(http[^"]+?)","([^"]+?)"/gi; - let res = null; - let secondRes = null; + let informationsMatch: RegExpExecArray, + otherInformationsMatch: RegExpExecArray; while ( - (res = regex.exec(body)) != null && - (secondRes = secondRegex.exec(body)) != null + (informationsMatch = informationsRegex.exec(body)) !== null && + (otherInformationsMatch = otherInformationsRegex.exec(body)) !== null ) { + if (informationsMatch.length < 4 || otherInformationsMatch.length < 4) + continue; if ( - res.length >= 4 && - res[1].match(/http/gi).length < 2 && - secondRes.length === 4 && - secondRes[2].match(/http/gi).length < 2 - ) { - const [r, g, b] = [res[4], res[5], res[6]].map((e) => parseInt(e, 10)); - - result.push({ - id: secondRes[1], - title: secondRes[3], - url: res[1], - originalUrl: secondRes[2], - averageColor: `rgb(${r}, ${g}, ${b})`, - averageColorObject: { - r, - g, - b, - }, - height: parseInt(res[2], 10), - width: parseInt(res[3], 10), - }); - } + informationsMatch[1].match(/http/gi).length > 2 || + otherInformationsMatch[2].match(/http/gi).length > 2 + ) + continue; + + result.push( + getGoogleImageObject(informationsMatch, otherInformationsMatch) + ); } + + //if we get the correct scripts with all images we can exit + if (result.length > 0) return result; } return result; diff --git a/src/index.ts b/src/index.ts index 32099da..2bbe592 100644 --- a/src/index.ts +++ b/src/index.ts @@ -10,7 +10,7 @@ async function GOOGLE_IMG_SCRAP(config: Config): Promise { verifyGoogleQuery(config); const URL = constructGoogleUrl(config); - const result = await parseGoogleImages(URL); + const result = await parseGoogleImages(URL, config.proxy); const slicedResult = limitResultSize(config?.limit, result); return { diff --git a/test-back/test-last-version.js b/test-back/test-last-version.js deleted file mode 100644 index 36f7ef0..0000000 --- a/test-back/test-last-version.js +++ /dev/null @@ -1,19 +0,0 @@ -const { GOOGLE_IMG_SCRAP } = require("../src"); -const V107 = require("../src/back/google-img-scrap-1.0.7-.js"); - -// console.log(GOOGLE_QUERY); - -(async function () { - const test = await GOOGLE_IMG_SCRAP({ - search: "cats", - }); - - const test2 = await V107.GOOGLE_IMG_SCRAP({ - search: "cats", - execute: function (element) { - if (!element.url.match("gstatic.com")) return element; - }, - }); - - console.log(test.result.length, test2.result.length); -})(); diff --git a/test-back/test-result-limit.js b/test-back/test-result-limit.js deleted file mode 100644 index 60e7107..0000000 --- a/test-back/test-result-limit.js +++ /dev/null @@ -1,25 +0,0 @@ -const { GOOGLE_IMG_SCRAP } = require("../src/google-img-scrap"); - -(async function () { - const limit = 5; - const testNoLimit = await GOOGLE_IMG_SCRAP({ - search: "cats", - execute: function (element) { - if (!element.url.match("gstatic.com")) return element; - }, - }); - const testLimit = await GOOGLE_IMG_SCRAP({ - search: "cats", - limit, - execute: function (element) { - if (!element.url.match("gstatic.com")) return element; - }, - }); - try { - console.log( - `limit : ${limit}, testNoLimit length : ${testNoLimit.result.length}, testLimit length : ${testLimit.result.length}` - ); - } catch (error) { - console.log(error); - } -})(); diff --git a/test-back/test-simple.js b/test-back/test-simple.js deleted file mode 100644 index e4ac4ae..0000000 --- a/test-back/test-simple.js +++ /dev/null @@ -1,11 +0,0 @@ -const { GOOGLE_IMG_SCRAP, GOOGLE_QUERY } = require("../src/google-img-scrap"); - -// console.log(GOOGLE_QUERY); - -(async function () { - const test = await GOOGLE_IMG_SCRAP({ - search: "cats", - }); - - console.log(test, test.result.length); -})(); diff --git a/test-back/test-url-match.js b/test-back/test-url-match.js deleted file mode 100644 index ad7ff19..0000000 --- a/test-back/test-url-match.js +++ /dev/null @@ -1,13 +0,0 @@ -const { GOOGLE_IMG_SCRAP } = require("../src/google-img-scrap"); - -(async function () { - const test = await GOOGLE_IMG_SCRAP({ - search: "cats", - urlMatch: [["cdn"], ["istockphoto"]], - execute: function (element) { - if (!element.url.match("gstatic.com")) return element; - }, - }); - - console.log(test, test.result.length); -})(); diff --git a/test-back/test-wallpaper.js b/test-back/test-wallpaper.js deleted file mode 100644 index fc049ba..0000000 --- a/test-back/test-wallpaper.js +++ /dev/null @@ -1,21 +0,0 @@ -const { GOOGLE_IMG_SCRAP, GOOGLE_QUERY } = require("../src/google-img-scrap"); - -(async function () { - const test = await GOOGLE_IMG_SCRAP({ - search: "demon slayer background hd", - query: { - SIZE: GOOGLE_QUERY.SIZE.LARGE, - }, - domains: ["alphacoders.com"], - safeSearch: false, - execute: function (element) { - if (!element.url.match("gstatic.com")) return element; - }, - }); - - console.log( - test, - test.result[test.result.length - 1].url, - test.result.length - ); -})(); diff --git a/test-back/test.js b/test-back/test.js deleted file mode 100644 index 6805ec6..0000000 --- a/test-back/test.js +++ /dev/null @@ -1,24 +0,0 @@ -const { GOOGLE_IMG_SCRAP, GOOGLE_QUERY } = require("../dist/index.js"); - -// console.log(GOOGLE_QUERY); - -(async function () { - const test = await GOOGLE_IMG_SCRAP({ - search: "cats", - query: { - TYPE: GOOGLE_QUERY.TYPE.CLIPART, - DATE: GOOGLE_QUERY.DATE.YEAR, - COLOR: GOOGLE_QUERY.COLOR.BLACK_AND_WHITE, - SIZE: GOOGLE_QUERY.SIZE.LARGE, - LICENCE: GOOGLE_QUERY.LICENCE.COMMERCIAL_AND_OTHER, - EXTENSION: GOOGLE_QUERY.EXTENSION.JPG, - }, - excludeWords: ["black", "white"], //If you don't like black and white cats - custom: "name=content&name2=content2", - safeSearch: false, - - // excludeDomains: ["istockphoto.com", "alamy.com"] - }); - - console.log(test, test.result.length); -})(); diff --git a/test/domains.test.ts b/test/domains.test.ts new file mode 100644 index 0000000..fda7d64 --- /dev/null +++ b/test/domains.test.ts @@ -0,0 +1,17 @@ +import { GOOGLE_IMG_SCRAP } from "../dist"; + +describe("Domains test", function () { + it("All result should be one of those specific domains", async function () { + const domains = [ + "https://fr.wikipedia.org/", + "https://wall.alphacoders.com/", + ]; + const { result } = await GOOGLE_IMG_SCRAP({ + search: "cats", + domains, + }); + for (const img of result) { + expect(domains.some((domain) => img.url.includes(domain))).toBeTruthy(); + } + }); +}); diff --git a/test/excludeDomains.test.ts b/test/excludeDomains.test.ts new file mode 100644 index 0000000..1194783 --- /dev/null +++ b/test/excludeDomains.test.ts @@ -0,0 +1,19 @@ +import { GOOGLE_IMG_SCRAP } from "../dist"; + +describe("Exclude domains test", function () { + it("All result shouldn't includes those domains", async function () { + const excludeDomains = [ + "https://fr.wikipedia.org/", + "https://wall.alphacoders.com/", + ]; + const { result } = await GOOGLE_IMG_SCRAP({ + search: "cats", + excludeDomains, + }); + for (const img of result) { + expect( + excludeDomains.some((domain) => img.url.includes(domain)) + ).toBeFalsy(); + } + }); +}); diff --git a/test/excludeWords.test.ts b/test/excludeWords.test.ts new file mode 100644 index 0000000..8e9e44d --- /dev/null +++ b/test/excludeWords.test.ts @@ -0,0 +1,14 @@ +import { GOOGLE_IMG_SCRAP } from "../dist"; + +describe("Exclude words test", function () { + it("All title shouldn't have a specific word", async function () { + const excludeWords = ["white", "black"]; + const { result } = await GOOGLE_IMG_SCRAP({ + search: "cats", + excludeWords, + }); + for (const img of result) { + expect(excludeWords.some((word) => img.title.includes(word))).toBeFalsy(); + } + }); +}); diff --git a/test/filterByTitles.test.ts b/test/filterByTitles.test.ts new file mode 100644 index 0000000..2b5470d --- /dev/null +++ b/test/filterByTitles.test.ts @@ -0,0 +1,20 @@ +import { GOOGLE_IMG_SCRAP } from "../dist"; + +describe("Filter by titles test", function () { + it("All title shouldn't have a specific word", async function () { + //will build something like this "(draw and white) or (albino and white)" + const filterByTitles = [ + ["draw", "white"], + ["albino", "white"], + ]; + const { result } = await GOOGLE_IMG_SCRAP({ + search: "cats", + filterByTitles, + }); + for (const img of result) { + expect( + filterByTitles.flat().some((word) => img.title.includes(word)) + ).toBeTruthy(); + } + }); +}); diff --git a/test/limit.test.ts b/test/limit.test.ts new file mode 100644 index 0000000..b6f2330 --- /dev/null +++ b/test/limit.test.ts @@ -0,0 +1,18 @@ +import { GOOGLE_IMG_SCRAP } from "../dist"; + +describe("Limit test", function () { + it("Should return only 5 elements", async function () { + const { result } = await GOOGLE_IMG_SCRAP({ + search: "cats", + limit: 5, + }); + expect(result.length).toBe(5); + }); + it("Should return all elements", async function () { + const { result } = await GOOGLE_IMG_SCRAP({ + search: "cats", + limit: 0, + }); + expect(result.length).toBeGreaterThan(0); + }); +}); diff --git a/test/proxy.test.ts b/test/proxy.test.ts new file mode 100644 index 0000000..cb20845 --- /dev/null +++ b/test/proxy.test.ts @@ -0,0 +1,16 @@ +import { GOOGLE_IMG_SCRAP } from "../dist"; + +describe("Proxy test", function () { + it("Proxy for axios", async function () { + const { result } = await GOOGLE_IMG_SCRAP({ + search: "cats", + //change proxy if not working + proxy: { + protocol: "https", + host: "201.229.250.19", + port: 80, + }, + }); + expect(result.length).toBeGreaterThan(0); + }); +}); diff --git a/test/query.test.ts b/test/query.test.ts new file mode 100644 index 0000000..3ff5778 --- /dev/null +++ b/test/query.test.ts @@ -0,0 +1,18 @@ +import { GOOGLE_IMG_SCRAP, GOOGLE_QUERY } from "../dist"; + +describe("Query test", function () { + it("Test query params like safeSearch, custom query and query object", async function () { + const { url, result } = await GOOGLE_IMG_SCRAP({ + search: "cats", + safeSearch: true, + custom: "test=true", + query: { + EXTENSION: GOOGLE_QUERY.EXTENSION.PNG, + TYPE: GOOGLE_QUERY.TYPE.DRAW, + }, + }); + expect(result[0].url).toMatch("png"); + expect(url).toMatch("safe=active"); //safe search + expect(url).toMatch("test=true"); //custom query + }); +}); diff --git a/test/urlMatch.test.ts b/test/urlMatch.test.ts new file mode 100644 index 0000000..748df4d --- /dev/null +++ b/test/urlMatch.test.ts @@ -0,0 +1,21 @@ +import { GOOGLE_IMG_SCRAP } from "../dist"; + +describe("Url match test", function () { + it("All url should match a specific regex", async function () { + //will build something like this "(cdn and wikipedia) or (cdn istockphoto)" + const urlMatch = [ + ["cdn", "wikipedia"], + ["cdn", "istockphoto"], + ]; + const { result } = await GOOGLE_IMG_SCRAP({ + search: "cats", + urlMatch, + }); + + for (const img of result) { + expect( + urlMatch.flat().some((word) => img.url.includes(word)) + ).toBeTruthy(); + } + }); +}); diff --git a/tsconfig.json b/tsconfig.json index fba79ac..f1c018d 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -1,6 +1,7 @@ { "compilerOptions": { "module": "CommonJS", + "esModuleInterop": true, "allowSyntheticDefaultImports": true, "target": "ES6", "noImplicitAny": true, @@ -8,9 +9,9 @@ "sourceMap": true, "outDir": "dist", "resolveJsonModule": true, - "types": ["node"], + "types": ["node", "jest"], "declaration": true, - "typeRoots": ["types"] + "typeRoots": ["types", "node_modules/@types"] }, "include": ["src/**/*", "types/**/*"] } diff --git a/types/config.d.ts b/types/config.d.ts index 95912aa..f88dc57 100644 --- a/types/config.d.ts +++ b/types/config.d.ts @@ -1,5 +1,8 @@ +import { AxiosProxyConfig } from "axios"; + type Config = { search: string; + proxy?: AxiosProxyConfig; limit?: number; query?: { TYPE?: string;