diff --git a/samples/ExtractDataFromPDFsAndImagesWithGPT/README.md b/samples/ExtractDataFromPDFsAndImagesWithGPT/README.md new file mode 100644 index 0000000..edce989 --- /dev/null +++ b/samples/ExtractDataFromPDFsAndImagesWithGPT/README.md @@ -0,0 +1,77 @@ +--- +page_type: sample +languages: +- powerautomate-comma +products: +- powerautomate +name: Extract Data From PDFs And Images With GPT +description: Uses AI Builder's OCR for PDFs & Images to extract the text present in a file, replicates the file in a text (txt) format, then passes it off to a GPT prompt action for things like data extraction. +urlFragment: powerautomate-extract-data-from-pdfs-and-images-with-gpt +ms.date: 08/30/2023 +author: tylerkolota +ms.author: pnp +level: intermediate +ms.prod: power-automate +--- + +# Extract Data From PDFs And Images With GPT + +## Summary + +Uses AI Builder's OCR for PDFs & Images to extract the text present in a file, replicates the file in a text (txt) format, then passes it off to a GPT prompt action for things like data extraction. + +![Preview](./assets/GPTDataExtractionThumbnail.png) +![Preview](./assets/preview2.png) + + +## Applies to + +* [Microsoft Power Automate](https://docs.microsoft.com/powerautomate/) + +## Compatibility + +![Power Apps Source File Pack and Unpack Utility 0.20](https://img.shields.io/badge/Packing%20Tool-0.20-green.svg) +![Premium License](https://img.shields.io/badge/Premium%20License-Not%20Required-green.svg "Premium Power Automate license not required") +![Experimental Features](https://img.shields.io/badge/Experimental%20Features-No-green.svg "Does not rely on experimental features") +![On-Premises Connectors](https://img.shields.io/badge/On--Premises%20Connectors-No-green.svg "Does not use on-premise connectors") +![Custom Connectors](https://img.shields.io/badge/Custom%20Connectors-Not%20Required-green.svg "Does not use custom connectors") + +## Authors + +Solution|Author(s) +--------|--------- +Extract Data From PDFs And Images With GPT | [Tyler 
Kolota](https://github.com/takolota) ([@tylerkolota](https://www.linkedin.com/in/kolota)) + +## Version history + +Version|Date|Comments +-------|----|-------- +2.7|August 30, 2023|Initial release + + +## Features + +## Prerequisites + +AI Builder credits + +### Setting Up The Flow + +To set up the flow, you can follow the video guide at https://powerusers.microsoft.com/t5/Power-Automate-Cookbook/Extract-Data-From-PDFs-and-Images-With-GPT/td-p/2201345 + +## Data Sources + +OneDrive, SharePoint, or any other file source. + + + +## Disclaimer + +**THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.** + + +## Support + +For questions & feedback, please visit https://powerusers.microsoft.com/t5/Power-Automate-Cookbook/Extract-Data-From-PDFs-and-Images-With-GPT/td-p/2201345 + + diff --git a/samples/ExtractDataFromPDFsAndImagesWithGPT/assets/GPTDataExtractionThumbnail.png b/samples/ExtractDataFromPDFsAndImagesWithGPT/assets/GPTDataExtractionThumbnail.png new file mode 100644 index 0000000..e2b0fbc Binary files /dev/null and b/samples/ExtractDataFromPDFsAndImagesWithGPT/assets/GPTDataExtractionThumbnail.png differ diff --git a/samples/ExtractDataFromPDFsAndImagesWithGPT/assets/preview2.png b/samples/ExtractDataFromPDFsAndImagesWithGPT/assets/preview2.png new file mode 100644 index 0000000..d628464 Binary files /dev/null and b/samples/ExtractDataFromPDFsAndImagesWithGPT/assets/preview2.png differ diff --git a/samples/ExtractDataFromPDFsAndImagesWithGPT/solution/GPTDataExtraction-PDFs&ImagesV2.7/Microsoft.Flow/flows/d02adafe-f0e6-40c5-9efd-1d6196af87e7/apisMap.json b/samples/ExtractDataFromPDFsAndImagesWithGPT/solution/GPTDataExtraction-PDFs&ImagesV2.7/Microsoft.Flow/flows/d02adafe-f0e6-40c5-9efd-1d6196af87e7/apisMap.json new file mode 100644 index 0000000..03b9f7f --- /dev/null +++ 
b/samples/ExtractDataFromPDFsAndImagesWithGPT/solution/GPTDataExtraction-PDFs&ImagesV2.7/Microsoft.Flow/flows/d02adafe-f0e6-40c5-9efd-1d6196af87e7/apisMap.json @@ -0,0 +1 @@ +{"shared_commondataserviceforapps_1":"046d0d3e-1564-4eab-b2bc-c1abf3361f02","shared_onedriveforbusiness_1":"6918a60c-aeef-4313-93dc-93a4628489df"} \ No newline at end of file diff --git a/samples/ExtractDataFromPDFsAndImagesWithGPT/solution/GPTDataExtraction-PDFs&ImagesV2.7/Microsoft.Flow/flows/d02adafe-f0e6-40c5-9efd-1d6196af87e7/connectionsMap.json b/samples/ExtractDataFromPDFsAndImagesWithGPT/solution/GPTDataExtraction-PDFs&ImagesV2.7/Microsoft.Flow/flows/d02adafe-f0e6-40c5-9efd-1d6196af87e7/connectionsMap.json new file mode 100644 index 0000000..9546a7c --- /dev/null +++ b/samples/ExtractDataFromPDFsAndImagesWithGPT/solution/GPTDataExtraction-PDFs&ImagesV2.7/Microsoft.Flow/flows/d02adafe-f0e6-40c5-9efd-1d6196af87e7/connectionsMap.json @@ -0,0 +1 @@ +{"shared_commondataserviceforapps_1":"60e38b69-b97e-462a-a1fe-21c0ca05795f","shared_onedriveforbusiness_1":"2e6ea997-827b-4116-af72-75bccd815198"} \ No newline at end of file diff --git a/samples/ExtractDataFromPDFsAndImagesWithGPT/solution/GPTDataExtraction-PDFs&ImagesV2.7/Microsoft.Flow/flows/d02adafe-f0e6-40c5-9efd-1d6196af87e7/definition.json b/samples/ExtractDataFromPDFsAndImagesWithGPT/solution/GPTDataExtraction-PDFs&ImagesV2.7/Microsoft.Flow/flows/d02adafe-f0e6-40c5-9efd-1d6196af87e7/definition.json new file mode 100644 index 0000000..efdb3f2 --- /dev/null +++ b/samples/ExtractDataFromPDFsAndImagesWithGPT/solution/GPTDataExtraction-PDFs&ImagesV2.7/Microsoft.Flow/flows/d02adafe-f0e6-40c5-9efd-1d6196af87e7/definition.json @@ -0,0 +1 @@ +{"name":"d2d31e1b-a6d3-4f9b-a31a-479cad0b11f8","id":"/providers/Microsoft.Flow/flows/d2d31e1b-a6d3-4f9b-a31a-479cad0b11f8","type":"Microsoft.Flow/flows","properties":{"apiId":"/providers/Microsoft.PowerApps/apis/shared_logicflows","displayName":"GPT Data Extraction - PDFs & Images 
V2.7","definition":{"metadata":{"workflowEntityId":null,"processAdvisorMetadata":null,"flowChargedByPaygo":null,"flowclientsuspensionreason":"None","flowclientsuspensiontime":null,"flowclientsuspensionreasondetails":null,"creator":{"id":"4d4fadf8-39e1-477f-8941-9686478b0cdd","type":"User","tenantId":"7c1f24a6-7d39-452c-8237-0726e3b19a73"},"provisioningMethod":"FromDefinition","failureAlertSubscription":true,"clientLastModifiedTime":"2023-08-30T18:53:50.7795654Z"},"$schema":"https://schema.management.azure.com/providers/Microsoft.Logic/schemas/2016-06-01/workflowdefinition.json#","contentVersion":"1.0.0.0","parameters":{"$connections":{"defaultValue":{},"type":"Object"},"$authentication":{"defaultValue":{},"type":"SecureObject"}},"triggers":{"manual":{"metadata":{"operationMetadataId":"10b1ccc1-de00-4358-95b8-d8e72f5b7c85"},"type":"Request","kind":"Button","inputs":{"schema":{"type":"object","properties":{},"required":[]}}}},"actions":{"Create_text_with_GPT":{"runAfter":{"Convert_to_txt":["Succeeded"]},"metadata":{"operationMetadataId":"4f627591-70f5-4dff-9608-ce28abd5b492","flowSystemMetadata":{"portalOperationId":"aibuilderpredict_gptpromptengineering","portalOperationGroup":"aibuilder","portalOperationApiDisplayNameOverride":"AI Builder","portalOperationIconOverride":"https://content.powerapps.com/resource/makerx/static/pauto/images/designeroperations/aiBuilderNew.51dbdb6b.png","portalOperationBrandColorOverride":"#0A76C4","portalOperationApiTierOverride":"Standard"}},"type":"OpenApiConnection","inputs":{"host":{"apiId":"/providers/Microsoft.PowerApps/apis/shared_commondataserviceforapps","connectionName":"shared_commondataserviceforapps_1","operationId":"aibuilderpredict_gptpromptengineering"},"parameters":{"item/requestv2/prompt":"From the OCR captured invoice document text provided between [Start of text] & [End of text] markers, extract data for each of the example JSON fields between [Start example JSON] & [End example JSON].\nBe aware, OCR text may contain 
errors like wrong characters or missing formatting.\nIn the JSON, replace [data] with each field's extracted data & use an \"N/A\" string when the field's data is not found.\nConvert all numbers to a standard 2-place decimal notation.\nConvert all dates to international format \"yyyy-mm-dd\".\nRepresent each data value as a string type.\nMode Of Shipment (MOS) may also go by Mode Of Transport & the value is usually Air, Sea, or Land.\nThere may be any number of Product Lines.\n\n\n[Start example JSON]\n{\n\"Invoice Date\": [data],\n\"Invoice Number\": [data],\n\"Purchase Order (PO) Number\": [data],\n\"Incoterms\": [data],\n\"Delivery Or Ship To Address\": [data],\n\"Consignee Address\": [data],\n\"Mode Of Shipment\": [data],\n\"Product Lines\": [\n{\n\"Product Name\": [data],\n\"Product Quantity\": [data],\n\"Product Unit Price\": [data],\n\"Product Line Total or Amount\": [data],\n\"Manufacturer\": [data]\n},\n{\n\"Product Name\": [data],\n\"Product Quantity\": [data],\n\"Product Unit Price\": [data],\n\"Product Line Total or Amount\": [data],\n\"Manufacturer\": [data]\n}\n],\n\"Invoice Total\": [data],\n\"Banking Details\": [data]\n}\n[End example JSON]\n\n\n[Start of text]\n@{outputs('Combined_txt_output')}\n[End of text]\n\n\nReturn only the final JSON object. 
Do not return any other output descriptions or explanations, only the JSON object.","recordId":"a1afa5d4-7a44-4c31-9cd2-e852a78431fa","item/requestv2/parameters":"{}"},"authentication":"@parameters('$authentication')"}},"Get_file_metadata":{"runAfter":{"Initialize_variable_EachPage":["Succeeded"]},"metadata":{"b!y2rxipfTvUi7ALZMiyxLa89aQqkbMORMksjFtKI_dfKgJA8V37mjQICJm9mYgy-T.01JWBUU4DL3PCH5K65FRCL2IWF3UTNJUWG":"/POD-Kenya PO10023608.pdf","operationMetadataId":"1796706b-e357-4578-b651-8e27cf82b03e","b!y2rxipfTvUi7ALZMiyxLa89aQqkbMORMksjFtKI_dfKgJA8V37mjQICJm9mYgy-T.01JWBUU4C4OWY64YOCSJGJULFZ2CNR7L2K":"/POD-Kenya PO10023608.pdf","b!y2rxipfTvUi7ALZMiyxLa89aQqkbMORMksjFtKI_dfKgJA8V37mjQICJm9mYgy-T.01JWBUU4GNEWKHDBRESJH2GDE4DZFD5EHT":"/Abbott INV_1303673643_POD.pdf","b!y2rxipfTvUi7ALZMiyxLa89aQqkbMORMksjFtKI_dfKgJA8V37mjQICJm9mYgy-T.01JWBUU4DAPRHZ6W7KBZBJKYJSOMNNMIVV":"/Roche INV_8304933707.pdf","b!y2rxipfTvUi7ALZMiyxLa89aQqkbMORMksjFtKI_dfKgJA8V37mjQICJm9mYgy-T.01JWBUU4DGTO6UGLWL6RDLGGR2Q3JMVCRQ":"/Tyler Kolota Resume Chemonics 2023 (1).pdf","b!y2rxipfTvUi7ALZMiyxLa89aQqkbMORMksjFtKI_dfKgJA8V37mjQICJm9mYgy-T.01JWBUU4AKTW6BVXWHERCZSUDGHOTMI27O":"/EastRepairInvoice.png","b!y2rxipfTvUi7ALZMiyxLa89aQqkbMORMksjFtKI_dfKgJA8V37mjQICJm9mYgy-T.01JWBUU4GZHSEFLOQ2TJA2NIOOXKYIQ5Y2":"/StanfordPlumbingInvoice.png","b!y2rxipfTvUi7ALZMiyxLa89aQqkbMORMksjFtKI_dfKgJA8V37mjQICJm9mYgy-T.01JWBUU4A37SOBHIQIXRD2G4ED3YGT4SBR":"/RO10133137_PO10023315_INV_TNRP65465.pdf","b!y2rxipfTvUi7ALZMiyxLa89aQqkbMORMksjFtKI_dfKgJA8V37mjQICJm9mYgy-T.01JWBUU4CZO265H55Z4JGKSRJ6FJWIHEYV":"/Hologic INV_31012396.pdf"},"type":"OpenApiConnection","inputs":{"host":{"apiId":"/providers/Microsoft.PowerApps/apis/shared_onedriveforbusiness","connectionName":"shared_onedriveforbusiness_1","operationId":"GetFileMetadata"},"parameters":{"id":"Choose your PDF or image 
file"},"authentication":{"value":"@json(decodeBase64(triggerOutputs().headers['X-MS-APIM-Tokens']))['$ConnectionKey']","type":"Raw"}}},"Get_file_content":{"runAfter":{"Get_file_metadata":["Succeeded"]},"metadata":{"operationMetadataId":"8b379bcb-615a-42ff-b39b-e9adea5daa69"},"type":"OpenApiConnection","inputs":{"host":{"apiId":"/providers/Microsoft.PowerApps/apis/shared_onedriveforbusiness","connectionName":"shared_onedriveforbusiness_1","operationId":"GetFileContent"},"parameters":{"id":"@outputs('Get_file_metadata')?['body/Id']","inferContentType":true},"authentication":{"value":"@json(decodeBase64(triggerOutputs().headers['X-MS-APIM-Tokens']))['$ConnectionKey']","type":"Raw"}}},"Recognize_text_in_an_image_or_a_PDF_document":{"runAfter":{"Get_file_content":["Succeeded"]},"metadata":{"operationMetadataId":"3e357f5f-d994-409b-bb47-3d7e43c70439","flowSystemMetadata":{"portalOperationId":"aibuilderpredict_textrecognition","portalOperationGroup":"aibuilder","portalOperationApiDisplayNameOverride":"AI 
Builder","portalOperationIconOverride":"https://content.powerapps.com/resource/makerx/static/pauto/images/designeroperations/aiBuilderNew.51dbdb6b.png","portalOperationBrandColorOverride":"#0A76C4","portalOperationApiTierOverride":"Standard"}},"type":"OpenApiConnection","inputs":{"host":{"apiId":"/providers/Microsoft.PowerApps/apis/shared_commondataserviceforapps","connectionName":"shared_commondataserviceforapps_1","operationId":"aibuilderpredict_textrecognition"},"parameters":{"item/requestv2/base64Encoded":"@body('Get_file_content')","recordId":"86419a67-205a-454f-b6fc-601394f2786d"},"authentication":"@parameters('$authentication')"}},"Initialize_variable_EachPage":{"runAfter":{"StaticVariables":["Succeeded"]},"metadata":{"operationMetadataId":"bbda957a-609e-4ca5-97fb-356b2a294c4d"},"type":"InitializeVariable","inputs":{"variables":[{"name":"EachPage","type":"array"}]}},"PageNumbers":{"runAfter":{"Recognize_text_in_an_image_or_a_PDF_document":["Succeeded"]},"metadata":{"operationMetadataId":"370e3702-6ec7-426d-aa47-4053957c13c5"},"type":"Compose","inputs":"All","description":"Default is All for all pages. 
Otherwise, select which OCR file pages you want to pass on to the text conversion & GPT prompt by inputting each page number separated by a comma, ex: 1,2,5,6,7"},"StaticVariables":{"runAfter":{},"metadata":{"operationMetadataId":"a481bd89-b379-49fd-a408-8aa7c5b2d56f"},"type":"Compose","inputs":{"LineBreak":"@decodeUriComponent('%0A')","BlankSpaces":" "},"description":"BlankSpaces property allows the later FormTextPropertyWithHorizontalSpacing action to reference any number of blank lines to form the correct spacing between text coordinates."},"Convert_to_txt":{"actions":{"Combined_txt_output":{"runAfter":{"Select_ResortPagesAndLines":["Succeeded"]},"metadata":{"operationMetadataId":"daa1e0fe-4df0-477b-b32b-997f4ccbf8a2"},"type":"Compose","inputs":"@join(\r\nbody('Select_ResortPagesAndLines'), \r\nconcat(outputs('StaticVariables')?['LineBreak'], outputs('StaticVariables')?['LineBreak'], outputs('StaticVariables')?['LineBreak'])\r\n)"},"Select_ResortPagesAndLines":{"runAfter":{"Apply_to_each_Convert_to_txt":["Succeeded"]},"metadata":{"operationMetadataId":"16822721-9a21-46bd-8b8a-7f3f5663b7de"},"type":"Select","inputs":{"from":"@sort(variables('EachPage'))","select":"@join(skip(split(item(), '###'), 1), '')"},"description":"Since the above loop was collecting lines with concurrency on, the lines may not come out in order. That is why the last action in the loop appended the page # & vertical line # so they can be resorted here. 
Also removes the resort formatting."},"Apply_to_each_Convert_to_txt":{"foreach":"@body('Filter_array_RemoveUnselectedPageBlanks')","actions":{"RecordsSortedByCoordinates":{"runAfter":{"StaticPageVariables":["Succeeded"]},"metadata":{"operationMetadataId":"c924dcdb-3271-4f90-a1b1-9a4a17dffb4b"},"type":"Compose","inputs":"@sort(body('Select_AddProperties'), 'SortYX')"},"Select_FormTextPropertyWithHorizontalSpacing":{"runAfter":{"RecordsSortedByCoordinates":["Succeeded"]},"metadata":{"operationMetadataId":"fa26ad20-0cad-4bd7-9f5a-4631114c2c2b"},"type":"Select","inputs":{"from":"@range(0, length(outputs('RecordsSortedByCoordinates')))","select":"@addProperty(\r\noutputs('RecordsSortedByCoordinates')[item()],\r\n'TextWithSpacing',\r\n\r\nconcat(\r\ntake(outputs('StaticVariables')?['BlankSpaces'], \r\nmax(1, int(formatnumber(mul(\r\n sub(sub(outputs('RecordsSortedByCoordinates')[item()]?['RawMidX'], outputs('RecordsSortedByCoordinates')[item()]?['HalfCharLengthX']), if(or(equals(item(), 0), not(equals(outputs('RecordsSortedByCoordinates')[item()]?['SortY'], outputs('RecordsSortedByCoordinates')[sub(item(), 1)]?['SortY']))), sub(sub(outputs('StaticPageVariables')?['Min0XRawMidX'], outputs('StaticPageVariables')?['Min0XHalfCharLengthX']), float('.04')), add(outputs('RecordsSortedByCoordinates')[sub(item(), 1)]?['RawMidX'], outputs('RecordsSortedByCoordinates')[sub(item(), 1)]?['HalfCharLengthX']))),\r\n outputs('StaticPageVariables')?['ZoomX']), '#0')))\r\n),\r\noutputs('RecordsSortedByCoordinates')[item()]?['text']\r\n)\r\n\r\n)"},"description":"Calculates spacing between text pieces /w (Current MidX -1/2Current Character Length) - (Previous MidX + 1/2Previous Character Length). 
Also checks if each is a new-line piece & adjusts to spaces between the current piece & the page's left-most piece"},"Append_to_array_variable_EachPage":{"runAfter":{"Select_ArrayItemForEachHorizontalLine":["Succeeded"]},"metadata":{"operationMetadataId":"9f203c72-1312-43b3-a10d-d09c81d1c473"},"type":"AppendToArrayVariable","inputs":{"name":"EachPage","value":"@{formatnumber(items('Apply_to_each_Convert_to_txt')?['page'], 'D4')}###@{join(body('Select_ArrayItemForEachHorizontalLine'), outputs('StaticVariables')?['LineBreak'])}"}},"Select_ArrayItemForEachHorizontalLine":{"runAfter":{"Select_CombineAllTextWithSpacingAndSortY":["Succeeded"]},"metadata":{"operationMetadataId":"a4db395c-fae6-4953-a54a-af8ebd364309"},"type":"Select","inputs":{"from":"@range(int(outputs('StaticPageVariables')?['MinSortY']), int(outputs('StaticPageVariables')?['MaxSortY']))","select":"@join(\r\nreverse(\r\nskip(\r\nreverse(\r\nskip(\r\nsplit(join(body('Select_CombineAllTextWithSpacingAndSortY'), ''), concat(formatnumber(item(), 'D3'), '|~;')),\r\n1)\r\n),\r\n1)\r\n),\r\n''\r\n)"}},"Select_CombineAllTextWithSpacingAndSortY":{"runAfter":{"Select_FormTextPropertyWithHorizontalSpacing":["Succeeded"]},"metadata":{"operationMetadataId":"2995c262-6b37-4d84-bf97-3b86be967f1b"},"type":"Select","inputs":{"from":"@body('Select_FormTextPropertyWithHorizontalSpacing')","select":"@concat(item()['SortY'], '|~;', item()['TextWithSpacing'], item()['SortY'], '|~;')"}},"Select_AddProperties":{"runAfter":{},"metadata":{"operationMetadataId":"28a312bc-0836-4b6b-9de4-86c4a0df9da1"},"type":"Select","inputs":{"from":"@items('Apply_to_each_Convert_to_txt')?['lines']","select":"@addProperty(addProperty(addProperty(addProperty(addProperty(addProperty(item(),\r\n'SortY',\r\nformatNumber(int(formatNumber(mul(100, add(item()?['boundingBox']?['polygon']['coordinates'][0]['y'], div(sub(item()?['boundingBox']?['polygon']['coordinates'][3]['y'], item()?['boundingBox']?['polygon']['coordinates'][0]['y']), float(2)))), 
'#0')), 'D3')),\r\n'SortX',\r\nformatNumber(int(formatNumber(mul(100, add(item()?['boundingBox']?['polygon']['coordinates'][0]['x'], div(sub(item()?['boundingBox']?['polygon']['coordinates'][1]['x'], item()?['boundingBox']?['polygon']['coordinates'][0]['x']), float(2)))), '#0')), 'D3')),\r\n'SortYX',\r\nconcat(\r\nformatNumber(int(formatNumber(mul(100, add(item()?['boundingBox']?['polygon']['coordinates'][0]['y'], div(sub(item()?['boundingBox']?['polygon']['coordinates'][3]['y'], item()?['boundingBox']?['polygon']['coordinates'][0]['y']), 2))), '#0')), 'D3'),\r\nformatNumber(int(formatNumber(mul(100, add(item()?['boundingBox']?['polygon']['coordinates'][0]['x'], div(sub(item()?['boundingBox']?['polygon']['coordinates'][1]['x'], item()?['boundingBox']?['polygon']['coordinates'][0]['x']), float(2)))), '#0')), 'D3')\r\n)),\r\n'Raw0X',\r\nitem()?['boundingBox']?['polygon']['coordinates'][0]['x']),\r\n'RawMidX',\r\nadd(item()?['boundingBox']?['polygon']['coordinates'][0]['x'], div(sub(item()?['boundingBox']?['polygon']['coordinates'][1]['x'], item()?['boundingBox']?['polygon']['coordinates'][0]['x']), float(2)))),\r\n'HalfCharLengthX',\r\ndiv(length(item()?['text']), float(400)))"}},"StaticPageVariables":{"runAfter":{"Select_AddProperties":["Succeeded"]},"metadata":{"operationMetadataId":"f14f35fb-af34-4d5f-bb5e-85358645ffaa"},"type":"Compose","inputs":{"ZoomX":"@if(equals(1, length(body('Filter_array_RemoveUnselectedPageBlanks'))), 190,\r\nif(equals(2, length(body('Filter_array_RemoveUnselectedPageBlanks'))), 150,\r\nif(equals(3, length(body('Filter_array_RemoveUnselectedPageBlanks'))), 80, \r\n50)))","Min0X":"@first(Sort(body('Select_AddProperties'), 'Raw0X'))?['Raw0X']","Min0XRawMidX":"@first(Sort(body('Select_AddProperties'), 'Raw0X'))?['RawMidX']","Min0XHalfCharLengthX":"@first(Sort(body('Select_AddProperties'), 'Raw0X'))?['HalfCharLengthX']","MinSortY":"@First(Sort(body('Select_AddProperties'), 
'SortY'))?['SortY']","MaxSortY":"@Last(Sort(body('Select_AddProperties'), 'SortY'))?['SortY']"},"description":" ZoomX sets the multiple/#spaces per coordinate point, 200=More Accurate 100=Less GPT Tokens. MinX variables help calculate left-margin cut-offs & save characters. Min & Max Y cut top & bot margins & help calculate the txt replica line breaks."}},"runAfter":{"Filter_array_RemoveUnselectedPageBlanks":["Succeeded"]},"metadata":{"operationMetadataId":"20fc68dc-7957-4123-86f6-a2e3757db81c"},"type":"Foreach","description":"Uses the identified text & associated text coordinates to build a text (txt) output that approximates all the text & text positioning in an image or PDF","runtimeConfiguration":{"concurrency":{"repetitions":50}}},"Filter_array_RemoveUnselectedPageBlanks":{"runAfter":{"Select_ReturnPageResults":["Succeeded"]},"metadata":{"operationMetadataId":"402e9795-0ad4-43a8-bf84-355f478385bd"},"type":"Query","inputs":{"from":"@body('Select_ReturnPageResults')","where":"@greater(length(string(item())), 0)"}},"Select_ReturnPageResults":{"runAfter":{},"metadata":{"operationMetadataId":"f1c310d8-eb49-46b4-91de-942c301b79b6"},"type":"Select","inputs":{"from":"@range(1, length(outputs('Recognize_text_in_an_image_or_a_PDF_document')?['body/responsev2/predictionOutput/results']))","select":"@if(or(equals('All', outputs('PageNumbers')), contains(split(replace(string(outputs('PageNumbers')), ' ', ''), ','), string(item()))), \r\noutputs('Recognize_text_in_an_image_or_a_PDF_document')?['body/responsev2/predictionOutput/results']?[sub(item(), 
1)],\r\n'')"}}},"runAfter":{"PageNumbers":["Succeeded"]},"metadata":{"operationMetadataId":"4f0cf070-29be-4a1c-a44d-28324997081f"},"type":"Scope"}}},"connectionReferences":{"shared_commondataserviceforapps_1":{"connectionName":"shared-commondataser-5b71dc21-f15d-4d2f-b2fe-b201a568a0f2","source":"Embedded","id":"/providers/Microsoft.PowerApps/apis/shared_commondataserviceforapps","tier":"NotSpecified"},"shared_onedriveforbusiness_1":{"connectionName":"shared-onedriveforbu-dbe16364-6a03-40ca-8199-aaebc3241387","source":"Invoker","id":"/providers/Microsoft.PowerApps/apis/shared_onedriveforbusiness","tier":"NotSpecified"}},"flowFailureAlertSubscribed":false,"isManaged":false}} \ No newline at end of file diff --git a/samples/ExtractDataFromPDFsAndImagesWithGPT/solution/GPTDataExtraction-PDFs&ImagesV2.7/Microsoft.Flow/flows/manifest.json b/samples/ExtractDataFromPDFsAndImagesWithGPT/solution/GPTDataExtraction-PDFs&ImagesV2.7/Microsoft.Flow/flows/manifest.json new file mode 100644 index 0000000..a5eeba2 --- /dev/null +++ b/samples/ExtractDataFromPDFsAndImagesWithGPT/solution/GPTDataExtraction-PDFs&ImagesV2.7/Microsoft.Flow/flows/manifest.json @@ -0,0 +1 @@ +{"packageSchemaVersion":"1.0","flowAssets":{"assetPaths":["d02adafe-f0e6-40c5-9efd-1d6196af87e7"]}} \ No newline at end of file diff --git a/samples/ExtractDataFromPDFsAndImagesWithGPT/solution/GPTDataExtraction-PDFs&ImagesV2.7/manifest.json b/samples/ExtractDataFromPDFsAndImagesWithGPT/solution/GPTDataExtraction-PDFs&ImagesV2.7/manifest.json new file mode 100644 index 0000000..b634cfe --- /dev/null +++ b/samples/ExtractDataFromPDFsAndImagesWithGPT/solution/GPTDataExtraction-PDFs&ImagesV2.7/manifest.json @@ -0,0 +1 @@ +{"schema":"1.0","details":{"displayName":"GPT Data Extraction - PDFs & Images 
V2.7","description":"","createdTime":"2023-08-30T18:54:23.7218933Z","packageTelemetryId":"348331b8-0755-4c65-8377-ebaa4232ed85","creator":"N/A","sourceEnvironment":""},"resources":{"d02adafe-f0e6-40c5-9efd-1d6196af87e7":{"type":"Microsoft.Flow/flows","suggestedCreationType":"New","creationType":"Existing, New, Update","details":{"displayName":"GPT Data Extraction - PDFs & Images V2.7"},"configurableBy":"User","hierarchy":"Root","dependsOn":["046d0d3e-1564-4eab-b2bc-c1abf3361f02","60e38b69-b97e-462a-a1fe-21c0ca05795f","6918a60c-aeef-4313-93dc-93a4628489df","2e6ea997-827b-4116-af72-75bccd815198"]},"046d0d3e-1564-4eab-b2bc-c1abf3361f02":{"id":"/providers/Microsoft.PowerApps/apis/shared_commondataserviceforapps","name":"shared_commondataserviceforapps","type":"Microsoft.PowerApps/apis","suggestedCreationType":"Existing","details":{"displayName":"Microsoft Dataverse","iconUri":"https://connectoricons-prod.azureedge.net/releases/v1.0.1651/1.0.1651.3382/commondataserviceforapps/icon.png"},"configurableBy":"System","hierarchy":"Child","dependsOn":[]},"60e38b69-b97e-462a-a1fe-21c0ca05795f":{"type":"Microsoft.PowerApps/apis/connections","suggestedCreationType":"Existing","creationType":"Existing","details":{"displayName":"OpExOptimization@ghsc-psm.org","iconUri":"https://connectoricons-prod.azureedge.net/releases/v1.0.1588/1.0.1588.2938/commondataserviceforapps/icon.png"},"configurableBy":"User","hierarchy":"Child","dependsOn":["046d0d3e-1564-4eab-b2bc-c1abf3361f02"]},"6918a60c-aeef-4313-93dc-93a4628489df":{"id":"/providers/Microsoft.PowerApps/apis/shared_onedriveforbusiness","name":"shared_onedriveforbusiness","type":"Microsoft.PowerApps/apis","suggestedCreationType":"Existing","details":{"displayName":"OneDrive for 
Business","iconUri":"https://connectoricons-prod.azureedge.net/releases/v1.0.1647/1.0.1647.3361/onedriveforbusiness/icon.png"},"configurableBy":"System","hierarchy":"Child","dependsOn":[]},"2e6ea997-827b-4116-af72-75bccd815198":{"type":"Microsoft.PowerApps/apis/connections","suggestedCreationType":"Existing","creationType":"Existing","details":{"displayName":"OpExOptimization@ghsc-psm.org","iconUri":"https://connectoricons-prod.azureedge.net/releases/v1.0.1546/1.0.1546.2665/onedriveforbusiness/icon.png"},"configurableBy":"User","hierarchy":"Child","dependsOn":["6918a60c-aeef-4313-93dc-93a4628489df"]}}} \ No newline at end of file