forked from northy/pdf-to-markdown-workflow
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 961534c
Showing
5 changed files
with
116 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# Action metadata for the PDF-to-Markdown workflow.
# Every input is optional; the declared default is used when the caller omits it.
name: PDF To Markdown workflow
description: Updates README.md generating PDF images
author: northy

inputs:
  pdf_input:
    description: 'Input PDF file'
    default: 'source.pdf'
    required: false
  output_folder:
    description: 'Output pngs folder'
    default: 'output_pngs'
    required: false
  source_md:
    description: "Source markdown file"
    default: 'source.md'
    required: false
  output_md:
    description: "Output markdown file"
    default: 'README.md'
    required: false

branding:
  icon: 'activity'
  color: 'yellow'

runs:
  # node12 is no longer a supported JavaScript action runtime; node20 is.
  using: 'node20'
  main: 'run.js'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
pymupdf | ||
pydotenv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
const spawn = require('child_process').spawn; | ||
const path = require("path"); | ||
|
||
/**
 * Runs a command as a child process, sharing this process's stdio.
 *
 * @param {string} cmd - Executable to launch.
 * @param {string[]} [args=[]] - Arguments passed to the executable.
 * @returns {Promise<number>} Resolves with the exit code (always 0) on success.
 *   Rejects with an Error whose `code` property holds the exit code when the
 *   process closes with anything other than 0, or with the error emitted by
 *   the child process when it fails to run.
 */
const exec = (cmd, args = []) => new Promise((resolve, reject) => {
  console.log(`Started: ${cmd} ${args.join(" ")}`);
  const app = spawn(cmd, args, { stdio: 'inherit' });
  app.on('error', reject);
  app.on('close', (code) => {
    if (code === 0) {
      resolve(code);
      return;
    }
    // Declared locally: assigning to an undeclared `err` leaks a global in
    // sloppy mode and throws a ReferenceError in strict/ESM code.
    const err = new Error(`Invalid status code: ${code}`);
    err.code = code;
    reject(err);
  });
});
|
||
/**
 * Action entry point: runs the `run.sh` script that sits next to this file
 * through bash and waits for it to finish.
 *
 * @returns {Promise<void>} Rejects when `exec` rejects.
 */
async function main() {
  const scriptPath = path.join(__dirname, './run.sh');
  await exec('bash', [scriptPath]);
}
|
||
// Run the action; on failure, log the error and exit with a non-zero status.
main().catch((err) => {
  // console.error prints the stack of an Error, so it is not logged twice.
  console.error(err);
  // `err.code` may be a string (e.g. 'ENOENT' when the command cannot be
  // spawned), which is not a valid exit status; only forward real integers.
  const exitCode = Number.isInteger(err.code) && err.code !== 0 ? err.code : 1;
  process.exit(exitCode);
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
#!/bin/sh
# Converts the PDF into page images, rebuilds the Markdown output, then
# commits and pushes the generated files.
#
# Reads: INPUT_PDF_INPUT, INPUT_OUTPUT_FOLDER, INPUT_SOURCE_MD, INPUT_OUTPUT_MD
# (names match the inputs declared in the action metadata), plus GITHUB_ACTOR,
# GITHUB_TOKEN and GITHUB_REPOSITORY for the push URL.
set -e

# Remove the downloaded converter on exit, including when a step below fails.
trap 'rm -f split.py' EXIT

wget -O split.py https://raw.githubusercontent.com/northy/pdf-to-markdown-workflow/master/split.py

# Quote every expansion so paths containing spaces stay single arguments.
python split.py "${INPUT_PDF_INPUT}" "${INPUT_OUTPUT_FOLDER}" "${INPUT_SOURCE_MD}" "${INPUT_OUTPUT_MD}"

# Delete the converter before staging so it can never be picked up by git add.
rm -f split.py

git config --local user.email "action@github.com"
git config --local user.name "GitHub Action"
git add "${INPUT_OUTPUT_FOLDER}" "${INPUT_OUTPUT_MD}"

# `git commit` exits non-zero when nothing is staged, which would fail the
# whole job under `set -e`; treat "no changes" as success instead.
if git diff --cached --quiet; then
    echo "No changes to commit"
    exit 0
fi

git commit -m "Compile MD"

remote_repo="https://${GITHUB_ACTOR}:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git"

git push "${remote_repo}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
"""Render each page of a PDF to a PNG and splice image links into Markdown.

Usage: python split.py <pdf_input> <output_folder> <source_md> <output_md>

The source Markdown is copied to the output file with everything between the
START and END marker lines replaced by one image link per PDF page.
"""
import os
import sys

import fitz

START_MARKER = "<!-- PDF-TO-MARKDOWN:START -->"
END_MARKER = "<!-- PDF-TO-MARKDOWN:END -->"


def _marker_positions(lines):
    """Return (start, end): index of the last line containing each marker.

    An index stays 0 when its marker is absent, so generated content is then
    inserted after the first line and nothing is deleted.
    """
    start = 0
    end = 0
    for index, line in enumerate(lines):
        if START_MARKER in line:
            start = index
        if END_MARKER in line:
            end = index
    return start, end


def _save_page_png(page, target):
    """Rasterise one PDF page and write it to `target` as a PNG."""
    # PyMuPDF renamed its camelCase methods to snake_case; prefer the new
    # names and fall back to the old ones so either API generation works.
    get_pixmap = getattr(page, "get_pixmap", None) or page.getPixmap
    pixmap = get_pixmap()
    save = getattr(pixmap, "save", None) or pixmap.writePNG
    save(target)


def main(argv):
    """Run the conversion using the four positional arguments in `argv`."""
    if len(argv) < 5:
        sys.exit(
            "usage: split.py <pdf_input> <output_folder> <source_md> <output_md>"
        )
    pdf_input, output_folder, source_md, output_md = argv[1:5]

    os.makedirs(output_folder, exist_ok=True)

    with open(source_md, "r", encoding="utf-8") as source:
        lines = source.readlines()

    start, end = _marker_positions(lines)
    # Drop whatever sits between the two marker lines.
    del lines[start + 1:end]

    images = []
    doc = fitz.open(pdf_input)
    try:
        # Iterate over the real page count instead of looping until an
        # exception: a bare `except` hid genuine rendering and write errors.
        for number in range(1, len(doc) + 1):
            target = f"{output_folder}/page{number}.png"
            _save_page_png(doc[number - 1], target)
            # The blank line keeps `---` a horizontal rule rather than turning
            # the image line into a setext heading.
            images.append(f"![page{number}]({target})\n\n---\n")
    finally:
        doc.close()

    # Insert the generated block right after the START marker line.
    lines[start + 1:start + 1] = images

    with open(output_md, "w", encoding="utf-8") as md:
        md.writelines(lines)


if __name__ == "__main__":
    main(sys.argv)