Skip to content

Commit

Permalink
Initial Commit
Browse files Browse the repository at this point in the history
  • Loading branch information
northy committed Aug 28, 2020
0 parents commit 961534c
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 0 deletions.
29 changes: 29 additions & 0 deletions action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Metadata for the "PDF To Markdown" GitHub Action.
name: PDF To Markdown workflow
description: Updates README.md generating PDF images
author: northy

# Every input is optional. GitHub exposes each one to the entry point as an
# INPUT_<UPPERCASED_NAME> environment variable, which is how run.sh reads them.
inputs:
  pdf_input:
    description: 'Input PDF file'
    default: 'source.pdf'
    required: false
  output_folder:
    description: 'Output pngs folder'
    default: 'output_pngs'
    required: false
  source_md:
    description: 'Source markdown file'
    default: 'source.md'
    required: false
  output_md:
    description: 'Output markdown file'
    default: 'README.md'
    required: false

branding:
  icon: 'activity'
  color: 'yellow'

runs:
  # node12 is no longer a supported JavaScript action runtime; run.js only
  # relies on child_process and path, so it runs unchanged on node20.
  using: 'node20'
  main: 'run.js'
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# PyMuPDF provides the `fitz` module imported by split.py.
pymupdf
# NOTE(review): split.py does not import this package — confirm it is still needed.
pydotenv
26 changes: 26 additions & 0 deletions run.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
const spawn = require('child_process').spawn;
const path = require("path");

/**
 * Runs a command as a child process that shares this process's stdio.
 *
 * @param {string} cmd - Executable to run.
 * @param {string[]} [args=[]] - Arguments passed to the executable.
 * @returns {Promise<number>} Resolves with the exit code (always 0) once the
 *   child's stdio has closed after a successful exit. Rejects with an Error
 *   whose `code` property holds the non-zero exit status, or with the error
 *   emitted by the child process when it could not be spawned.
 */
const exec = (cmd, args = []) => new Promise((resolve, reject) => {
  console.log(`Started: ${cmd} ${args.join(" ")}`);
  const app = spawn(cmd, args, { stdio: 'inherit' });

  // Emitted when the process could not be started at all (e.g. missing binary).
  app.on('error', reject);

  app.on('close', (code) => {
    if (code === 0) {
      resolve(code);
      return;
    }
    // Declared locally: assigning to an undeclared `err` leaks a global in
    // sloppy mode and throws a ReferenceError in strict mode / ES modules.
    const err = new Error(`Invalid status code: ${code}`);
    err.code = code;
    reject(err);
  });
});

/**
 * Action entry point: runs the run.sh script that sits next to this file
 * through bash and waits for it to finish.
 *
 * @returns {Promise<void>} Rejects when the script cannot be started or exits
 *   with a non-zero status.
 */
const main = async () => {
  const scriptPath = path.join(__dirname, './run.sh');
  const bashArgs = [scriptPath];
  await exec('bash', bashArgs);
};

// Run the action; on failure log the error and exit with a non-zero status so
// the workflow step is marked as failed.
main().catch((err) => {
  console.error(err);
  console.error(err.stack);
  // `err.code` is the child's numeric exit status when the script failed, but a
  // string such as 'ENOENT' when the process could not be spawned. A string is
  // not a valid exit status (Node coerces or rejects it depending on version),
  // so only integers are forwarded; everything else uses the -1 fallback.
  const hasNumericCode = Number.isInteger(err.code) && err.code !== 0;
  process.exit(hasNumericCode ? err.code : -1);
});
17 changes: 17 additions & 0 deletions run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/sh
# Renders the PDF pages to PNGs, rewrites the markdown file and pushes the
# result back to the repository.
#
# Reads INPUT_PDF_INPUT, INPUT_OUTPUT_FOLDER, INPUT_SOURCE_MD and
# INPUT_OUTPUT_MD (the action inputs) plus GITHUB_ACTOR, GITHUB_TOKEN and
# GITHUB_REPOSITORY from the environment.
set -e

# Use the split.py shipped next to this script instead of downloading the
# master copy into the workspace: that keeps the script in step with the
# action version in use and never overwrites or deletes a split.py that
# belongs to the repository being processed.
script_dir="$(cd "$(dirname "$0")" && pwd)"

# All expansions are quoted so paths containing spaces stay single arguments.
python "${script_dir}/split.py" "${INPUT_PDF_INPUT}" "${INPUT_OUTPUT_FOLDER}" "${INPUT_SOURCE_MD}" "${INPUT_OUTPUT_MD}"

git config --local user.email "action@github.com"
git config --local user.name "GitHub Action"
git add -- "${INPUT_OUTPUT_FOLDER}" "${INPUT_OUTPUT_MD}"

# `git commit` exits non-zero when nothing is staged, which would fail the
# action under `set -e` even though an unchanged PDF is not an error.
if git diff --cached --quiet; then
  echo "No changes to commit."
  exit 0
fi

git commit -m "Compile MD"

remote_repo="https://${GITHUB_ACTOR}:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git"

git push "${remote_repo}"
42 changes: 42 additions & 0 deletions split.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import fitz, sys, os

# Markers that delimit the generated section of the markdown file.
START_MARKER = "<!-- PDF-TO-MARKDOWN:START -->"
END_MARKER = "<!-- PDF-TO-MARKDOWN:END -->"


def call_first_available(obj, names, *args):
    """Call the first method listed in ``names`` that ``obj`` provides.

    Raises AttributeError when ``obj`` has none of the named methods.
    """
    for name in names:
        method = getattr(obj, name, None)
        if callable(method):
            return method(*args)
    raise AttributeError(
        f"{type(obj).__name__} provides none of: {', '.join(names)}"
    )


def replace_between_markers(lines, new_lines):
    """Return a copy of ``lines`` with the marker-delimited section replaced.

    Everything strictly between the last line containing START_MARKER and the
    last line containing END_MARKER is dropped and ``new_lines`` is inserted
    in its place; the marker lines themselves are kept. ``lines`` is not
    modified.

    Raises ValueError when either marker is missing or END does not come
    after START, instead of silently inserting at the top of the file.
    """
    start = end = None
    for index, line in enumerate(lines):
        if START_MARKER in line:
            start = index
        if END_MARKER in line:
            end = index
    if start is None or end is None or end <= start:
        raise ValueError(
            f"expected a line containing '{START_MARKER}' followed by a "
            f"line containing '{END_MARKER}'"
        )
    return lines[:start + 1] + list(new_lines) + lines[end:]


def render_pages(pdf_input, output_folder):
    """Write every page of ``pdf_input`` to ``output_folder`` as a PNG.

    Pages are saved as page1.png, page2.png, ... and the function returns one
    markdown image snippet per page, in page order.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    imgs = []
    doc = fitz.open(pdf_input)
    try:
        # Iterating the document visits each page exactly once, so rendering
        # errors propagate instead of being mistaken for "no more pages".
        for number, page in enumerate(doc, start=1):
            # Forward slash on purpose: the same path is used in the markdown.
            output = f"{output_folder}/page{number}.png"
            # Current PyMuPDF releases use snake_case method names; the
            # camelCase fallbacks keep older installations working.
            pix = call_first_available(page, ("get_pixmap", "getPixmap"))
            call_first_available(pix, ("save", "writePNG"), output)
            imgs.append(f'![Page {number}]({output} "Page {number}")\n---\n')
    finally:
        doc.close()
    return imgs


def main(argv):
    """Regenerate the page images and the marker section of the markdown.

    ``argv`` is laid out like sys.argv:
    [program, pdf_input, output_folder, source_md, output_md].
    """
    if len(argv) < 5:
        sys.exit(f"usage: {argv[0]} PDF_INPUT OUTPUT_FOLDER SOURCE_MD OUTPUT_MD")
    pdf_input, output_folder, source_md, output_md = argv[1:5]

    with open(source_md, 'r', encoding='utf-8') as source:
        lines = source.readlines()

    imgs = render_pages(pdf_input, output_folder)
    try:
        lines = replace_between_markers(lines, imgs)
    except ValueError as error:
        sys.exit(f"{source_md}: {error}")

    with open(output_md, 'w', encoding='utf-8') as md:
        md.writelines(lines)


if __name__ == "__main__":
    main(sys.argv)

0 comments on commit 961534c

Please sign in to comment.