From PDF to JavaScript

Reasoning

Being able to modify the content or the view, since not everything should be formatted for a printer.

Process

Packages available

Package Advantage on PDF
Mozilla pdfjs Easy rendering
pdf-lib Easy extracting
rkusa pdfjs Easy creation
js-pdf Complete but not as powerful

Extract from the old PDF and modify it

Read a local file with the FileReader web API

  1. Make a file input button, then process the uploaded file using the FileReader web API.

    1
    <input type="file" id="file-selector" accept=".pdf" onChange={onFileSelected} />
  2. The FileReader API works with callbacks, but it can be wrapped in a Promise-returning helper so that async/await can be used.

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    // helper function
    /**
     * Read a user-selected file into memory using the FileReader web API.
     *
     * FileReader reports completion through event callbacks, so the reader is
     * wrapped in a Promise to make it usable with async/await.
     *
     * @param {Blob} file - File (or Blob) to read, e.g. an entry of an input's FileList.
     * @returns {Promise<ArrayBuffer>} Resolves with the reader's result once loading finishes.
     *   Rejects with the reader's error when the read fails.
     */
    function readFileAsync(file) {
      return new Promise((resolve, reject) => {
        const reader = new FileReader();
        reader.onload = () => {
          resolve(reader.result);
        };
        // Reject with the underlying error rather than the raw event object, so
        // callers receive something with a usable message and stack.
        reader.onerror = () => {
          reject(reader.error ?? new Error("Failed to read the selected file"));
        };
        reader.readAsArrayBuffer(file);
      });
    }

    // selector to upload file
    /**
     * Change handler for the file input: reads the first selected file.
     *
     * Does nothing when the event carries no files.
     *
     * @param {Event} e - Change event whose target exposes a `files` list.
     * @returns {Promise<void>}
     */
    const onFileSelected = async (e) => {
      const selectedFiles = e.target.files;
      const hasSelection = selectedFiles != null && selectedFiles.length > 0;
      if (!hasSelection) {
        return;
      }
      // Keep the file contents as an array buffer for the later steps.
      const pdfArrayBuffer = await readFileAsync(selectedFiles[0]);
    };

Extract PDF file

  1. Get an array with the page numbers of the PDF

    1
    2
    3
    4
    5
    /**
     * Convert an inclusive range of page numbers into zero-based page indices.
     *
     * Example: range(2, 3) yields [1, 2].
     *
     * @param {number} start - First page number of the range.
     * @param {number} end - Last page number of the range (inclusive).
     * @returns {number[]} One index per page, each being the page number minus one;
     *   empty when `end` is smaller than `start`.
     */
    function range(start, end) {
      const indices = [];
      for (let page = start; page <= end; page += 1) {
        // Page numbers are counted from 1, array indices from 0.
        indices.push(page - 1);
      }
      return indices;
    }
  2. Extraction

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    import { PDFDocument } from "pdf-lib";

    // we pass the buffer we got on reading step as input
    /**
     * Copy an inclusive range of pages from a PDF into a brand-new PDF document.
     *
     * @param {ArrayBuffer|Uint8Array} arrayBuff - Bytes of the source PDF.
     * @param {number} [firstPage=2] - First page to copy, counted from 1.
     * @param {number} [lastPage=3] - Last page to copy, counted from 1 (inclusive).
     * @returns {Promise<Uint8Array>} Serialized bytes of the new PDF.
     * @throws {RangeError} If the requested pages are not a valid range within the source.
     */
    async function extractPdfPage(arrayBuff, firstPage = 2, lastPage = 3) {
      const pdfSrcDoc = await PDFDocument.load(arrayBuff);

      // Fail early with a descriptive message instead of handing page indices
      // that do not exist in the source document to copyPages.
      const pageCount = pdfSrcDoc.getPageCount();
      const isValidRange =
        Number.isInteger(firstPage) &&
        Number.isInteger(lastPage) &&
        firstPage >= 1 &&
        firstPage <= lastPage &&
        lastPage <= pageCount;
      if (!isValidRange) {
        throw new RangeError(
          `Cannot extract pages ${firstPage}-${lastPage} from a PDF with ${pageCount} page(s)`,
        );
      }

      const pdfNewDoc = await PDFDocument.create();
      // copy only the desired pages (range() converts page numbers to zero-based indices)
      const pages = await pdfNewDoc.copyPages(pdfSrcDoc, range(firstPage, lastPage));
      for (const page of pages) {
        pdfNewDoc.addPage(page);
      }
      return pdfNewDoc.save();
    }
    // it returns a Uint8Array

Render the new PDF in the browser

  1. Make a URL out of it and render it inside an iframe.

    1
    2
    3
    4
    5
    6
    7
    /**
     * Expose PDF bytes to the page as an object URL.
     *
     * The bytes are wrapped in a Blob typed as "application/pdf", and the
     * resulting object URL is handed to `setPdfFileData`.
     *
     * @param {Uint8Array} uint8array - Bytes of the PDF to display.
     * @returns {void}
     */
    function renderPdf(uint8array) {
      const blobOptions = { type: "application/pdf" };
      const pdfBlob = new Blob([uint8array], blobOptions);
      setPdfFileData(URL.createObjectURL(pdfBlob));
    }
  2. Alternatively, you may build a custom PDF viewer with the pdfjs library mentioned above.

Download the new PDF

Use the download function from your browser.

Full code

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import { useEffect, useState } from "react";
import { PDFDocument } from "pdf-lib";

export default function Home() {
const [pdfFileData, setPdfFileData] = useState();

function readFileAsync(file) {
return new Promise((resolve, reject) => {
let reader = new FileReader();
reader.onload = () => {
resolve(reader.result);
};
reader.onerror = reject;
reader.readAsArrayBuffer(file);
});
}

function renderPdf(uint8array) {
const tempblob = new Blob([uint8array], {
type: "application/pdf",
});
const docUrl = URL.createObjectURL(tempblob);
setPdfFileData(docUrl);
}

function range(start, end) {
let length = end - start + 1;
return Array.from({ length }, (_, i) => start + i - 1);
}

async function extractPdfPage(arrayBuff) {
const pdfSrcDoc = await PDFDocument.load(arrayBuff);
const pdfNewDoc = await PDFDocument.create();
const pages = await pdfNewDoc.copyPages(pdfSrcDoc, range(2, 3));
pages.forEach((page) => pdfNewDoc.addPage(page));
const newpdf = await pdfNewDoc.save();
return newpdf;
}

// Execute when user select a file
const onFileSelected = async (e) => {
const fileList = e.target.files;
if (fileList?.length > 0) {
const pdfArrayBuffer = await readFileAsync(fileList[0]);
const newPdfDoc = await extractPdfPage(pdfArrayBuffer);
renderPdf(newPdfDoc);
}
};

return (
<>
<h1>Hello world</h1>
<input
type="file"
id="file-selector"
accept=".pdf"
onChange={onFileSelected}
/>
<iframe
style={{ display: "block", width: "100vw", height: "90vh" }}
title="PdfFrame"
src={pdfFileData}
frameborder="0"
type="application/pdf"
></iframe>
</>
);
}