-
Notifications
You must be signed in to change notification settings - Fork 30
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Migration support for older mupdf-js library (#105)
* gitignore IDE files Took 57 minutes * update package-lock.json Took 1 minute * fix paths in existing test suite, gitignore test output file Took 33 seconds * drawPageAsPng task function Took 23 minutes * drawPageAsHtml task function, with corresponding wasm function Took 22 minutes * update test name Took 28 minutes * drawPageAsSvg function, with corresponding wasm function Took 7 minutes * getPageText function, with corresponding wasm function Took 13 minutes * searchPageText function Took 11 minutes * code style tweaks Took 5 minutes * standardise function name Took 1 hour 35 minutes * initial migration guide Took 39 minutes * fix casing Took 5 seconds * other fixes to docs Took 1 minute * refactor: change function name casing for tasks Took 6 minutes * refactor: remove C implementation of drawPageAsSVG task Took 13 minutes * docs: update migration docs with function name change Took 5 minutes
- Loading branch information
Showing
11 changed files
with
342 additions
and
65 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,3 +2,5 @@ dist | |
node_modules | ||
.next | ||
build | ||
.idea | ||
/docs/venv/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,150 @@ | ||
.. include:: ../../header.rst | ||
|
||
.. _How_To_Guide_Migration: | ||
|
||
Migrating from `mupdf-js` | ||
=========================== | ||
|
||
This guide is intended to help you migrate from the https://github.com/andytango/mupdf-js | ||
library to this one. | ||
|
||
Whilst this package offers a more comprehensive API, we also provide functions | ||
that are similar to those in `mupdf-js` to make the migration easier. These are | ||
available in the `mupdf/tasks` module. | ||
|
||
1. Initialization | ||
------------------- | ||
|
||
Unlike `mupdf-js`, you don't need to initialize the library before using it. | ||
|
||
So you can remove code like this: | ||
|
||
.. code-block:: javascript | ||
import { createMuPdf } from "mupdf-js"; | ||
async function handleSomePdf(file) { | ||
const mupdf = await createMuPdf(); // this is no longer needed | ||
} | ||
---- | ||
|
||
2. Loading a document | ||
--------------------- | ||
|
||
Just like with `mupdf-js`, you can load a document either as a Buffer | ||
(in Node.js), an ArrayBuffer (in the browser), or a Uint8Array (in both environments). | ||
|
||
We provide a `loadPDF` function that is similar to the `load` method in `mupdf-js`. | ||
So you can replace this: | ||
|
||
.. code-block:: javascript | ||
import { createMuPdf } from "mupdf-js"; | ||
async function handleSomePdf(file) { | ||
const mupdf = await createMuPdf(); | ||
const buf = await file.arrayBuffer(); | ||
const arrayBuf = new Uint8Array(buf); | ||
const doc = mupdf.load(arrayBuf); | ||
} | ||
---- | ||
|
||
With this: | ||
|
||
.. code-block:: javascript | ||
import { loadPDF } from "mupdf/tasks"; | ||
async function handleSomePdf(file) { | ||
const buf = await file.arrayBuffer(); | ||
const arrayBuf = new Uint8Array(buf); | ||
const doc = loadPDF(arrayBuf); // Returns a Document instance | ||
} | ||
---- | ||
|
||
3. Converting a page to an image | ||
-------------------------------- | ||
|
||
In `mupdf-js`, you would convert a page to an image like this: | ||
|
||
.. code-block:: javascript | ||
import { createMuPdf } from "mupdf-js"; | ||
async function handleSomePdf(file) { | ||
const mupdf = await createMuPdf(); | ||
const buf = await file.arrayBuffer(); | ||
const arrayBuf = new Uint8Array(buf); | ||
const doc = mupdf.load(arrayBuf); | ||
// Each of these returns a string: | ||
const png = mupdf.drawPageAsPNG(doc, 1, 300); | ||
const svg = mupdf.drawPageAsSVG(doc, 1); | ||
const html = mupdf.drawPageAsHTML(doc, 1); | ||
} | ||
---- | ||
|
||
Here's how you would do it with this package: | ||
|
||
.. code-block:: javascript | ||
import { | ||
loadPDF, | ||
drawPageAsPNG, | ||
drawPageAsSVG, | ||
drawPageAsHTML | ||
} from "mupdf/tasks"; | ||
async function handleSomePdf(file) { | ||
const buf = await file.arrayBuffer(); | ||
const arrayBuf = new Uint8Array(buf); | ||
const doc = loadPDF(arrayBuf); | ||
// Note: page numbers are zero-based in this package, so 0 is the first page. | ||
// Each of these returns a string: | ||
const png = drawPageAsPNG(doc, 0, 300); | ||
const svg = drawPageAsSVG(doc, 0); | ||
const html = drawPageAsHTML(doc, 0); | ||
} | ||
---- | ||
|
||
4. Text operations | ||
------------------- | ||
|
||
Finally, we provide two functions to replace the `mupdf-js` `getPageText` and | ||
`searchPageText` functions: | ||
|
||
.. code-block:: javascript | ||
import { | ||
loadPDF, | ||
getPageText, | ||
searchPageText | ||
} from "mupdf/tasks"; | ||
async function handleSomePdf(file) { | ||
const buf = await file.arrayBuffer(); | ||
const arrayBuf = new Uint8Array(buf); | ||
const doc = loadPDF(arrayBuf); | ||
// Returns plain text for the first page (page numbers are zero-based) | ||
const pageText = getPageText(doc, 0); | ||
// Returns an array of objects with the bounding box for each match: | ||
const searchResults = searchPageText(doc, 1, "some text"); | ||
} | ||
---- | ||
|
||
5. Tests | ||
------------------- | ||
|
||
You can also | ||
`see the tests <https://github.com/ArtifexSoftware/mupdf.js/blob/master/examples/tests/src/tasks.test.ts>`_ | ||
for these functions for more examples of how to use them. | ||
|
||
|
||
.. include:: ../../footer.rst |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,3 +22,6 @@ dist-ssr | |
*.njsproj | ||
*.sln | ||
*.sw? | ||
|
||
# Test output files | ||
/src/resources/output* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
import {describe, expect, it} from 'vitest' | ||
import path from "path" | ||
import * as fs from "node:fs" | ||
import * as mupdf from "../../../dist/mupdf" | ||
import {drawPageAsHTML, drawPageAsPNG, drawPageAsSVG, getPageText, loadPDF, searchPageText} from "../../../dist/tasks" | ||
|
||
const scriptdir = path.resolve(__dirname) | ||
const filename = path.join(scriptdir, "..", "test.pdf") | ||
const outputDir = path.join(scriptdir, "resources") | ||
|
||
const file = fs.readFileSync(filename) | ||
|
||
describe("loadPDF", () => { | ||
it("successfully loads a PDF document", () => { | ||
const file = fs.readFileSync(filename) | ||
let document: null | mupdf.PDFDocument = null | ||
|
||
expect(() => { | ||
document = loadPDF(file) | ||
}).not.toThrow() | ||
|
||
expect(document).not.toBeNull() | ||
}) | ||
}) | ||
|
||
describe("drawPageAsPng", () => { | ||
it("successfully renders a page as PNG", () => { | ||
const result = drawPageAsPNG(loadPDF(file), 0, 150) | ||
expect(result).toHaveLength(173738) | ||
fs.writeFileSync( | ||
path.join(outputDir, "output-tasks.png"), | ||
Buffer.from(result) | ||
) | ||
}) | ||
}) | ||
|
||
describe("drawPageAsHtml", () => { | ||
it("successfully renders a page as HTML", () => { | ||
const result = drawPageAsHTML(loadPDF(file), 0, 0) | ||
expect(result).toHaveLength(654) | ||
fs.writeFileSync( | ||
path.join(outputDir, "output-tasks.html"), | ||
Buffer.from(result) | ||
) | ||
}) | ||
}) | ||
|
||
describe("drawPageAsSvg", () => { | ||
it("successfully renders a page as SVG", () => { | ||
const result = drawPageAsSVG(loadPDF(file), 0) | ||
expect(result).toHaveLength(91467) | ||
fs.writeFileSync( | ||
path.join(outputDir, "output-tasks.svg"), | ||
Buffer.from(result) | ||
) | ||
}) | ||
}) | ||
|
||
describe("getPageText", () => { | ||
it("successfully extracts the text from page", () => { | ||
const result = getPageText(loadPDF(file), 0) | ||
expect(result).toMatchInlineSnapshot(` | ||
"Welcome to the Node server test.pdf file. | ||
Sorry there is not much to see here! | ||
1 | ||
Page 1 footer | ||
" | ||
`) | ||
}) | ||
}) | ||
|
||
describe("searchPageText", () => { | ||
it("returns an array of search results as coordinate bounding boxes", () => { | ||
const result = searchPageText(loadPDF(file), 0, "Welcome", 1) | ||
expect(result).toMatchInlineSnapshot(` | ||
[ | ||
[ | ||
[ | ||
30.7637996673584, | ||
32.626708984375, | ||
80.7696304321289, | ||
32.626708984375, | ||
30.7637996673584, | ||
46.032958984375, | ||
80.7696304321289, | ||
46.032958984375, | ||
], | ||
], | ||
] | ||
`) | ||
}) | ||
|
||
it("returns an empty array if no matches found", () => { | ||
const result = searchPageText(loadPDF(file), 0, "mupdf", 1) | ||
expect(result).toMatchInlineSnapshot(`[]`) | ||
}) | ||
}) |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.