diff --git a/internal/cli/organize.go b/internal/cli/organize.go
new file mode 100644
index 0000000..cdd7571
--- /dev/null
+++ b/internal/cli/organize.go
@@ -0,0 +1,80 @@
+/*
+ * This file is subject to the terms and conditions defined in
+ * file 'LICENSE.md', which is part of this source code package.
+ */
+
+package cli
+
+import (
+	"errors"
+	"fmt"
+
+	"github.com/spf13/cobra"
+	"github.com/unidoc/unipdf-cli/pkg/pdf"
+)
+
+const organizeCmdDesc = `Organize PDF files.
+
+The command is used to organize one or more page ranges from the input file
+and save the result as the output file.
+If no page range is specified, all the pages from the input file will be
+copied to the output file.
+
+An example of the pages parameter: 1-3,4,6-7
+Only pages 1,2,3 (1-3), 4 and 6,7 (6-7) will be present in the output file,
+while page number 5 is skipped.
+`
+
+var organizeCmdExample = fmt.Sprintf("%s\n%s\n",
+	fmt.Sprintf("%s organize input_file.pdf output_file.pdf 1-2", appName),
+	fmt.Sprintf("%s organize -p pass input_file.pdf output_file.pdf 1-2,4", appName),
+)
+
+// organizeCmd represents the organize command. It takes the input and output
+// file paths plus an optional page list, and delegates the work to pdf.Organize.
+var organizeCmd = &cobra.Command{
+	Use:                   "organize [FLAG]... INPUT_FILE OUTPUT_FILE [PAGES]",
+	Short:                 "Organize PDF files",
+	Long:                  organizeCmdDesc,
+	Example:               organizeCmdExample,
+	DisableFlagsInUseLine: true,
+	Run: func(cmd *cobra.Command, args []string) {
+		inputPath := args[0]
+		outputPath := args[1]
+		password, _ := cmd.Flags().GetString("password")
+
+		// Parse page range.
+		var err error
+		var pages []int
+
+		if len(args) > 2 {
+			if pages, err = parsePageRangeUnsorted(args[2]); err != nil {
+				printUsageErr(cmd, "Invalid page range specified\n")
+				return
+			}
+		}
+
+		// Return after reporting a failure so that the success messages below
+		// are never printed for a failed run.
+		if err := pdf.Organize(inputPath, outputPath, password, pages); err != nil {
+			printErr("Error: %s\n", err)
+			return
+		}
+
+		fmt.Printf("Successfully organized file %s\n", inputPath)
+		fmt.Printf("Output file saved to %s\n", outputPath)
+	},
+	Args: func(_ *cobra.Command, args []string) error {
+		if len(args) < 2 {
+			return errors.New("must provide at least the input and output files")
+		}
+
+		return nil
+	},
+}
+
+func init() {
+	rootCmd.AddCommand(organizeCmd)
+
+	organizeCmd.Flags().StringP("password", "p", "", "input file password")
+}
diff --git a/internal/cli/utils.go b/internal/cli/utils.go
index e52d9c6..b97d7a2 100644
--- a/internal/cli/utils.go
+++ b/internal/cli/utils.go
@@ -39,7 +39,6 @@ func parsePageRange(pageRange string) ([]int, error) {
 		}
 
 		indices := strings.Split(rng, "-")
-
 		lenIndices := len(indices)
 		if lenIndices > 2 {
 			return nil, errors.New("invalid page range")
@@ -86,6 +85,70 @@ func parsePageRange(pageRange string) ([]int, error) {
 	return pages, nil
 }
 
+// parsePageRangeUnsorted parses a comma separated list of page numbers and
+// page ranges (e.g. "1-3,4,6-7") into a list of page numbers. Spaces are
+// stripped with removeSpaces before parsing and empty list items are skipped.
+// Every page number must be greater than 0 and a range must not end before
+// it starts. Pages are collected in the order in which they appear in the
+// input; the collected list is passed through uniqueIntSlice before returning.
+func parsePageRangeUnsorted(pageRange string) ([]int, error) {
+	var pages []int
+
+	rngs := strings.Split(removeSpaces(pageRange), ",")
+	for _, rng := range rngs {
+		if rng == "" {
+			continue
+		}
+
+		indices := strings.Split(rng, "-")
+		lenIndices := len(indices)
+		if lenIndices > 2 {
+			return nil, errors.New("invalid page range")
+		}
+		if lenIndices == 2 {
+			start, err := strconv.Atoi(indices[0])
+			if err != nil {
+				return nil, errors.New("invalid start page number")
+			}
+			if start < 1 {
+				return nil, errors.New("page range start must be greater than 0")
+			}
+
+			end, err := strconv.Atoi(indices[1])
+			if err != nil {
+				return nil, errors.New("invalid end page number")
+			}
+			if end < 1 {
+				return nil, errors.New("page range end must be greater than 0")
+			}
+
+			if start > end {
+				return nil, errors.New("page range end must be greater than the start")
+			}
+
+			for page := start; page <= end; page++ {
+				pages = append(pages, page)
+			}
+
+			continue
+		}
+
+		page, err := strconv.Atoi(indices[0])
+		if err != nil {
+			return nil, errors.New("invalid page number")
+		}
+		if page < 1 {
+			return nil, errors.New("page number must be greater than 0")
+		}
+
+		pages = append(pages, page)
+	}
+
+	pages = uniqueIntSlice(pages)
+
+	return pages, nil
+}
+
 func parseInputPaths(inputPaths []string, recursive bool, matcher fileMatcher) ([]string, error) {
 	var err error
 	var files []string
diff --git a/pkg/pdf/organize.go b/pkg/pdf/organize.go
new file mode 100644
index 0000000..17890fe
--- /dev/null
+++ b/pkg/pdf/organize.go
@@ -0,0 +1,168 @@
+/*
+ * This file is subject to the terms and conditions defined in
+ * file 'LICENSE.md', which is part of this source code package.
+ */
+
+package pdf
+
+import (
+	"github.com/unidoc/unipdf/v3/common"
+	unipdf "github.com/unidoc/unipdf/v3/model"
+)
+
+// Organize extracts the provided page list from the PDF file specified by
+// the inputPath parameter, adds the pages to a new document in the order in
+// which they are listed and saves the result at the location specified by
+// the outputPath parameter. If the page list is empty, all the pages of the
+// input file are copied.
+// A password can be passed in for encrypted input files.
+func Organize(inputPath, outputPath, password string, pages []int) error {
+	// Read input file.
+	pdfReader, _, _, _, err := readPDF(inputPath, password)
+	if err != nil {
+		return err
+	}
+
+	// An empty page list selects every page of the input file, in document
+	// order, instead of producing an output file without any pages.
+	if len(pages) == 0 {
+		numPages, err := pdfReader.GetNumPages()
+		if err != nil {
+			return err
+		}
+
+		pages = make([]int, 0, numPages)
+		for num := 1; num <= numPages; num++ {
+			pages = append(pages, num)
+		}
+	}
+
+	// Add selected pages to the writer.
+	pdfWriter := unipdf.NewPdfWriter()
+
+	for _, num := range pages {
+		page, err := pdfReader.GetPage(num)
+		if err != nil {
+			return err
+		}
+
+		err = pdfWriter.AddPage(page)
+		if err != nil {
+			return err
+		}
+	}
+
+	// Copy PDF version.
+	version := pdfReader.PdfVersion()
+	pdfWriter.SetVersion(version.Major, version.Minor)
+
+	// Copy PDF info.
+	info, err := pdfReader.GetPdfInfo()
+	if err != nil {
+		common.Log.Debug("ERROR: %v", err)
+	} else {
+		pdfWriter.SetDocInfo(info)
+	}
+
+	// Copy Catalog Metadata.
+	if meta, ok := pdfReader.GetCatalogMetadata(); ok {
+		if err := pdfWriter.SetCatalogMetadata(meta); err != nil {
+			return err
+		}
+	}
+
+	// Copy catalog mark information.
+	if markInfo, ok := pdfReader.GetCatalogMarkInfo(); ok {
+		if err := pdfWriter.SetCatalogMarkInfo(markInfo); err != nil {
+			return err
+		}
+	}
+
+	// Copy AcroForm.
+	err = pdfWriter.SetForms(pdfReader.AcroForm)
+	if err != nil {
+		common.Log.Debug("ERROR: %v", err)
+		return err
+	}
+
+	// Copy viewer preferences.
+	if pref, ok := pdfReader.GetCatalogViewerPreferences(); ok {
+		if err := pdfWriter.SetCatalogViewerPreferences(pref); err != nil {
+			return err
+		}
+	}
+
+	// Copy language preferences.
+	if lang, ok := pdfReader.GetCatalogLanguage(); ok {
+		if err := pdfWriter.SetCatalogLanguage(lang); err != nil {
+			return err
+		}
+	}
+
+	// Copy document outlines.
+	pdfWriter.AddOutlineTree(pdfReader.GetOutlineTree())
+
+	// Copy OC Properties.
+	props, err := pdfReader.GetOCProperties()
+	if err != nil {
+		common.Log.Debug("ERROR: %v", err)
+	} else {
+		err = pdfWriter.SetOCProperties(props)
+		if err != nil {
+			common.Log.Debug("ERROR: %v", err)
+		}
+	}
+
+	// Copy page labels.
+	labelObj, err := pdfReader.GetPageLabels()
+	if err != nil {
+		common.Log.Debug("ERROR: %v", err)
+	} else {
+		err = pdfWriter.SetPageLabels(labelObj)
+		if err != nil {
+			common.Log.Debug("ERROR: %v", err)
+		}
+	}
+
+	// Copy named destinations.
+	namedDest, err := pdfReader.GetNamedDestinations()
+	if err != nil {
+		common.Log.Debug("ERROR: %v", err)
+	} else {
+		err = pdfWriter.SetNamedDestinations(namedDest)
+		if err != nil {
+			common.Log.Debug("ERROR: %v", err)
+		}
+	}
+
+	// Copy name dictionary.
+	nameDict, err := pdfReader.GetNameDictionary()
+	if err != nil {
+		common.Log.Debug("ERROR: %v", err)
+	} else {
+		err = pdfWriter.SetNameDictionary(nameDict)
+		if err != nil {
+			common.Log.Debug("ERROR: %v", err)
+		}
+	}
+
+	// Copy StructTreeRoot dictionary.
+	structTreeRoot, found := pdfReader.GetCatalogStructTreeRoot()
+	if found {
+		err := pdfWriter.SetCatalogStructTreeRoot(structTreeRoot)
+		if err != nil {
+			common.Log.Debug("ERROR: %v", err)
+		}
+	}
+
+	// Copy global page rotation.
+	if pdfReader.Rotate != nil {
+		if err := pdfWriter.SetRotation(*pdfReader.Rotate); err != nil {
+			common.Log.Debug("ERROR: %v", err)
+		}
+	}
+
+	// Write output file.
+	safe := inputPath == outputPath
+	return writePDF(outputPath, &pdfWriter, safe)
+}