twain3.0/3rdparty/hgOCR/leptonica/pdfio2.c

/*====================================================================*
 -  Copyright (C) 2001 Leptonica.  All rights reserved.
 -
 -  Redistribution and use in source and binary forms, with or without
 -  modification, are permitted provided that the following conditions
 -  are met:
 -  1. Redistributions of source code must retain the above copyright
 -     notice, this list of conditions and the following disclaimer.
 -  2. Redistributions in binary form must reproduce the above
 -     copyright notice, this list of conditions and the following
 -     disclaimer in the documentation and/or other materials
 -     provided with the distribution.
 -
 -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
 -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *====================================================================*/

/*!
 * \file pdfio2.c
 * <pre>
 *
 *    Lower-level operations for generating pdf.
 *
 *     Intermediate function for single page, multi-image conversion
 *          l_int32              pixConvertToPdfData()
 *
 *     Intermediate function for generating multipage pdf output
 *          l_int32              ptraConcatenatePdfToData()
 *
 *     Convert tiff multipage to pdf file
 *          l_int32              convertTiffMultipageToPdf()
 *
 *     Low-level CID-based operations
 *
 *       Without transcoding
 *          l_int32              l_generateCIDataForPdf()
 *          L_COMP_DATA         *l_generateFlateDataPdf()
 *          L_COMP_DATA         *l_generateJpegData()
 *          L_COMP_DATA         *l_generateJpegDataMem()
 *          static L_COMP_DATA  *l_generateJp2kData()
 *
 *       With transcoding
 *          l_int32              l_generateCIData()
 *          l_int32              pixGenerateCIData()
 *          L_COMP_DATA         *l_generateFlateData()
 *          static L_COMP_DATA  *pixGenerateFlateData()
 *          static L_COMP_DATA  *pixGenerateJpegData()
 *          static L_COMP_DATA  *pixGenerateJp2kData()
 *          static L_COMP_DATA  *pixGenerateG4Data()
 *          L_COMP_DATA         *l_generateG4Data()
 *
 *       Other
 *          l_int32              cidConvertToPdfData()
 *          void                 l_CIDataDestroy()
 *
 *     Helper functions for generating the output pdf string
 *          static l_int32       l_generatePdf()
 *          static void          generateFixedStringsPdf()
 *          static char         *generateEscapeString()
 *          static void          generateMediaboxPdf()
 *          static l_int32       generatePageStringPdf()
 *          static l_int32       generateContentStringPdf()
 *          static l_int32       generatePreXStringsPdf()
 *          static l_int32       generateColormapStringsPdf()
 *          static void          generateTrailerPdf()
 *          static char         *makeTrailerStringPdf()
 *          static l_int32       generateOutputDataPdf()
 *
 *     Helper functions for generating multipage pdf output
 *          static l_int32       parseTrailerPdf()
 *          static char         *generatePagesObjStringPdf()
 *          static L_BYTEA      *substituteObjectNumbers()
 *
 *     Create/destroy/access pdf data
 *          static L_PDF_DATA   *pdfdataCreate()
 *          static void          pdfdataDestroy()
 *          static L_COMP_DATA  *pdfdataGetCid()
 *
 *     Set flags for special modes
 *          void                 l_pdfSetG4ImageMask()
 *          void                 l_pdfSetDateAndVersion()
 * </pre>
 */

#include <string.h>
#include <math.h>
#include "allheaders.h"

/* --------------------------------------------*/
#if  USE_PDFIO   /* defined in environ.h */
 /* --------------------------------------------*/

    /* Typical scan resolution in ppi (pixels/inch) */
static const l_int32  DefaultInputRes = 300;

    /* Static helpers */
static L_COMP_DATA  *l_generateJp2kData(const char *fname);
static L_COMP_DATA  *pixGenerateFlateData(PIX *pixs, l_int32 ascii85flag);
static L_COMP_DATA  *pixGenerateJpegData(PIX *pixs, l_int32 ascii85flag,
                                         l_int32 quality);
static L_COMP_DATA  *pixGenerateJp2kData(PIX *pixs, l_int32 quality);
static L_COMP_DATA  *pixGenerateG4Data(PIX *pixs, l_int32 ascii85flag);

static l_int32       l_generatePdf(l_uint8 **pdata, size_t *pnbytes,
                                   L_PDF_DATA  *lpd);
static void          generateFixedStringsPdf(L_PDF_DATA *lpd);
static char         *generateEscapeString(const char  *str);
static void          generateMediaboxPdf(L_PDF_DATA *lpd);
static l_int32       generatePageStringPdf(L_PDF_DATA *lpd);
static l_int32       generateContentStringPdf(L_PDF_DATA *lpd);
static l_int32       generatePreXStringsPdf(L_PDF_DATA *lpd);
static l_int32       generateColormapStringsPdf(L_PDF_DATA *lpd);
static void          generateTrailerPdf(L_PDF_DATA *lpd);
static char         *makeTrailerStringPdf(L_DNA *daloc);
static l_int32       generateOutputDataPdf(l_uint8 **pdata, size_t *pnbytes,
                                       L_PDF_DATA *lpd);

static l_int32       parseTrailerPdf(L_BYTEA *bas, L_DNA **pda);
static char         *generatePagesObjStringPdf(NUMA *napage);
static L_BYTEA      *substituteObjectNumbers(L_BYTEA *bas, NUMA *na_objs);

static L_PDF_DATA   *pdfdataCreate(const char *title);
static void          pdfdataDestroy(L_PDF_DATA **plpd);
static L_COMP_DATA  *pdfdataGetCid(L_PDF_DATA *lpd, l_int32 index);


/* ---------------- Defaults for rendering options ----------------- */
/* These are file-scope, mutable flags.  The file header lists
 * l_pdfSetG4ImageMask() and l_pdfSetDateAndVersion() under "Set flags
 * for special modes"; presumably those are the setters for these two
 * variables -- confirm against their definitions later in the file. */
    /* Output G4 as writing through image mask; this is the default */
static l_int32   var_WRITE_G4_IMAGE_MASK = 1;
    /* Write date/time and lib version into pdf; this is the default */
static l_int32   var_WRITE_DATE_AND_VERSION = 1;

/* Sizes (in bytes) for scratch string buffers; the users of these
 * macros are not in the visible part of the file. */
#define L_SMALLBUF   256
#define L_BIGBUF    2048   /* must be able to hold hex colormap */


#ifndef  NO_CONSOLE_IO
#define  DEBUG_MULTIPAGE      0
#endif  /* ~NO_CONSOLE_IO */


/*---------------------------------------------------------------------*
 *    Intermediate function for single page, multi-image conversion    *
 *---------------------------------------------------------------------*/
/*!
 * \brief   pixConvertToPdfData()
 *
 * \param[in]      pix       all depths; cmap OK
 * \param[in]      type      L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE,
 *                           L_JP2K_ENCODE; any other value causes a
 *                           default encoding to be selected for %pix
 * \param[in]      quality   for jpeg: 1-100; 0 for default (75)
 *                           for jp2k: 27-45; 0 for default (34)
 * \param[out]     pdata     pdf array
 * \param[out]     pnbytes   number of bytes in pdf array
 * \param[in]      x, y      location of lower-left corner of image, in pixels,
 *                           relative to the PostScript origin (0,0) at
 *                           the lower-left corner of the page)
 * \param[in]      res       override the resolution of the input image, in ppi;
 *                           use 0 to respect resolution embedded in the input
 * \param[in]      title     [optional] pdf title; can be null
 * \param[in,out]  plpd      ptr to lpd; created on the first invocation and
 *                           returned until last image is processed.
 *                           Use NULL for a single image.
 * \param[in]      position  in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE,
 *                           L_LAST_IMAGE; ignored if %plpd is NULL
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) If %res <= 0 and the resolution found for the image is
 *          not positive, this will use DefaultInputRes.
 *      (2) %pdata and %pnbytes are always reset to NULL and 0 on entry.
 *          The pdf is only generated when this is a single image
 *          (%plpd == NULL) or the last image (%position == L_LAST_IMAGE);
 *          at that point the lpd is destroyed and *plpd is set to NULL.
 *      (3) With %plpd defined and %position == L_FIRST_IMAGE, *plpd is
 *          overwritten with a newly created lpd.  For any other
 *          %position, *plpd must already hold the lpd made on the
 *          first call; if it is NULL (e.g., because the first call
 *          failed), this returns an error instead of dereferencing it.
 *      (4) See comments in convertToPdf().
 * </pre>
 */
l_ok
pixConvertToPdfData(PIX          *pix,
                    l_int32       type,
                    l_int32       quality,
                    l_uint8     **pdata,
                    size_t       *pnbytes,
                    l_int32       x,
                    l_int32       y,
                    l_int32       res,
                    const char   *title,
                    L_PDF_DATA  **plpd,
                    l_int32       position)
{
l_int32       pixres, w, h, ret;
l_float32     xpt, ypt, wpt, hpt;
L_COMP_DATA  *cid = NULL;
L_PDF_DATA   *lpd = NULL;

    PROCNAME("pixConvertToPdfData");

    if (!pdata)
        return ERROR_INT("&data not defined", procName, 1);
    *pdata = NULL;
    if (!pnbytes)
        return ERROR_INT("&nbytes not defined", procName, 1);
    *pnbytes = 0;
    if (!pix)
        return ERROR_INT("pix not defined", procName, 1);
    if (type != L_JPEG_ENCODE && type != L_G4_ENCODE &&
        type != L_FLATE_ENCODE && type != L_JP2K_ENCODE) {
        selectDefaultPdfEncoding(pix, &type);
    }
    if (plpd) {  /* part of a multi-image invocation */
        if (position == L_FIRST_IMAGE) {
            *plpd = NULL;
        } else if (*plpd == NULL) {
                /* No lpd to accumulate into; without this check the
                 * code below would dereference a null lpd. */
            return ERROR_INT("lpd not defined for image that is not first",
                             procName, 1);
        }
    }

        /* Generate the compressed image data.  It must NOT
         * be ascii85 encoded. */
    pixGenerateCIData(pix, type, quality, 0, &cid);
    if (!cid)
        return ERROR_INT("cid not made", procName, 1);

        /* Get media box in pts.  Guess the input image resolution
         * based on the input parameter %res, the resolution data in
         * the pix, and the size of the image. */
    pixres = cid->res;
    w = cid->w;
    h = cid->h;
    if (res <= 0) {
        if (pixres > 0)
            res = pixres;
        else
            res = DefaultInputRes;
    }
    xpt = x * 72. / res;
    ypt = y * 72. / res;
    wpt = w * 72. / res;
    hpt = h * 72. / res;

        /* Set up lpd.  The cid is not yet owned by any lpd, so it
         * must be destroyed here if the lpd cannot be made. */
    if (!plpd) {  /* single image */
        if ((lpd = pdfdataCreate(title)) == NULL) {
            l_CIDataDestroy(&cid);
            return ERROR_INT("lpd not made", procName, 1);
        }
    } else if (position == L_FIRST_IMAGE) {  /* first of multiple images */
        if ((lpd = pdfdataCreate(title)) == NULL) {
            l_CIDataDestroy(&cid);
            return ERROR_INT("lpd not made", procName, 1);
        }
        *plpd = lpd;
    } else {  /* not the first of multiple images; verified non-null above */
        lpd = *plpd;
    }

        /* Add the data to the lpd */
    ptraAdd(lpd->cida, cid);
    lpd->n++;
    ptaAddPt(lpd->xy, xpt, ypt);
    ptaAddPt(lpd->wh, wpt, hpt);

        /* If a single image or the last of multiple images,
         * generate the pdf and destroy the lpd */
    if (!plpd || (position == L_LAST_IMAGE)) {
        ret = l_generatePdf(pdata, pnbytes, lpd);
        pdfdataDestroy(&lpd);
        if (plpd) *plpd = NULL;
        if (ret)
            return ERROR_INT("pdf output not made", procName, 1);
    }

    return 0;
}


/*---------------------------------------------------------------------*
 *      Intermediate function for generating multipage pdf output      *
 *---------------------------------------------------------------------*/
/*!
 * \brief   ptraConcatenatePdfToData()
 *
 * \param[in]    pa_data    ptra array of pdf strings, each for a
 *                          single-page pdf file
 * \param[in]    sa         [optional] string array of pathnames for
 *                          input pdf files; can be null
 * \param[out]   pdata      concatenated pdf data in memory
 * \param[out]   pnbytes    number of bytes in pdf data
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) This only works with leptonica-formatted single-page pdf files.
 *          pdf files generated by other programs will have unpredictable
 *          (and usually bad) results.  The requirements for each pdf file:
 *            (a) The Catalog and Info objects are the first two.
 *            (b) Object 3 is Pages
 *            (c) Object 4 is Page
 *            (d) The remaining objects are Contents, XObjects, and ColorSpace
 *      (2) We remove trailers from each page, and append the full trailer
 *          for all pages at the end.
 *      (3) For all but the first file, remove the ID and the first 3
 *          objects (catalog, info, pages), so that each subsequent
 *          file has only objects of these classes:
 *              Page, Contents, XObject, ColorSpace (Indexed RGB).
 *          For those objects, we substitute these refs to objects
 *          in the local file:
 *              Page:  Parent(object 3), Contents, XObject(typically multiple)
 *              XObject:  [ColorSpace if indexed]
 *          The Pages object on the first page (object 3) has a Kids array
 *          of references to all the Page objects, with a Count equal
 *          to the number of pages.  Each Page object refers back to
 *          this parent.
 *      (4) Side effect on %pa_data: every entry whose trailer cannot be
 *          parsed is removed from the ptra and destroyed, and the
 *          ptra is then compacted.  %sa is only used to name such
 *          entries in the error message.
 *      (5) On success, *pdata is set from l_byteaCopyData(); presumably
 *          the caller owns and must free it -- confirm against
 *          l_byteaCopyData().
 * </pre>
 */
l_ok
ptraConcatenatePdfToData(L_PTRA    *pa_data,
                         SARRAY    *sa,
                         l_uint8  **pdata,
                         size_t    *pnbytes)
{
char     *fname, *str_pages, *str_trailer;
l_uint8  *pdfdata, *data;
l_int32   i, j, index, nobj, npages;
l_int32  *sizes, *locs;
size_t    size;
L_BYTEA  *bas, *bad, *bat1, *bat2;
L_DNA    *da_locs, *da_sizes, *da_outlocs, *da;
L_DNAA   *daa_locs;  /* object locations on each page */
NUMA     *na_objs, *napage;
NUMAA    *naa_objs;  /* object mapping numbers to new values */

    PROCNAME("ptraConcatenatePdfToData");

    if (!pdata)
        return ERROR_INT("&data not defined", procName, 1);
    *pdata = NULL;
    if (!pnbytes)
        return ERROR_INT("&nbytes not defined", procName, 1);
    *pnbytes = 0;
    if (!pa_data)
        return ERROR_INT("pa_data not defined", procName, 1);

        /* Parse the files and find the object locations.
         * Remove file data that cannot be parsed.  Removal uses
         * L_NO_COMPACTION so that the index i of later entries is
         * not shifted while this loop is running. */
    ptraGetActualCount(pa_data, &npages);
    daa_locs = l_dnaaCreate(npages);
    for (i = 0; i < npages; i++) {
        bas = (L_BYTEA *)ptraGetPtrToItem(pa_data, i);
        if (parseTrailerPdf(bas, &da_locs) != 0) {
            bas = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION);
            l_byteaDestroy(&bas);
            if (sa) {
                fname = sarrayGetString(sa, i, L_NOCOPY);
                L_ERROR("can't parse file %s; skipping\n", procName, fname);
            } else {
                L_ERROR("can't parse file %d; skipping\n", procName, i);
            }
        } else {
                /* daa_locs takes over da_locs (L_INSERT), so only
                 * successfully parsed files have an entry, and the
                 * entries line up with pa_data after compaction. */
            l_dnaaAddDna(daa_locs, da_locs, L_INSERT);
        }
    }

        /* Recompute npages in case some of the files were not pdf */
    ptraCompactArray(pa_data);
    ptraGetActualCount(pa_data, &npages);
    if (npages == 0) {
        l_dnaaDestroy(&daa_locs);
        return ERROR_INT("no parsable pdf files found", procName, 1);
    }

        /* Find the mapping from initial to final object numbers.
         * Here nobj is the full count of entries in the location
         * array for the file; the arrays built below have nobj - 1
         * entries.  Presumably the extra location is that of the
         * xref table -- confirm against parseTrailerPdf().
         * %index is the next unused object number in the output. */
    naa_objs = numaaCreate(npages);  /* stores final object numbers */
    napage = numaCreate(npages);  /* stores "Page" object numbers */
    index = 0;
    for (i = 0; i < npages; i++) {
        da = l_dnaaGetDna(daa_locs, i, L_CLONE);
        nobj = l_dnaGetCount(da);
        if (i == 0) {
                /* First file: objects keep their original numbers */
            numaAddNumber(napage, 4);  /* object 4 on first page */
            na_objs = numaMakeSequence(0.0, 1.0, nobj - 1);
            index = nobj - 1;
        } else {  /* skip the first 3 objects in each file */
            numaAddNumber(napage, index);  /* Page object is first we add */
            na_objs = numaMakeConstant(0.0, nobj - 1);
            numaReplaceNumber(na_objs, 3, 3);  /* refers to parent of all */
            for (j = 4; j < nobj - 1; j++)
                numaSetValue(na_objs, j, index++);
        }
        numaaAddNuma(naa_objs, na_objs, L_INSERT);
        l_dnaDestroy(&da);
    }

        /* Make the Pages object (#3) */
    str_pages = generatePagesObjStringPdf(napage);

        /* Build the output.  In this loop nobj is one less than the
         * number of entries in da_locs (unlike in the loop above).
         * NOTE(review): for the first file, locs[0..3] and sizes[0..2]
         * are read without checking that the file has that many
         * entries, and sizes/locs are not checked for NULL; this
         * relies on parseTrailerPdf() and the input satisfying
         * Note (1) -- confirm. */
    bad = l_byteaCreate(5000);
    da_outlocs = l_dnaCreate(0);  /* locations of all output objects */
    for (i = 0; i < npages; i++) {
        bas = (L_BYTEA *)ptraGetPtrToItem(pa_data, i);
        pdfdata = l_byteaGetData(bas, &size);
        da_locs = l_dnaaGetDna(daa_locs, i, L_CLONE);  /* locs on this page */
        na_objs = numaaGetNuma(naa_objs, i, L_CLONE);  /* obj # on this page */
        nobj = l_dnaGetCount(da_locs) - 1;
        da_sizes = l_dnaDiffAdjValues(da_locs);  /* object sizes on this page */
        sizes = l_dnaGetIArray(da_sizes);
        locs = l_dnaGetIArray(da_locs);
        if (i == 0) {
                /* Copy the first sizes[0] bytes of the file and
                 * objects 1 and 2 unchanged, then write the new Pages
                 * object in place of the original object 3.  The
                 * first four output locations are the original ones,
                 * because nothing before object 3 has changed size. */
            l_byteaAppendData(bad, pdfdata, sizes[0]);
            l_byteaAppendData(bad, pdfdata + locs[1], sizes[1]);
            l_byteaAppendData(bad, pdfdata + locs[2], sizes[2]);
            l_byteaAppendString(bad, str_pages);
            for (j = 0; j < 4; j++)
                l_dnaAddNumber(da_outlocs, locs[j]);
        }
            /* Objects 4 and up from every file: record where each one
             * starts in the output, then append it with its object
             * numbers remapped through na_objs.  Note that %size is
             * reused here for the size of the remapped object. */
        for (j = 4; j < nobj; j++) {
            l_dnaAddNumber(da_outlocs, l_byteaGetSize(bad));
            bat1 = l_byteaInitFromMem(pdfdata + locs[j], sizes[j]);
            bat2 = substituteObjectNumbers(bat1, na_objs);
            data = l_byteaGetData(bat2, &size);
            l_byteaAppendData(bad, data, size);
            l_byteaDestroy(&bat1);
            l_byteaDestroy(&bat2);
        }
            /* The final entry is the offset just past the last object,
             * which is where the trailer string is appended below. */
        if (i == npages - 1)  /* last one */
            l_dnaAddNumber(da_outlocs, l_byteaGetSize(bad));
        LEPT_FREE(sizes);
        LEPT_FREE(locs);
        l_dnaDestroy(&da_locs);
        numaDestroy(&na_objs);
        l_dnaDestroy(&da_sizes);
    }

        /* Add the trailer */
    str_trailer = makeTrailerStringPdf(da_outlocs);
    l_byteaAppendString(bad, str_trailer);

        /* Transfer the output data */
    *pdata = l_byteaCopyData(bad, pnbytes);
    l_byteaDestroy(&bad);

#if  DEBUG_MULTIPAGE
    fprintf(stderr, "******** object mapper **********");
    numaaWriteStream(stderr, naa_objs);

    fprintf(stderr, "******** Page object numbers ***********");
    numaWriteStream(stderr, napage);

    fprintf(stderr, "******** Pages object ***********\n");
    fprintf(stderr, "%s\n", str_pages);
#endif  /* DEBUG_MULTIPAGE */

    numaDestroy(&napage);
    numaaDestroy(&naa_objs);
    l_dnaDestroy(&da_outlocs);
    l_dnaaDestroy(&daa_locs);
    LEPT_FREE(str_pages);
    LEPT_FREE(str_trailer);
    return 0;
}


/*---------------------------------------------------------------------*
 *                  Convert tiff multipage to pdf file                 *
 *---------------------------------------------------------------------*/
/*!
 * \brief   convertTiffMultipageToPdf()
 *
 * \param[in]    filein    (tiff)
 * \param[in]    fileout   (pdf)
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) A multipage tiff file can also be converted to PS, using
 *          convertTiffMultipageToPS()
 *      (2) %filein is first opened just to verify that it is tiff;
 *          the pages are then read into a pixa and written to
 *          %fileout with pixaConvertToPdf().
 *      (3) An error is returned if the pages cannot be read or if
 *          the pdf cannot be written.
 * </pre>
 */
l_ok
convertTiffMultipageToPdf(const char  *filein,
                          const char  *fileout)
{
l_int32  istiff, ret;
PIXA    *pixa;
FILE    *fp;

    PROCNAME("convertTiffMultipageToPdf");

    if (!filein)
        return ERROR_INT("filein not defined", procName, 1);
    if (!fileout)
        return ERROR_INT("fileout not defined", procName, 1);

        /* Verify the format before trying to read any pages */
    if ((fp = fopenReadStream(filein)) == NULL)
        return ERROR_INT("file not found", procName, 1);
    istiff = fileFormatIsTiff(fp);
    fclose(fp);
    if (!istiff)
        return ERROR_INT("file not tiff format", procName, 1);

    if ((pixa = pixaReadMultipageTiff(filein)) == NULL)
        return ERROR_INT("pixa not made", procName, 1);

        /* Propagate a conversion failure instead of reporting success.
         * The string "weasel2" is passed where pixaConvertToPdf() takes
         * its title -- presumably a placeholder; confirm. */
    ret = pixaConvertToPdf(pixa, 0, 1.0, 0, 0, "weasel2", fileout);
    pixaDestroy(&pixa);
    if (ret)
        return ERROR_INT("pdf not made", procName, 1);
    return 0;
}


/*---------------------------------------------------------------------*
 *                     Low-level CID-based operations                  *
 *---------------------------------------------------------------------*/
/*!
 * \brief   l_generateCIDataForPdf()
 *
 * \param[in]    fname      [optional] can be null
 * \param[in]    pix        [optional] can be null
 * \param[in]    quality    for jpeg if transcoded: 1-100; 0 for default (75)
 *                          for jp2k if transcoded: 27-45; 0 for default (34)
 * \param[out]   pcid       compressed data
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) At least one of %fname and %pix must be given.
 *      (2) When %fname names a jpeg, jp2k or png file, the cid is made
 *          directly from the file, avoiding transcoding where possible.
 *          PostScript and pdf input files are rejected.
 *      (3) If no cid results from the file, the image is transcoded
 *          with the default pdf encoding for that image.  The raster
 *          comes from %pix if it is given; otherwise it is read
 *          from %fname.
 *      (4) The file format is not examined for files named "stdin"
 *          or "-", for Tesseract compatibility reasons.  This
 *          restriction may be removed in the future.
 * </pre>
 */
l_ok
l_generateCIDataForPdf(const char    *fname,
                       PIX           *pix,
                       l_int32        quality,
                       L_COMP_DATA  **pcid)
{
l_int32       format, type, usefile;
L_COMP_DATA  *cid = NULL;
PIX          *pixsrc;

    PROCNAME("l_generateCIDataForPdf");

    if (pcid == NULL)
        return ERROR_INT("&cid not defined", procName, 1);
    *pcid = NULL;
    if (fname == NULL && pix == NULL)
        return ERROR_INT("neither fname nor pix are defined", procName, 1);

        /* First choice: build the cid straight from a compressed
         * file, provided the name is not one of the stdin aliases. */
    usefile = (fname != NULL) && (strcmp(fname, "-") != 0) &&
              (strcmp(fname, "stdin") != 0);
    if (usefile) {
        findFileFormat(fname, &format);
        switch (format) {
        case IFF_UNKNOWN:
            L_WARNING("file %s format is unknown\n", procName, fname);
            break;
        case IFF_PS:
        case IFF_LPDF:
            L_ERROR("file %s is unsupported format %d\n",
                  procName, fname, format);
            return 1;
        case IFF_JFIF_JPEG:
            cid = l_generateJpegData(fname, 0);
            break;
        case IFF_JP2:
            cid = l_generateJp2kData(fname);
            break;
        case IFF_PNG:
            cid = l_generateFlateDataPdf(fname, pix);
            break;
        default:
            break;
        }
    }

        /* Second choice: transcode from a raster image */
    if (cid == NULL) {
        pixsrc = (pix != NULL) ? pixClone(pix) : pixRead(fname);
        if (pixsrc == NULL)
            return ERROR_INT("pixt not made", procName, 1);
        if (selectDefaultPdfEncoding(pixsrc, &type) != 0) {
            pixDestroy(&pixsrc);
            return 1;
        }
        pixGenerateCIData(pixsrc, type, quality, 0, &cid);
        pixDestroy(&pixsrc);
    }

    if (cid == NULL) {
        L_ERROR("totally kerflummoxed\n", procName);
        return 1;
    }
    *pcid = cid;
    return 0;
}


/*!
 * \brief   l_generateFlateDataPdf()
 *
 * \param[in]    fname     preferably png
 * \param[in]    pixs      [optional] can be null
 * \return  cid containing png data, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) If you hand this a png file, you are going to get
 *          png predictors embedded in the flate data. So it has
 *          come to this. http://xkcd.com/1022/
 *      (2) Exception: the image is transcoded if the file is not png,
 *          or if the png is interlaced, is 1 bpp, or has an alpha
 *          channel (spp == 2 or spp == 4 in the header).
 *          A 16 bps png is handed to l_generateFlateData().
 *      (3) If transcoding is required, this will not have to read from
 *          file if you also input a pix.
 *      (4) When the png is inlined, the file is walked chunk by chunk:
 *          the contents of all IDAT chunks are concatenated into the
 *          compressed data, and the first PLTE chunk (if the header
 *          says there is a colormap) supplies the colormap.
 *          A chunk length that runs past the end of the file, or a
 *          PLTE chunk whose length is not a multiple of 3 or that
 *          holds more than 2^bps colors, is treated as an invalid png.
 * </pre>
 */
L_COMP_DATA *
l_generateFlateDataPdf(const char  *fname,
                       PIX         *pixs)
{
l_uint8      *pngcomp = NULL;  /* entire PNG compressed file */
l_uint8      *datacomp = NULL;  /* gzipped raster data */
l_uint8      *cmapdata = NULL;  /* uncompressed colormap */
char         *cmapdatahex = NULL;  /* hex ascii uncompressed colormap */
l_uint32      i, j, n;
l_int32       format, interlaced;
l_int32       ncolors;  /* in colormap */
l_int32       bps;  /* bits/sample: usually 8 */
l_int32       spp;  /* samples/pixel: 1-grayscale/cmap); 3-rgb; 4-rgba */
l_int32       w, h, cmapflag;
l_int32       xres = 0, yres = 0;  /* 0 if the resolution is not found */
l_int32       hdrerr;
size_t        nbytescomp = 0, nbytespng = 0;
FILE         *fp;
L_COMP_DATA  *cid;
PIX          *pix;
PIXCMAP      *cmap = NULL;

    PROCNAME("l_generateFlateDataPdf");

    if (!fname)
        return (L_COMP_DATA *)ERROR_PTR("fname not defined", procName, NULL);

    findFileFormat(fname, &format);
    spp = 0;  /* init to spp != 4 if not png */
    interlaced = 0;  /* initialize to no interlacing */
    bps = 0;  /* initialize to a nonsense value */
    if (format == IFF_PNG) {
        isPngInterlaced(fname, &interlaced);
        if (readHeaderPng(fname, NULL, NULL, &bps, &spp, NULL))
            return (L_COMP_DATA *)ERROR_PTR("bad png input", procName, NULL);
    }

        /* PDF is capable of inlining some types of PNG files, but not all
           of them. We need to transcode anything with interlacing, an
           alpha channel, or 1 bpp (which would otherwise be photo-inverted).

           Be careful with spp. Any PNG image file with an alpha
           channel is converted on reading to RGBA (spp == 4). This
           includes the (gray + alpha) format with spp == 2. You
           will get different results if you look at spp via
           readHeaderPng() versus pixGetSpp() */
    if (format != IFF_PNG || interlaced || bps == 1 || spp == 4 || spp == 2) {
        if (!pixs)
            pix = pixRead(fname);
        else
            pix = pixClone(pixs);
        if (!pix)
            return (L_COMP_DATA *)ERROR_PTR("pix not made", procName, NULL);
        cid = pixGenerateFlateData(pix, 0);
        pixDestroy(&pix);
        return cid;
    }

        /* It's png.  Generate the pdf data without transcoding.
         * Implementation by Jeff Breidenbach.
         * First, read the metadata.  The header read must succeed,
         * because w, h and cmapflag are all used below. */
    if ((fp = fopenReadStream(fname)) == NULL)
        return (L_COMP_DATA *)ERROR_PTR("stream not opened", procName, NULL);
    hdrerr = freadHeaderPng(fp, &w, &h, &bps, &spp, &cmapflag);
    if (!hdrerr)
        fgetPngResolution(fp, &xres, &yres);
    fclose(fp);
    if (hdrerr)
        return (L_COMP_DATA *)ERROR_PTR("bad png header", procName, NULL);

        /* We get pdf corruption when inlining the data from 16 bpp png. */
    if (bps == 16)
        return l_generateFlateData(fname, 0);

        /* Read the entire png file */
    if ((pngcomp = l_binaryRead(fname, &nbytespng)) == NULL)
        return (L_COMP_DATA *)ERROR_PTR("unable to read file",
                                        procName, NULL);

        /* Extract flate data, copying portions of it to memory, including
         * the predictor information in a byte at the beginning of each
         * raster line.  The flate data makes up the vast majority of
         * the png file, so after extraction we expect datacomp to
         * be nearly full (i.e., nbytescomp will be only slightly less
         * than nbytespng).  Also extract the colormap if present. */
    if ((datacomp = (l_uint8 *)LEPT_CALLOC(1, nbytespng)) == NULL) {
        LEPT_FREE(pngcomp);
        return (L_COMP_DATA *)ERROR_PTR("unable to allocate memory",
                                        procName, NULL);
    }

        /* Parse the png file.  Each chunk consists of:
         *    length: 4 bytes
         *    name:   4 bytes (e.g., "IDAT")
         *    data:   n bytes
         *    CRC:    4 bytes
         * Start at the beginning of the data section of the first chunk,
         * byte 16, because the png file begins with 8 bytes of header,
         * followed by the first 8 bytes of the first chunk
         * (length and name).  On each loop, increment by 12 bytes to
         * skip over the CRC, length and name of the next chunk. */
    for (i = 16; i < nbytespng; i += 12) {  /* do each successive chunk */
            /* Get the chunk length */
        n  = pngcomp[i - 8] << 24;
        n += pngcomp[i - 7] << 16;
        n += pngcomp[i - 6] << 8;
        n += pngcomp[i - 5] << 0;
        if (n >= nbytespng - i) {  /* "n + i" can overflow */
            LEPT_FREE(pngcomp);
            LEPT_FREE(datacomp);
            pixcmapDestroy(&cmap);
            L_ERROR("invalid png: i = %d, n = %d, nbytes = %zu\n", procName,
                    i, n, nbytespng);
            return NULL;
        }

            /* Is it a data chunk? */
        if (memcmp(pngcomp + i - 4, "IDAT", 4) == 0) {
            memcpy(datacomp + nbytescomp, pngcomp + i, n);
            nbytescomp += n;
        }

            /* Is it a palette chunk? */
        if (cmapflag && !cmap &&
            memcmp(pngcomp + i - 4, "PLTE", 4) == 0) {
                /* The palette is read as rgb triplets.  A length that
                 * is not a multiple of 3 would make the last triplet
                 * extend past the chunk, and possibly one byte past
                 * the end of the file buffer. */
            if ((n % 3) != 0 || (n / 3) > (1 << bps)) {
                LEPT_FREE(pngcomp);
                LEPT_FREE(datacomp);
                pixcmapDestroy(&cmap);
                L_ERROR("invalid png: i = %d, n = %d, cmapsize = %d\n",
                        procName, i, n, (1 << bps));
                return NULL;
            }
            cmap = pixcmapCreate(bps);
            for (j = i; j < i + n; j += 3) {
                pixcmapAddColor(cmap, pngcomp[j], pngcomp[j + 1],
                                pngcomp[j + 2]);
            }
        }
        i += n;  /* move to the end of the data chunk */
    }
    LEPT_FREE(pngcomp);

    if (nbytescomp == 0) {
        LEPT_FREE(datacomp);
        pixcmapDestroy(&cmap);
        return (L_COMP_DATA *)ERROR_PTR("invalid PNG file", procName, NULL);
    }

        /* Extract and encode the colormap data as hexascii  */
    ncolors = 0;
    if (cmap) {
        pixcmapSerializeToMemory(cmap, 3, &ncolors, &cmapdata);
        pixcmapDestroy(&cmap);
        if (!cmapdata) {
            LEPT_FREE(datacomp);
            return (L_COMP_DATA *)ERROR_PTR("cmapdata not made",
                                            procName, NULL);
        }
        cmapdatahex = pixcmapConvertToHex(cmapdata, ncolors);
        LEPT_FREE(cmapdata);
    }

        /* The cid takes ownership of datacomp and cmapdatahex; if it
         * cannot be allocated they must be released here. */
    if ((cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA))) == NULL) {
        LEPT_FREE(datacomp);
        LEPT_FREE(cmapdatahex);
        return (L_COMP_DATA *)ERROR_PTR("cid not made", procName, NULL);
    }

        /* Note that this is the only situation where the predictor
         * field of the CID is set to 1.  Adobe's predictor values on
         * p. 76 of pdf_reference_1-7.pdf give 1 for no predictor and
         * 10-14 for inline predictors, the specifics of which are
         * ignored by the pdf interpreter, which just needs to know that
         * the first byte on each compressed scanline is some predictor
         * whose type can be inferred from the byte itself.  */
    cid->datacomp = datacomp;
    cid->type = L_FLATE_ENCODE;
    cid->cmapdatahex = cmapdatahex;
    cid->nbytescomp = nbytescomp;
    cid->ncolors = ncolors;
    cid->predictor = TRUE;
    cid->w = w;
    cid->h = h;
    cid->bps = bps;
    cid->spp = spp;
    cid->res = xres;
    return cid;
}


/*!
 * \brief   l_generateJpegData()
 *
 * \param[in]    fname           of jpeg file
 * \param[in]    ascii85flag     0 for jpeg; 1 for ascii85-encoded jpeg
 * \return  cid containing jpeg data, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) Set ascii85flag:
 *           ~ 0 for binary data (not permitted in PostScript)
 *           ~ 1 for ascii85 (5 for 4) encoded binary data
 *               (not permitted in pdf)
 *      (2) The file is read in full and the buffer is handed to
 *          l_generateJpegDataMem(), which takes charge of it from
 *          then on.  It must not be freed here.
 * </pre>
 */
L_COMP_DATA *
l_generateJpegData(const char  *fname,
                   l_int32      ascii85flag)
{
size_t    filesize;
l_uint8  *jpegbuf;

    PROCNAME("l_generateJpegData");

    if (fname == NULL)
        return (L_COMP_DATA *)ERROR_PTR("fname not defined", procName, NULL);

        /* The buffer holds the whole jpeg file, from the ffd8 marker
         * at the start to the ffd9 marker at the end */
    jpegbuf = l_binaryRead(fname, &filesize);
    if (jpegbuf == NULL)
        return (L_COMP_DATA *)ERROR_PTR("data not extracted", procName, NULL);

    return l_generateJpegDataMem(jpegbuf, filesize, ascii85flag);
}


/*!
 * \brief   l_generateJpegDataMem()
 *
 * \param[in]    data           of jpeg file
 * \param[in]    nbytes         of jpeg file
 * \param[in]    ascii85flag    0 for jpeg; 1 for ascii85-encoded jpeg
 * \return  cid containing jpeg data, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) See l_generateJpegData().
 *      (2) This takes ownership of %data, which must be heap-allocated:
 *           ~ with %ascii85flag == 0 and a successful return, %data is
 *             stored in the cid without copying
 *           ~ in every other case (bad jpeg header, ascii85 encoding,
 *             allocation failure) %data is freed here
 *          The caller must not use or free %data after the call.
 *      (3) Any nonzero %ascii85flag requests ascii85 encoding.
 *      (4) The cid resolution is the x-resolution found in the jpeg
 *          data, or 0 if none is found.
 * </pre>
 */
L_COMP_DATA *
l_generateJpegDataMem(l_uint8  *data,
                      size_t    nbytes,
                      l_int32   ascii85flag)
{
char         *data85 = NULL;  /* ascii85 encoded jpeg compressed file */
l_int32       w, h, spp;
l_int32       xres = 0, yres = 0;  /* stay 0 if resolution is not found */
l_int32       nbytes85 = 0;
L_COMP_DATA  *cid;

    PROCNAME("l_generateJpegDataMem");

    if (!data)
        return (L_COMP_DATA *)ERROR_PTR("data not defined", procName, NULL);

        /* Read the metadata */
    if (readHeaderMemJpeg(data, nbytes, &w, &h, &spp, NULL, NULL)) {
        LEPT_FREE(data);
        return (L_COMP_DATA *)ERROR_PTR("bad jpeg metadata", procName, NULL);
    }
    readResolutionMemJpeg(data, nbytes, &xres, &yres);

        /* Optionally, encode the compressed data.  Once encoded, the
         * binary data is no longer needed. */
    if (ascii85flag != 0) {
        data85 = encodeAscii85(data, nbytes, &nbytes85);
        LEPT_FREE(data);
        data = NULL;
        if (!data85)
            return (L_COMP_DATA *)ERROR_PTR("data85 not made", procName, NULL);
        if (nbytes85 > 0)
            data85[nbytes85 - 1] = '\0';  /* remove the newline */
    }

        /* Whichever buffer is still held must be released if the cid
         * cannot be allocated; the other pointer is NULL. */
    if ((cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA))) == NULL) {
        LEPT_FREE(data);
        LEPT_FREE(data85);
        return (L_COMP_DATA *)ERROR_PTR("cid not made", procName, NULL);
    }

    if (data85) {  /* ascii85 */
        cid->data85 = data85;
        cid->nbytes85 = nbytes85;
    } else {  /* binary */
        cid->datacomp = data;
    }
    cid->type = L_JPEG_ENCODE;
    cid->nbytescomp = nbytes;
    cid->w = w;
    cid->h = h;
    cid->bps = 8;  /* always 8 bits/sample here */
    cid->spp = spp;
    cid->res = xres;
    return cid;
}


/*!
 * \brief   l_generateJp2kData()
 *
 * \param[in]    fname     of jp2k file
 * \return  cid containing jp2k data, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) This is only called after the file is verified to be jp2k.
 *      (2) The compressed data stored in the cid is the entire file;
 *          nothing is transcoded.
 * </pre>
 */
static L_COMP_DATA *
l_generateJp2kData(const char  *fname)
{
FILE         *stream;
L_COMP_DATA  *result;
size_t        filesize;
l_int32       width, height, bitspersample, samples;
l_int32       resx, resy;

    PROCNAME("l_generateJp2kData");

    if (fname == NULL)
        return (L_COMP_DATA *)ERROR_PTR("fname not defined", procName, NULL);

    if (readHeaderJp2k(fname, &width, &height, &bitspersample, &samples) != 0)
        return (L_COMP_DATA *)ERROR_PTR("bad jp2k metadata", procName, NULL);

    result = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));
    if (result == NULL)
        return (L_COMP_DATA *)ERROR_PTR("cid not made", procName, NULL);

        /* Keep the whole jp2k file as the compressed payload */
    result->datacomp = l_binaryRead(fname, &filesize);
    if (result->datacomp == NULL) {
        l_CIDataDestroy(&result);
        return (L_COMP_DATA *)ERROR_PTR("data not extracted", procName, NULL);
    }

        /* Resolution is optional; it stays 0 if the file can't be opened */
    resx = 0;
    resy = 0;
    stream = fopenReadStream(fname);
    if (stream != NULL) {
        fgetJp2kResolution(stream, &resx, &resy);
        fclose(stream);
    }

    result->type = L_JP2K_ENCODE;
    result->nbytescomp = filesize;
    result->w = width;
    result->h = height;
    result->bps = bitspersample;
    result->spp = samples;
    result->res = resx;
    return result;
}


/*!
 * \brief   l_generateCIData()
 *
 * \param[in]    fname
 * \param[in]    type       L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE,
 *                          L_JP2K_ENCODE
 * \param[in]    quality    for jpeg if transcoded: 1-100; 0 for default (75)
 *                          for jp2k if transcoded: 27-45; 0 for default (34)
 * \param[in]    ascii85    0 for binary; 1 for ascii85-encoded
 * \param[out]   pcid       compressed data
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) This can be used for both PostScript and pdf.
 *      (2) Set ascii85:
 *           ~ 0 for binary data (not permitted in PostScript)
 *           ~ 1 for ascii85 (5 for 4) encoded binary data
 *      (3) This attempts to compress according to the requested type.
 *          If this can't be done, it falls back to ordinary flate encoding.
 *      (4) This differs from l_generateCIDataPdf(), which determines
 *          the format and attempts to generate the CID without transcoding.
 *      (5) A jpeg (or jp2k) file requested with the matching type is
 *          used as is; any other input is read into a pix and transcoded.
 *      (6) On error, *pcid is NULL.
 * </pre>
 */
l_ok
l_generateCIData(const char    *fname,
                 l_int32        type,
                 l_int32        quality,
                 l_int32        ascii85,
                 L_COMP_DATA  **pcid)
{
l_int32       format, d, bps, spp, iscmap;
L_COMP_DATA  *cid = NULL;
PIX          *pix;

    PROCNAME("l_generateCIData");

    if (!pcid)
        return ERROR_INT("&cid not defined", procName, 1);
    *pcid = NULL;
    if (!fname)
        return ERROR_INT("fname not defined", procName, 1);
    if (type != L_G4_ENCODE && type != L_JPEG_ENCODE &&
        type != L_FLATE_ENCODE && type != L_JP2K_ENCODE)
        return ERROR_INT("invalid conversion type", procName, 1);
    if (ascii85 != 0 && ascii85 != 1)
        return ERROR_INT("invalid ascii85", procName, 1);

        /* Sanity check on requested encoding.  The header values drive
         * every decision below, so do not continue without them. */
    if (pixReadHeader(fname, &format, NULL, NULL, &bps, &spp, &iscmap))
        return ERROR_INT("header not read", procName, 1);
    d = bps * spp;
    if (d == 24) d = 32;
    if (iscmap && type != L_FLATE_ENCODE) {
        L_WARNING("pixs has cmap; using flate encoding\n", procName);
        type = L_FLATE_ENCODE;
    } else if (d < 8 && type == L_JPEG_ENCODE) {
        L_WARNING("pixs has < 8 bpp; using flate encoding\n", procName);
        type = L_FLATE_ENCODE;
    } else if (d < 8 && type == L_JP2K_ENCODE) {
        L_WARNING("pixs has < 8 bpp; using flate encoding\n", procName);
        type = L_FLATE_ENCODE;
    } else if (d > 1 && type == L_G4_ENCODE) {
        L_WARNING("pixs has > 1 bpp; using flate encoding\n", procName);
        type = L_FLATE_ENCODE;
    }

    if (type == L_JPEG_ENCODE) {
        if (format == IFF_JFIF_JPEG) {  /* do not transcode */
            cid = l_generateJpegData(fname, ascii85);
        } else {
            if ((pix = pixRead(fname)) == NULL)
                return ERROR_INT("pix not returned", procName, 1);
            cid = pixGenerateJpegData(pix, ascii85, quality);
            pixDestroy(&pix);
        }
        if (!cid)
            return ERROR_INT("jpeg data not made", procName, 1);
    } else if (type == L_JP2K_ENCODE) {
        if (format == IFF_JP2) {  /* do not transcode */
            cid = l_generateJp2kData(fname);
        } else {
            if ((pix = pixRead(fname)) == NULL)
                return ERROR_INT("pix not returned", procName, 1);
            cid = pixGenerateJp2kData(pix, quality);
            pixDestroy(&pix);
        }
        if (!cid)
            return ERROR_INT("jp2k data not made", procName, 1);
    } else if (type == L_G4_ENCODE) {
        if ((cid = l_generateG4Data(fname, ascii85)) == NULL)
            return ERROR_INT("g4 data not made", procName, 1);
    } else {  /* type == L_FLATE_ENCODE; type was validated on entry */
        if ((cid = l_generateFlateData(fname, ascii85)) == NULL)
            return ERROR_INT("flate data not made", procName, 1);
    }
    *pcid = cid;

    return 0;
}


/*!
 * \brief   pixGenerateCIData()
 *
 * \param[in]    pixs       8 or 32 bpp, no colormap
 * \param[in]    type       L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE or
 *                          L_JP2K_ENCODE
 * \param[in]    quality    for jpeg if transcoded: 1-100; 0 for default (75)
 *                          for jp2k if transcoded: 27-45; 0 for default (34)
 * \param[in]    ascii85    0 for binary; 1 for ascii85-encoded
 * \param[out]   pcid       compressed data
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) Set ascii85:
 *           ~ 0 for binary data (not permitted in PostScript)
 *           ~ 1 for ascii85 (5 for 4) encoded binary data
 *      (2) If %type is not one of the four listed values, the encoding
 *          is chosen by selectDefaultPdfEncoding().
 *      (3) A request that does not fit the image (colormap with a
 *          non-flate type, jpeg or jp2k below 8 bpp, g4 above 1 bpp)
 *          is replaced by flate encoding, with a warning.
 * </pre>
 */
l_ok
pixGenerateCIData(PIX           *pixs,
                  l_int32        type,
                  l_int32        quality,
                  l_int32        ascii85,
                  L_COMP_DATA  **pcid)
{
l_int32  depth, knowntype, hascmap, lossy;

    PROCNAME("pixGenerateCIData");

    if (pcid == NULL)
        return ERROR_INT("&cid not defined", procName, 1);
    *pcid = NULL;
    if (pixs == NULL)
        return ERROR_INT("pixs not defined", procName, 1);

    knowntype = (type == L_G4_ENCODE || type == L_JPEG_ENCODE ||
                 type == L_FLATE_ENCODE || type == L_JP2K_ENCODE);
    if (!knowntype)
        selectDefaultPdfEncoding(pixs, &type);

    if (!(ascii85 == 0 || ascii85 == 1))
        return ERROR_INT("invalid ascii85", procName, 1);

        /* Make sure the requested encoding suits this image */
    depth = pixGetDepth(pixs);
    hascmap = (pixGetColormap(pixs) != NULL);
    lossy = (type == L_JPEG_ENCODE || type == L_JP2K_ENCODE);
    if (hascmap && type != L_FLATE_ENCODE) {
        L_WARNING("pixs has cmap; using flate encoding\n", procName);
        type = L_FLATE_ENCODE;
    } else if (depth < 8 && lossy) {
        L_WARNING("pixs has < 8 bpp; using flate encoding\n", procName);
        type = L_FLATE_ENCODE;
    } else if (depth > 1 && type == L_G4_ENCODE) {
        L_WARNING("pixs has > 1 bpp; using flate encoding\n", procName);
        type = L_FLATE_ENCODE;
    }

    if (type == L_JPEG_ENCODE) {
        *pcid = pixGenerateJpegData(pixs, ascii85, quality);
        if (*pcid == NULL)
            return ERROR_INT("jpeg data not made", procName, 1);
        return 0;
    }
    if (type == L_JP2K_ENCODE) {
        *pcid = pixGenerateJp2kData(pixs, quality);
        if (*pcid == NULL)
            return ERROR_INT("jp2k data not made", procName, 1);
        return 0;
    }
    if (type == L_G4_ENCODE) {
        *pcid = pixGenerateG4Data(pixs, ascii85);
        if (*pcid == NULL)
            return ERROR_INT("g4 data not made", procName, 1);
        return 0;
    }

        /* Everything else is flate */
    *pcid = pixGenerateFlateData(pixs, ascii85);
    if (*pcid == NULL)
        return ERROR_INT("flate data not made", procName, 1);
    return 0;
}


/*!
 * \brief   l_generateFlateData()
 *
 * \param[in]    fname
 * \param[in]    ascii85flag    0 for gzipped; 1 for ascii85-encoded gzipped
 * \return  cid flate compressed image data, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) The input image is converted to one of these 4 types:
 *           ~ 1 bpp
 *           ~ 8 bpp, no colormap
 *           ~ 8 bpp, colormap
 *           ~ 32 bpp rgb
 *      (2) Set ascii85flag:
 *           ~ 0 for binary data (not permitted in PostScript)
 *           ~ 1 for ascii85 (5 for 4) encoded binary data
 *      (3) The file is decoded to a pix, which is then passed to
 *          pixGenerateFlateData() and released.
 * </pre>
 */
L_COMP_DATA *
l_generateFlateData(const char  *fname,
                    l_int32      ascii85flag)
{
PIX          *image;
L_COMP_DATA  *result;

    PROCNAME("l_generateFlateData");

    if (fname == NULL)
        return (L_COMP_DATA *)ERROR_PTR("fname not defined", procName, NULL);

    image = pixRead(fname);
    if (image == NULL)
        return (L_COMP_DATA *)ERROR_PTR("pixs not made", procName, NULL);

    result = pixGenerateFlateData(image, ascii85flag);
    pixDestroy(&image);
    return result;
}


/*!
 * \brief   pixGenerateFlateData()
 *
 * \param[in]    pixs
 * \param[in]    ascii85flag    0 for gzipped; 1 for ascii85-encoded gzipped
 * \return  cid flate compressed image data, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) This should not be called with an RGBA pix (spp == 4); it
 *          will ignore the alpha channel.  Likewise, if called with a
 *          colormapped pix, the alpha component in the colormap will
 *          be ignored as it is for all leptonica operations
 *          on colormapped pix.
 *      (2) Images of depth 2, 4 or 16 bpp are first converted to 8 bpp
 *          (keeping a colormap if pixs has one); other depths are used
 *          as is.
 *      (3) All intermediate buffers are released on every error path.
 * </pre>
 */
static L_COMP_DATA *
pixGenerateFlateData(PIX     *pixs,
                     l_int32  ascii85flag)
{
l_uint8      *data = NULL;  /* uncompressed raster data in required format */
l_uint8      *datacomp = NULL;  /* gzipped raster data */
char         *data85 = NULL;  /* ascii85 encoded gzipped raster data */
l_uint8      *cmapdata = NULL;  /* uncompressed colormap */
char         *cmapdata85 = NULL;  /* ascii85 encoded uncompressed colormap */
char         *cmapdatahex = NULL;  /* hex ascii uncompressed colormap */
l_int32       ncolors;  /* in colormap; not used if cmapdata85 is null */
l_int32       bps;  /* bits/sample: usually 8 */
l_int32       spp;  /* samples/pixel: 1-grayscale/cmap); 3-rgb */
l_int32       w, h, d, cmapflag;
l_int32       ncmapbytes85 = 0;
l_int32       nbytes85 = 0;
size_t        nbytes = 0, nbytescomp = 0;
L_COMP_DATA  *cid;
PIX          *pixt;
PIXCMAP      *cmap;

    PROCNAME("pixGenerateFlateData");

    if (!pixs)
        return (L_COMP_DATA *)ERROR_PTR("pixs not defined", procName, NULL);

        /* Convert the image to one of these 4 types:
         *     1 bpp
         *     8 bpp, no colormap
         *     8 bpp, colormap
         *     32 bpp rgb    */
    pixGetDimensions(pixs, &w, &h, &d);
    cmap = pixGetColormap(pixs);
    cmapflag = (cmap) ? 1 : 0;
    if (d == 2 || d == 4 || d == 16) {
        if ((pixt = pixConvertTo8(pixs, cmapflag)) != NULL) {
            cmap = pixGetColormap(pixt);
            d = pixGetDepth(pixt);
        }
    } else {
        pixt = pixClone(pixs);
    }
    if (!pixt)
        return (L_COMP_DATA *)ERROR_PTR("pixt not made", procName, NULL);
    spp = (d == 32) ? 3 : 1;  /* ignores alpha */
    bps = (d == 32) ? 8 : d;

        /* Extract and encode the colormap data as both ascii85 and hexascii  */
    ncolors = 0;
    if (cmap) {
        pixcmapSerializeToMemory(cmap, 3, &ncolors, &cmapdata);
        if (!cmapdata) {
            pixDestroy(&pixt);
            return (L_COMP_DATA *)ERROR_PTR("cmapdata not made",
                                            procName, NULL);
        }

        cmapdata85 = encodeAscii85(cmapdata, 3 * ncolors, &ncmapbytes85);
        cmapdatahex = pixcmapConvertToHex(cmapdata, ncolors);
        LEPT_FREE(cmapdata);
    }

        /* Extract and compress the raster data */
    pixGetRasterData(pixt, &data, &nbytes);
    pixDestroy(&pixt);
    if (!data) {
        LEPT_FREE(cmapdata85);
        LEPT_FREE(cmapdatahex);
        return (L_COMP_DATA *)ERROR_PTR("data not returned", procName, NULL);
    }
    datacomp = zlibCompress(data, nbytes, &nbytescomp);
    LEPT_FREE(data);
    if (!datacomp) {
        LEPT_FREE(cmapdata85);
        LEPT_FREE(cmapdatahex);
        return (L_COMP_DATA *)ERROR_PTR("datacomp not made", procName, NULL);
    }

        /* Optionally, encode the compressed data */
    if (ascii85flag == 1) {
        data85 = encodeAscii85(datacomp, nbytescomp, &nbytes85);
        LEPT_FREE(datacomp);
        datacomp = NULL;  /* the binary buffer is gone from here on */
        if (!data85) {
            LEPT_FREE(cmapdata85);
            LEPT_FREE(cmapdatahex);
            return (L_COMP_DATA *)ERROR_PTR("data85 not made", procName, NULL);
        }
        data85[nbytes85 - 1] = '\0';  /* remove the newline */
    }

    if ((cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA))) == NULL) {
        LEPT_FREE(datacomp);
        LEPT_FREE(data85);
        LEPT_FREE(cmapdata85);
        LEPT_FREE(cmapdatahex);
        return (L_COMP_DATA *)ERROR_PTR("cid not made", procName, NULL);
    }
    if (ascii85flag == 0) {
        cid->datacomp = datacomp;
    } else {  /* ascii85 */
        cid->data85 = data85;
        cid->nbytes85 = nbytes85;
            /* Non-null only when the flag was neither 0 nor 1; nothing
             * else refers to the buffer, so release it. */
        LEPT_FREE(datacomp);
    }
    cid->type = L_FLATE_ENCODE;
    cid->cmapdatahex = cmapdatahex;
    cid->cmapdata85 = cmapdata85;
    cid->nbytescomp = nbytescomp;
    cid->ncolors = ncolors;
    cid->w = w;
    cid->h = h;
    cid->bps = bps;
    cid->spp = spp;
    cid->res = pixGetXRes(pixs);
    cid->nbytes = nbytes;  /* only for debugging */
    return cid;
}


/*!
 * \brief   pixGenerateJpegData()
 *
 * \param[in]    pixs           8 or 32 bpp, no colormap
 * \param[in]    ascii85flag    0 for jpeg; 1 for ascii85-encoded jpeg
 * \param[in]    quality        0 for default, which is 75
 * \return  cid jpeg compressed data, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) Set ascii85flag:
 *           ~ 0 for binary data (not permitted in PostScript)
 *           ~ 1 for ascii85 (5 for 4) encoded binary data
 *      (2) The image is written to a temporary jpeg file, which is
 *          read back through l_generateJpegData() and then removed.
 * </pre>
 */
static L_COMP_DATA *
pixGenerateJpegData(PIX     *pixs,
                    l_int32  ascii85flag,
                    l_int32  quality)
{
char         *tmpname;
L_COMP_DATA  *result;
l_int32       depth;

    PROCNAME("pixGenerateJpegData");

    if (pixs == NULL)
        return (L_COMP_DATA *)ERROR_PTR("pixs not defined", procName, NULL);
    if (pixGetColormap(pixs) != NULL)
        return (L_COMP_DATA *)ERROR_PTR("pixs has colormap", procName, NULL);
    depth = pixGetDepth(pixs);
    if (!(depth == 8 || depth == 32))
        return (L_COMP_DATA *)ERROR_PTR("pixs not 8 or 32 bpp", procName, NULL);

        /* Write the image out as a temporary jpeg file */
    tmpname = l_makeTempFilename();
    if (pixWriteJpeg(tmpname, pixs, quality, 0) != 0) {
        LEPT_FREE(tmpname);
        return NULL;
    }

        /* Build the cid from that file, then clean up */
    result = l_generateJpegData(tmpname, ascii85flag);
    if (lept_rmfile(tmpname) != 0)
        L_ERROR("temp file %s was not deleted\n", procName, tmpname);
    LEPT_FREE(tmpname);
    return result;
}


/*!
 * \brief   pixGenerateJp2kData()
 *
 * \param[in]    pixs           8 or 32 bpp, no colormap
 * \param[in]    quality        0 for default, which is 34
 * \return  cid jp2k compressed data, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) The quality can be set between 27 (very poor) and 45
 *          (nearly perfect).  Use 0 for default (34). Use 100 for lossless,
 *          but this is very expensive and not recommended.
 *      (2) The image is written to a temporary jp2k file, which is
 *          read back through l_generateJp2kData() and then removed.
 * </pre>
 */
static L_COMP_DATA *
pixGenerateJp2kData(PIX     *pixs,
                    l_int32  quality)
{
char         *tmpname;
L_COMP_DATA  *result;
l_int32       depth;

    PROCNAME("pixGenerateJp2kData");

    if (pixs == NULL)
        return (L_COMP_DATA *)ERROR_PTR("pixs not defined", procName, NULL);
    if (pixGetColormap(pixs) != NULL)
        return (L_COMP_DATA *)ERROR_PTR("pixs has colormap", procName, NULL);
    depth = pixGetDepth(pixs);
    if (!(depth == 8 || depth == 32))
        return (L_COMP_DATA *)ERROR_PTR("pixs not 8 or 32 bpp", procName, NULL);

        /* Write the image out as a temporary jp2k file */
    tmpname = l_makeTempFilename();
    if (pixWriteJp2k(tmpname, pixs, quality, 5, 0, 0) != 0) {
        LEPT_FREE(tmpname);
        return NULL;
    }

        /* Build the cid from that file, then clean up */
    result = l_generateJp2kData(tmpname);
    if (lept_rmfile(tmpname) != 0)
        L_ERROR("temp file %s was not deleted\n", procName, tmpname);
    LEPT_FREE(tmpname);
    return result;
}


/*!
 * \brief   pixGenerateG4Data()
 *
 * \param[in]    pixs           1 bpp
 * \param[in]    ascii85flag    0 for g4 compressed; 1 for ascii85-encoded g4
 * \return  cid g4 compressed image data, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) Set ascii85flag:
 *           ~ 0 for binary data (not permitted in PostScript)
 *           ~ 1 for ascii85 (5 for 4) encoded binary data
 *      (2) The image is written to a temporary tiff g4 file, which is
 *          read back through l_generateG4Data() and then removed.
 * </pre>
 */
static L_COMP_DATA *
pixGenerateG4Data(PIX     *pixs,
                  l_int32  ascii85flag)
{
char         *tmpname;
L_COMP_DATA  *result;

    PROCNAME("pixGenerateG4Data");

    if (pixs == NULL)
        return (L_COMP_DATA *)ERROR_PTR("pixs not defined", procName, NULL);
    if (pixGetDepth(pixs) != 1)
        return (L_COMP_DATA *)ERROR_PTR("pixs not 1 bpp", procName, NULL);

        /* Write the image out as a temporary tiff g4 file */
    tmpname = l_makeTempFilename();
    if (pixWrite(tmpname, pixs, IFF_TIFF_G4) != 0) {
        LEPT_FREE(tmpname);
        return NULL;
    }

        /* Build the cid from that file, then clean up */
    result = l_generateG4Data(tmpname, ascii85flag);
    if (lept_rmfile(tmpname) != 0)
        L_ERROR("temp file %s was not deleted\n", procName, tmpname);
    LEPT_FREE(tmpname);
    return result;
}


/*!
 * \brief   l_generateG4Data()
 *
 * \param[in]    fname          of g4 compressed file
 * \param[in]    ascii85flag    0 for g4 compressed; 1 for ascii85-encoded g4
 * \return  cid g4 compressed image data, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) Set ascii85flag:
 *           ~ 0 for binary data (not permitted in PostScript)
 *           ~ 1 for ascii85 (5 for 4) encoded binary data
 *             (not permitted in pdf)
 *      (2) The extracted g4 buffer is either stored in the returned
 *          cid (ascii85flag == 0) or released here; it is also released
 *          on every error path.
 * </pre>
 */
L_COMP_DATA *
l_generateG4Data(const char  *fname,
                 l_int32      ascii85flag)
{
l_uint8      *datacomp = NULL;  /* g4 compressed raster data */
char         *data85 = NULL;  /* ascii85 encoded g4 compressed data */
l_int32       w, h;
l_int32       xres = 0, yres = 0;  /* stay 0 if no resolution is found */
l_int32       minisblack;  /* TRUE or FALSE */
l_int32       nbytes85 = 0;
size_t        nbytescomp;
L_COMP_DATA  *cid;
FILE         *fp;

    PROCNAME("l_generateG4Data");

    if (!fname)
        return (L_COMP_DATA *)ERROR_PTR("fname not defined", procName, NULL);

        /* Read the resolution */
    if ((fp = fopenReadStream(fname)) == NULL)
        return (L_COMP_DATA *)ERROR_PTR("stream not opened", procName, NULL);
    getTiffResolution(fp, &xres, &yres);
    fclose(fp);

        /* The returned ccitt g4 data in memory is the block of
         * bytes in the tiff file, starting after 8 bytes and
         * ending before the directory. */
    if (extractG4DataFromFile(fname, &datacomp, &nbytescomp,
                              &w, &h, &minisblack)) {
        return (L_COMP_DATA *)ERROR_PTR("datacomp not extracted",
                                        procName, NULL);
    }

        /* Optionally, encode the compressed data */
    if (ascii85flag == 1) {
        data85 = encodeAscii85(datacomp, nbytescomp, &nbytes85);
        LEPT_FREE(datacomp);
        datacomp = NULL;  /* the binary buffer is gone from here on */
        if (!data85)
            return (L_COMP_DATA *)ERROR_PTR("data85 not made", procName, NULL);
        data85[nbytes85 - 1] = '\0';  /* remove the newline */
    }

    if ((cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA))) == NULL) {
        LEPT_FREE(datacomp);
        LEPT_FREE(data85);
        return (L_COMP_DATA *)ERROR_PTR("cid not made", procName, NULL);
    }
    if (ascii85flag == 0) {
        cid->datacomp = datacomp;
    } else {  /* ascii85 */
        cid->data85 = data85;
        cid->nbytes85 = nbytes85;
            /* Non-null only when the flag was neither 0 nor 1; nothing
             * else refers to the buffer, so release it. */
        LEPT_FREE(datacomp);
    }
    cid->type = L_G4_ENCODE;
    cid->nbytescomp = nbytescomp;
    cid->w = w;
    cid->h = h;
    cid->bps = 1;
    cid->spp = 1;
    cid->minisblack = minisblack;
    cid->res = xres;
    return cid;
}


/*!
 * \brief   cidConvertToPdfData()
 *
 * \param[in]    cid       compressed image data
 * \param[in]    title     [optional] pdf title; can be NULL
 * \param[out]   pdata     output pdf data for image
 * \param[out]   pnbytes   size of output pdf data
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) Caller must not destroy the cid.  It is absorbed in the
 *          lpd and destroyed by this function.  If the lpd cannot be
 *          created, the cid is destroyed here directly so that it
 *          does not leak.
 *      (2) The page size in points is derived from the cid dimensions
 *          and resolution; a resolution <= 0 is replaced by
 *          DefaultInputRes.
 * </pre>
 */
l_ok
cidConvertToPdfData(L_COMP_DATA  *cid,
                    const char   *title,
                    l_uint8     **pdata,
                    size_t       *pnbytes)
{
l_int32      res, ret;
l_float32    wpt, hpt;
L_PDF_DATA  *lpd = NULL;

    PROCNAME("cidConvertToPdfData");

    if (!pdata || !pnbytes)
        return ERROR_INT("&data and &nbytes not both defined", procName, 1);
    *pdata = NULL;
    *pnbytes = 0;
    if (!cid)
        return ERROR_INT("cid not defined", procName, 1);

        /* Get media box parameters, in pts */
    res = cid->res;
    if (res <= 0)
        res = DefaultInputRes;
    wpt = cid->w * 72. / res;
    hpt = cid->h * 72. / res;

        /* Set up the pdf data struct (lpd) */
    if ((lpd = pdfdataCreate(title)) == NULL) {
            /* Ownership was already transferred to this function */
        l_CIDataDestroy(&cid);
        return ERROR_INT("lpd not made", procName, 1);
    }
    ptraAdd(lpd->cida, cid);
    lpd->n++;
    ptaAddPt(lpd->xy, 0, 0);   /* xpt = ypt = 0 */
    ptaAddPt(lpd->wh, wpt, hpt);

        /* Generate the pdf string and destroy the lpd */
    ret = l_generatePdf(pdata, pnbytes, lpd);
    pdfdataDestroy(&lpd);
    if (ret)
        return ERROR_INT("pdf output not made", procName, 1);
    return 0;
}


/*!
 * \brief   l_CIDataDestroy()
 *
 * \param[in,out]   pcid     will be set to null before returning
 * \return  void
 *
 * <pre>
 * Notes:
 *      (1) Releases the four data buffers held by the cid (datacomp,
 *          data85, cmapdata85, cmapdatahex) and then the cid itself.
 *      (2) A null *pcid is accepted silently; a null pcid gives
 *          a warning.
 * </pre>
 */
void
l_CIDataDestroy(L_COMP_DATA  **pcid)
{
L_COMP_DATA  *target;

    PROCNAME("l_CIDataDestroy");

    if (!pcid) {
        L_WARNING("ptr address is null!\n", procName);
        return;
    }

    target = *pcid;
    if (target != NULL) {
        if (target->datacomp != NULL)
            LEPT_FREE(target->datacomp);
        if (target->data85 != NULL)
            LEPT_FREE(target->data85);
        if (target->cmapdata85 != NULL)
            LEPT_FREE(target->cmapdata85);
        if (target->cmapdatahex != NULL)
            LEPT_FREE(target->cmapdatahex);
        LEPT_FREE(target);
        *pcid = NULL;
    }
}


/*---------------------------------------------------------------------*
 *         Helper functions for generating the output pdf string       *
 *---------------------------------------------------------------------*/
/*!
 * \brief   l_generatePdf()
 *
 * \param[out]   pdata     pdf array
 * \param[out]   pnbytes   number of bytes in pdf array
 * \param[in]    lpd       all the required input image data
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) On error, no data is returned.
 *      (2) The objects are:
 *            1: Catalog
 *            2: Info
 *            3: Pages
 *            4: Page
 *            5: Contents  (rendering command)
 *            6 to 6+n-1: n XObjects
 *            6+n to 6+n+m-1: m colormaps
 *      (3) The helpers fill in the string fields of %lpd in order;
 *          generation stops at the first helper that reports failure,
 *          rather than assembling output from incomplete pieces.
 * </pre>
 */
static l_int32
l_generatePdf(l_uint8    **pdata,
              size_t      *pnbytes,
              L_PDF_DATA  *lpd)
{
    PROCNAME("l_generatePdf");

    if (!pdata)
        return ERROR_INT("&data not defined", procName, 1);
    *pdata = NULL;
    if (!pnbytes)
        return ERROR_INT("&nbytes not defined", procName, 1);
    *pnbytes = 0;
    if (!lpd)
        return ERROR_INT("lpd not defined", procName, 1);

    generateFixedStringsPdf(lpd);
    generateMediaboxPdf(lpd);
    if (generatePageStringPdf(lpd))
        return ERROR_INT("page string not made", procName, 1);
    if (generateContentStringPdf(lpd))
        return ERROR_INT("content string not made", procName, 1);
    if (generatePreXStringsPdf(lpd))
        return ERROR_INT("prex strings not made", procName, 1);
    generateColormapStringsPdf(lpd);
    generateTrailerPdf(lpd);
    return generateOutputDataPdf(pdata, pnbytes, lpd);
}


/*!
 * \brief   generateFixedStringsPdf()
 *
 * \param[in,out]   lpd     pdf data; receives id, obj1, obj2, obj3
 *                          and poststream
 * \return  void
 *
 * <pre>
 * Notes:
 *      (1) Builds the header line and objects 1 (Catalog), 2 (Info)
 *          and 3 (Pages), appending the byte length of each to
 *          lpd->objsize.
 *      (2) The Info object gets a creation date and library version
 *          only if var_WRITE_DATE_AND_VERSION is set, and a /Title
 *          entry only if lpd->title can be hex-escaped.
 *      (3) The title entry is added to the string array in pieces, so
 *          its length is not limited by the size of the local buffer.
 * </pre>
 */
static void
generateFixedStringsPdf(L_PDF_DATA  *lpd)
{
char     buf[L_SMALLBUF];
char    *version, *datestr;
SARRAY  *sa;

    PROCNAME("generateFixedStringsPdf");

        /* Accumulate data for the header and objects 1-3 */
    lpd->id = stringNew("%PDF-1.5\n");
    l_dnaAddNumber(lpd->objsize, strlen(lpd->id));

    lpd->obj1 = stringNew("1 0 obj\n"
                          "<<\n"
                          "/Type /Catalog\n"
                          "/Pages 3 0 R\n"
                          ">>\n"
                          "endobj\n");
    l_dnaAddNumber(lpd->objsize, strlen(lpd->obj1));

    sa = sarrayCreate(0);
    sarrayAddString(sa, "2 0 obj\n"
                        "<<\n", L_COPY);
    if (var_WRITE_DATE_AND_VERSION) {
        datestr = l_getFormattedDate();
        snprintf(buf, sizeof(buf), "/CreationDate (D:%s)\n", datestr);
        sarrayAddString(sa, buf, L_COPY);
        LEPT_FREE(datestr);
        version = getLeptonicaVersion();
        snprintf(buf, sizeof(buf),
                 "/Producer (leptonica: %s)\n", version);
        LEPT_FREE(version);
    } else {
        snprintf(buf, sizeof(buf), "/Producer (leptonica)\n");
    }
    sarrayAddString(sa, buf, L_COPY);
    if (lpd->title) {
        char *hexstr;
        if ((hexstr = generateEscapeString(lpd->title)) != NULL) {
                /* The hex string is 4 chars per title char, so it can
                 * easily exceed buf; add the pieces directly instead. */
            sarrayAddString(sa, "/Title ", L_COPY);
            sarrayAddString(sa, hexstr, L_COPY);
            sarrayAddString(sa, "\n", L_COPY);
        } else {
            L_ERROR("title string is not ascii\n", procName);
        }
        LEPT_FREE(hexstr);
    }
    sarrayAddString(sa, ">>\n"
                        "endobj\n", L_COPY);
    lpd->obj2 = sarrayToString(sa, 0);
    l_dnaAddNumber(lpd->objsize, strlen(lpd->obj2));
    sarrayDestroy(&sa);

        /* Every indirect object must be closed with endobj */
    lpd->obj3 = stringNew("3 0 obj\n"
                          "<<\n"
                          "/Type /Pages\n"
                          "/Kids [ 4 0 R ]\n"
                          "/Count 1\n"
                          ">>\n"
                          "endobj\n");
    l_dnaAddNumber(lpd->objsize, strlen(lpd->obj3));

        /* Do the post-datastream string */
    lpd->poststream = stringNew("\n"
                                "endstream\n"
                                "endobj\n");
    return;
}


/*!
 * \brief   generateEscapeString()
 *
 * \param[in]   str      input string
 * \return   hex escape string, or null on error
 *
 * <pre>
 * Notes:
 *      (1) If the input string is not ascii, returns null.
 *      (2) This takes an input ascii string and generates a hex
 *          ascii output string with 4 hex digits out for each byte in,
 *          enclosed in angle brackets.  The feff code at the beginning
 *          tells the pdf interpreter that the data is to be interpreted
 *          as big-endian, two bytes (4 hex digits) at a time.  For
 *          ascii, the first byte of each pair is 0 and the second is
 *          less than 0x80.
 *      (3) The ascii test is made on the byte value as an unsigned
 *          char, so it does not depend on whether plain char is signed.
 *      (4) The returned string is allocated here and must be freed
 *          by the caller.
 * </pre>
 */
static char  *
generateEscapeString(const char  *str)
{
char    *buffer;
l_int32  i, nchar, buflen;

    PROCNAME("generateEscapeString");

    if (!str)
        return (char *)ERROR_PTR("str not defined", procName, NULL);
    nchar = strlen(str);
    for (i = 0; i < nchar; i++) {
        if ((unsigned char)str[i] > 0x7f)
            return (char *)ERROR_PTR("str not all ascii", procName, NULL);
    }

        /* Layout: "<feff" (5) + 4 hex digits per char + ">" (1) + nul */
    buflen = 4 * nchar + 10;
    if ((buffer = (char *)LEPT_CALLOC(buflen, sizeof(char))) == NULL)
        return (char *)ERROR_PTR("buffer not made", procName, NULL);
    memcpy(buffer, "<feff", 5);
    for (i = 0; i < nchar; i++) {
            /* Write each group at its final offset; the nul written by
             * snprintf is overwritten by the next group or by '>'. */
        snprintf(buffer + 5 + 4 * i, 5, "%04x",
                 (unsigned int)(unsigned char)str[i]);
    }
    buffer[5 + 4 * nchar] = '>';
    return buffer;
}


/*!
 * \brief   generateMediaboxPdf()
 *
 * \param[in,out]   lpd     pdf data; receives mediabox, and has the
 *                          y values in lpd->xy rewritten
 * \return  void
 *
 * <pre>
 * Notes:
 *      (1) The mediabox is the box at the origin that extends to the
 *          largest (x + w) and (y + h) over all images, rounded to
 *          the nearest integer, in pts.
 *      (2) On input, each y is the offset of the UL image corner from
 *          the UL corner of the media box.  On output it is the offset
 *          of the LL image corner from the LL corner of the media box,
 *          as PostScript coordinates require.
 * </pre>
 */
static void
generateMediaboxPdf(L_PDF_DATA  *lpd)
{
l_int32    idx;
l_float32  x, y, width, height;
l_float32  extentx, extenty;

        /* Pass 1: find the full extent of all the images, in pts */
    extentx = 0;
    extenty = 0;
    for (idx = 0; idx < lpd->n; idx++) {
        ptaGetPt(lpd->xy, idx, &x, &y);
        ptaGetPt(lpd->wh, idx, &width, &height);
        extentx = L_MAX(extentx, x + width);
        extenty = L_MAX(extenty, y + height);
    }

    lpd->mediabox = boxCreate(0, 0, (l_int32)(extentx + 0.5),
                              (l_int32)(extenty + 0.5));

        /* Pass 2: flip each y from image coordinates (origin at the
         * UL corner) to PostScript coordinates (origin at the LL
         * corner) */
    for (idx = 0; idx < lpd->n; idx++) {
        ptaGetPt(lpd->xy, idx, &x, &y);
        ptaGetPt(lpd->wh, idx, &width, &height);
        ptaSetPt(lpd->xy, idx, x, extenty - y - height);
    }
}


/*!
 * \brief   generatePageStringPdf()
 *
 * \param[in,out]   lpd     pdf data; receives obj4
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) Builds object 4 (the Page), using the mediabox size and one
 *          XObject reference "/Im<k> <6+k-1> 0 R" per image, and
 *          appends its byte length to lpd->objsize.
 *      (2) lpd->mediabox must already be set.
 * </pre>
 */
static l_int32
generatePageStringPdf(L_PDF_DATA  *lpd)
{
char    *buf;
char    *xstr;
l_int32  bufsize, i, wpt, hpt;
SARRAY  *sa;

    PROCNAME("generatePageStringPdf");

        /* Allocate 1000 bytes for the boilerplate text, and
         * 50 bytes for each reference to an image in the
         * XObject dictionary.  */
    bufsize = 1000 + 50 * lpd->n;
    if ((buf = (char *)LEPT_CALLOC(bufsize, sizeof(char))) == NULL)
        return ERROR_INT("calloc fail for buf", procName, 1);

    boxGetGeometry(lpd->mediabox, NULL, NULL, &wpt, &hpt);
    sa = sarrayCreate(lpd->n);
    for (i = 0; i < lpd->n; i++) {
        snprintf(buf, bufsize, "/Im%d %d 0 R   ", i + 1, 6 + i);
        sarrayAddString(sa, buf, L_COPY);
    }
    xstr = sarrayToString(sa, 0);
    sarrayDestroy(&sa);  /* sa is not used past this point */
    if (!xstr) {
        LEPT_FREE(buf);
        return ERROR_INT("xstr not made", procName, 1);
    }

    snprintf(buf, bufsize, "4 0 obj\n"
                           "<<\n"
                           "/Type /Page\n"
                           "/Parent 3 0 R\n"
                           "/MediaBox [%d %d %d %d]\n"
                           "/Contents 5 0 R\n"
                           "/Resources\n"
                           "<<\n"
                           "/XObject << %s >>\n"
                           "/ProcSet [ /ImageB /ImageI /ImageC ]\n"
                           ">>\n"
                           ">>\n"
                           "endobj\n",
                           0, 0, wpt, hpt, xstr);

    lpd->obj4 = stringNew(buf);
    l_dnaAddNumber(lpd->objsize, strlen(lpd->obj4));
    LEPT_FREE(buf);
    LEPT_FREE(xstr);
    return 0;
}


/*!
 * \brief   generateContentStringPdf()
 *
 * \param[in,out]   lpd     pdf data; receives obj5
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) Builds object 5 (the Contents stream).  For each image it
 *          emits one line "q w 0 0 h x y cm /Im<k> Do Q", taking
 *          (x, y) from lpd->xy and (w, h) from lpd->wh.
 *      (2) The stream /Length is the length of the concatenated lines.
 *          The byte length of the whole object is appended to
 *          lpd->objsize.
 * </pre>
 */
static l_int32
generateContentStringPdf(L_PDF_DATA  *lpd)
{
char      *buf;
char      *cstr;
l_int32    i, bufsize;
l_float32  xpt, ypt, wpt, hpt;
SARRAY    *sa;

    PROCNAME("generateContentStringPdf");

        /* 1000 bytes for the boilerplate and 200 for each image line */
    bufsize = 1000 + 200 * lpd->n;
    if ((buf = (char *)LEPT_CALLOC(bufsize, sizeof(char))) == NULL)
        return ERROR_INT("calloc fail for buf", procName, 1);

    sa = sarrayCreate(lpd->n);
    for (i = 0; i < lpd->n; i++) {
        ptaGetPt(lpd->xy, i, &xpt, &ypt);
        ptaGetPt(lpd->wh, i, &wpt, &hpt);
        snprintf(buf, bufsize,
                 "q %.4f %.4f %.4f %.4f %.4f %.4f cm /Im%d Do Q\n",
                 wpt, 0.0, 0.0, hpt, xpt, ypt, i + 1);
        sarrayAddString(sa, buf, L_COPY);
    }
    cstr = sarrayToString(sa, 0);
    sarrayDestroy(&sa);  /* sa is not used past this point */
    if (!cstr) {
        LEPT_FREE(buf);
        return ERROR_INT("cstr not made", procName, 1);
    }

    snprintf(buf, bufsize, "5 0 obj\n"
                           "<< /Length %d >>\n"
                           "stream\n"
                           "%s"
                           "endstream\n"
                           "endobj\n",
                           (l_int32)strlen(cstr), cstr);

    lpd->obj5 = stringNew(buf);
    l_dnaAddNumber(lpd->objsize, strlen(lpd->obj5));
    LEPT_FREE(buf);
    LEPT_FREE(cstr);
    return 0;
}


/*!
 * \brief   generatePreXStringsPdf()
 *
 * \param[in]    lpd    pdf data; reads n, the compressed image data
 *                      and poststream; adds to saprex and objsize
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) For image i, this builds the dictionary and "stream\n"
 *          preamble of object number 6 + i, and inserts it into
 *          lpd->saprex.
 *      (2) The value appended to lpd->objsize for each image is the
 *          preamble length plus the compressed byte count plus the
 *          length of lpd->poststream.
 *      (3) Colormapped flate images refer to a colormap object by
 *          number; those numbers start at 6 + n and are handed out
 *          in image order.
 *      (4) If the colorspace cannot be determined (unsupported spp),
 *          an error is logged and an empty colorspace line is
 *          written.  A NULL string is never passed to snprintf().
 * </pre>
 */
static l_int32
generatePreXStringsPdf(L_PDF_DATA  *lpd)
{
char          buff[256];
char          buf[L_BIGBUF];
char         *cstr, *bstr, *fstr, *pstr, *xstr;
l_int32       i, cmindex;
L_COMP_DATA  *cid;
SARRAY       *sa;

    PROCNAME("generatePreXStringsPdf");

    sa = lpd->saprex;
    cmindex = 6 + lpd->n;  /* starting value */
    for (i = 0; i < lpd->n; i++) {
        pstr = cstr = bstr = fstr = NULL;
        if ((cid = pdfdataGetCid(lpd, i)) == NULL)
            return ERROR_INT("cid not found", procName, 1);

        if (cid->type == L_G4_ENCODE) {
            if (var_WRITE_G4_IMAGE_MASK) {
                cstr = stringNew("/ImageMask true\n"
                                 "/ColorSpace /DeviceGray");
            } else {
                cstr = stringNew("/ColorSpace /DeviceGray");
            }
            bstr = stringNew("/BitsPerComponent 1\n"
                             "/Interpolate true");
            snprintf(buff, sizeof(buff),
                     "/Filter /CCITTFaxDecode\n"
                     "/DecodeParms\n"
                     "<<\n"
                     "/K -1\n"
                     "/Columns %d\n"
                     ">>", cid->w);
            fstr = stringNew(buff);
        } else if (cid->type == L_JPEG_ENCODE) {
            if (cid->spp == 1)
                cstr = stringNew("/ColorSpace /DeviceGray");
            else if (cid->spp == 3)
                cstr = stringNew("/ColorSpace /DeviceRGB");
            else if (cid->spp == 4)   /* pdf supports cmyk */
                cstr = stringNew("/ColorSpace /DeviceCMYK");
            else
                L_ERROR("in jpeg: spp != 1, 3 or 4\n", procName);
            bstr = stringNew("/BitsPerComponent 8");
            fstr = stringNew("/Filter /DCTDecode");
        } else if (cid->type == L_JP2K_ENCODE) {
            if (cid->spp == 1)
                cstr = stringNew("/ColorSpace /DeviceGray");
            else if (cid->spp == 3)
                cstr = stringNew("/ColorSpace /DeviceRGB");
            else
                L_ERROR("in jp2k: spp != 1 && spp != 3\n", procName);
            bstr = stringNew("/BitsPerComponent 8");
            fstr = stringNew("/Filter /JPXDecode");
        } else {  /* type == L_FLATE_ENCODE */
            if (cid->ncolors > 0) {  /* cmapped */
                snprintf(buff, sizeof(buff), "/ColorSpace %d 0 R", cmindex++);
                cstr = stringNew(buff);
            } else {
                if (cid->spp == 1 && cid->bps == 1)
                    cstr = stringNew("/ColorSpace /DeviceGray\n"
                                     "/Decode [1 0]");
                else if (cid->spp == 1)  /* 8 bpp */
                    cstr = stringNew("/ColorSpace /DeviceGray");
                else if (cid->spp == 3)
                    cstr = stringNew("/ColorSpace /DeviceRGB");
                else
                    L_ERROR("unknown colorspace: spp = %d\n",
                            procName, cid->spp);
            }
            snprintf(buff, sizeof(buff), "/BitsPerComponent %d", cid->bps);
            bstr = stringNew(buff);
            fstr = stringNew("/Filter /FlateDecode");
            if (cid->predictor == TRUE) {
                snprintf(buff, sizeof(buff),
                         "/DecodeParms\n"
                         "<<\n"
                         "  /Columns %d\n"
                         "  /Predictor 14\n"
                         "  /Colors %d\n"
                         "  /BitsPerComponent %d\n"
                         ">>\n", cid->w, cid->spp, cid->bps);
                pstr = stringNew(buff);
            }
        }

            /* Any of the four strings may be NULL here: cstr when the
             * colorspace was not recognized, pstr when there are no
             * decode parameters, or any of them on allocation failure.
             * Substitute "" so that "%s" never receives NULL. */
        snprintf(buf, sizeof(buf),
                 "%d 0 obj\n"
                 "<<\n"
                 "/Length %zu\n"
                 "/Subtype /Image\n"
                 "%s\n"  /* colorspace */
                 "/Width %d\n"
                 "/Height %d\n"
                 "%s\n"  /* bits/component */
                 "%s\n"  /* filter */
                 "%s"   /* decode parms; can be empty */
                 ">>\n"
                 "stream\n",
                 6 + i, cid->nbytescomp, cstr ? cstr : "",
                 cid->w, cid->h, bstr ? bstr : "", fstr ? fstr : "",
                 pstr ? pstr : "");
        if (cstr) LEPT_FREE(cstr);
        if (bstr) LEPT_FREE(bstr);
        if (fstr) LEPT_FREE(fstr);
        if (pstr) LEPT_FREE(pstr);

        if ((xstr = stringNew(buf)) == NULL)
            return ERROR_INT("xstr not made", procName, 1);
        l_dnaAddNumber(lpd->objsize,
                      strlen(xstr) + cid->nbytescomp + strlen(lpd->poststream));
        sarrayAddString(sa, xstr, L_INSERT);  /* sa takes ownership of xstr */
    }

    return 0;
}


/*!
 * \brief   generateColormapStringsPdf()
 *
 * \param[in]    lpd    pdf data; adds to sacmap and objsize, sets ncmap
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) The canonical layout is 5 fixed objects, then n image
 *          XObjects, then the colormap objects.  The first colormap
 *          object is therefore number 6 + n.
 *      (2) One "/Indexed /DeviceRGB" object is written for each image
 *          with ncolors != 0.  Each string is inserted into
 *          lpd->sacmap and its length is appended to lpd->objsize.
 * </pre>
 */
static l_int32
generateColormapStringsPdf(L_PDF_DATA  *lpd)
{
char          cmbuf[L_BIGBUF];
char         *objstr;
l_int32       index, objnum, count;
L_COMP_DATA  *cid;
SARRAY       *sacmap;

    PROCNAME("generateColormapStringsPdf");

    sacmap = lpd->sacmap;
    objnum = 6 + lpd->n;  /* number of the first colormap object */
    count = 0;
    for (index = 0; index < lpd->n; index++) {
        cid = pdfdataGetCid(lpd, index);
        if (!cid)
            return ERROR_INT("cid not found", procName, 1);

        if (cid->ncolors != 0) {  /* this image has a colormap */
            snprintf(cmbuf, sizeof(cmbuf), "%d 0 obj\n"
                                           "[ /Indexed /DeviceRGB\n"
                                           "%d\n"
                                           "%s\n"
                                           "]\n"
                                           "endobj\n",
                                           objnum, cid->ncolors - 1,
                                           cid->cmapdatahex);
            objstr = stringNew(cmbuf);
            l_dnaAddNumber(lpd->objsize, strlen(objstr));
            sarrayAddString(sacmap, objstr, L_INSERT);
            objnum++;
            count++;
        }
    }

    lpd->ncmap = count;
    return 0;
}


/*!
 * \brief   generateTrailerPdf()
 *
 * \param[in]    lpd    pdf data; reads objsize, fills objloc, and
 *                      sets xrefloc and trailer
 * \return  void
 *
 * <pre>
 * Notes:
 *      (1) lpd->objsize holds one size for each piece of the file, in
 *          order, with the header at index 0.  The start locations
 *          are the running sum of those sizes:
 *              objloc[0] = 0
 *              objloc[k] = objsize[0] + ... + objsize[k - 1]
 *          so objloc ends up with one more element than objsize.
 *      (2) With n = count of objsize, objloc[n] is where the xref
 *          table starts; it is saved in lpd->xrefloc.
 * </pre>
 */
static void
generateTrailerPdf(L_PDF_DATA  *lpd)
{
l_int32  index, nsizes, objbytes, offset;
L_DNA   *locs, *sizes;

    sizes = lpd->objsize;
    locs = lpd->objloc;

        /* The header starts at byte 0; each later entry starts where
         * the previous one ended. */
    offset = 0;
    l_dnaAddNumber(locs, offset);
    nsizes = l_dnaGetCount(sizes);
    index = 0;
    while (index < nsizes) {
        l_dnaGetIValue(sizes, index, &objbytes);
        offset += objbytes;
        l_dnaAddNumber(locs, offset);
        index++;
    }

        /* The last location is the start of the xref table */
    l_dnaGetIValue(locs, nsizes, &lpd->xrefloc);

        /* Build the trailer text from the locations */
    lpd->trailer = makeTrailerStringPdf(locs);
}


/*!
 * \brief   makeTrailerStringPdf()
 *
 * \param[in]    daloc    start locations; the last element is the
 *                        location of the xref table
 * \return  trailer string (xref table, trailer dictionary, startxref
 *          and EOF marker), or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) With n = count(daloc) - 1, the xref table has the fixed
 *          free entry followed by one 10-digit offset line for each
 *          of daloc[1] ... daloc[n - 1].  daloc[n] is written after
 *          "startxref".
 * </pre>
 */
static char *
makeTrailerStringPdf(L_DNA  *daloc)
{
char     line[L_BIGBUF];
char    *trailer;
l_int32  index, nentries, objstart, xrefstart;
SARRAY  *salines;

    PROCNAME("makeTrailerStringPdf");

    if (daloc == NULL)
        return (char *)ERROR_PTR("daloc not defined", procName, NULL);

        /* Numbered objects + 1 (yes, +1) */
    nentries = l_dnaGetCount(daloc) - 1;
    salines = sarrayCreate(0);

        /* Table header and the free entry */
    snprintf(line, sizeof(line), "xref\n"
                                 "0 %d\n"
                                 "0000000000 65535 f \n", nentries);
    sarrayAddString(salines, line, L_COPY);

        /* One entry for each numbered object */
    index = 1;
    while (index < nentries) {
        l_dnaGetIValue(daloc, index, &objstart);
        snprintf(line, sizeof(line), "%010d 00000 n \n", objstart);
        sarrayAddString(salines, line, L_COPY);
        index++;
    }

        /* Trailer dictionary and pointer back to the xref table */
    l_dnaGetIValue(daloc, nentries, &xrefstart);
    snprintf(line, sizeof(line), "trailer\n"
                                 "<<\n"
                                 "/Size %d\n"
                                 "/Root 1 0 R\n"
                                 "/Info 2 0 R\n"
                                 ">>\n"
                                 "startxref\n"
                                 "%d\n"
                                 "%%%%EOF\n", nentries, xrefstart);
    sarrayAddString(salines, line, L_COPY);

    trailer = sarrayToString(salines, 0);
    sarrayDestroy(&salines);
    return trailer;
}


/*!
 * \brief   generateOutputDataPdf()
 *
 * \param[out]   pdata      pdf data array
 * \param[out]   pnbytes    size of pdf data array
 * \param[in]    lpd        input data used to make pdf
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) Only called from l_generatePdf().  On error, no data is
 *          returned: *pdata is NULL and *pnbytes is 0, and any
 *          partially filled buffer has been freed.
 *      (2) The output is assembled by copying each piece to its
 *          location from lpd->objloc: the header, objects 1-5, then
 *          for each image the preamble, compressed data and
 *          poststream, then the colormap strings, and finally the
 *          trailer at lpd->xrefloc.
 * </pre>
 */
static l_int32
generateOutputDataPdf(l_uint8    **pdata,
                      size_t      *pnbytes,
                      L_PDF_DATA  *lpd)
{
char         *str;
l_uint8      *data, *dest;
l_int32       nimages, i;
l_int32      *sizes, *locs;
size_t        nbytes, len, postlen, trailerlen;
L_COMP_DATA  *cid;

    PROCNAME("generateOutputDataPdf");

    if (!pdata)
        return ERROR_INT("&data not defined", procName, 1);
    *pdata = NULL;
    if (!pnbytes)
        return ERROR_INT("&nbytes not defined", procName, 1);
    *pnbytes = 0;
    if (!lpd)
        return ERROR_INT("lpd not defined", procName, 1);
    if (!lpd->trailer)
        return ERROR_INT("trailer not defined", procName, 1);

    trailerlen = strlen(lpd->trailer);
    nbytes = lpd->xrefloc + trailerlen;
    if ((data = (l_uint8 *)LEPT_CALLOC(nbytes, sizeof(l_uint8))) == NULL)
        return ERROR_INT("calloc fail for data", procName, 1);

    sizes = l_dnaGetIArray(lpd->objsize);
    locs = l_dnaGetIArray(lpd->objloc);
    if (!sizes || !locs) {
        if (sizes) LEPT_FREE(sizes);
        if (locs) LEPT_FREE(locs);
        LEPT_FREE(data);
        return ERROR_INT("sizes and locs not both made", procName, 1);
    }

        /* Header and the five fixed objects */
    memcpy(data, lpd->id, sizes[0]);
    memcpy(data + locs[1], lpd->obj1, sizes[1]);
    memcpy(data + locs[2], lpd->obj2, sizes[2]);
    memcpy(data + locs[3], lpd->obj3, sizes[3]);
    memcpy(data + locs[4], lpd->obj4, sizes[4]);
    memcpy(data + locs[5], lpd->obj5, sizes[5]);

        /* Each image has 3 parts: variable preamble, the compressed
         * data stream, and the fixed poststream. */
    nimages = lpd->n;
    postlen = strlen(lpd->poststream);
    for (i = 0; i < nimages; i++) {
        if ((cid = pdfdataGetCid(lpd, i)) == NULL) {  /* should not happen */
            LEPT_FREE(sizes);
            LEPT_FREE(locs);
            LEPT_FREE(data);  /* honor the "no data on error" contract */
            return ERROR_INT("cid not found", procName, 1);
        }
        str = sarrayGetString(lpd->saprex, i, L_NOCOPY);
        len = strlen(str);
        dest = data + locs[6 + i];
        memcpy(dest, str, len);
        memcpy(dest + len, cid->datacomp, cid->nbytescomp);
        memcpy(dest + len + cid->nbytescomp, lpd->poststream, postlen);
    }

        /* Each colormap is simply a stored string */
    for (i = 0; i < lpd->ncmap; i++) {
        str = sarrayGetString(lpd->sacmap, i, L_NOCOPY);
        memcpy(data + locs[6 + nimages + i], str, strlen(str));
    }

        /* And finally the trailer */
    memcpy(data + lpd->xrefloc, lpd->trailer, trailerlen);
    LEPT_FREE(sizes);
    LEPT_FREE(locs);

        /* Hand the buffer to the caller only after it is complete */
    *pdata = data;
    *pnbytes = nbytes;
    return 0;
}


/*---------------------------------------------------------------------*
 *          Helper functions for generating multipage pdf output       *
 *---------------------------------------------------------------------*/
/*!
 * \brief   parseTrailerPdf()
 *
 * \param[in]    bas     lba of a pdf file
 * \param[out]   pda     byte locations of the beginning of each object
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) The xref location is read from the "startxref" entry,
 *          which is searched for in the last 50 bytes of the file.
 *          The object count is read from the second line of the xref
 *          table, and one start location is read from each of the
 *          following lines.
 *      (2) On success, the returned array holds the start location
 *          of each xref entry (index 0 is the free entry), followed
 *          by the location of the xref table.
 *      (3) Each location for objects 1 and up is verified to lie
 *          inside the data and to point at "<number> 0 obj".  If any
 *          entry is missing, unparsable, out of range or does not
 *          verify, the locations are rebuilt by scanning the file
 *          for " 0 obj\n" and "xref".
 *      (4) All values taken from the file are treated as untrusted.
 *          On error, *pda is NULL.
 * </pre>
 */
static l_int32
parseTrailerPdf(L_BYTEA  *bas,
                L_DNA   **pda)
{
char     *str;
l_uint8   nl = '\n';
l_uint8  *data;
l_int32   i, j, start, startloc, xrefloc, found, loc, nobj, objno, trailer_ok;
size_t    size;
L_DNA    *da, *daobj, *daxref;
SARRAY   *sa;

    PROCNAME("parseTrailerPdf");

    if (!pda)
        return ERROR_INT("&da not defined", procName, 1);
    *pda = NULL;
    if (!bas)
        return ERROR_INT("bas not defined", procName, 1);
    data = l_byteaGetData(bas, &size);
    if (!data || size < 7 || memcmp(data, "%PDF-1.", 7) != 0)
        return ERROR_INT("PDF header signature not found", procName, 1);

        /* Search for "startxref" starting 50 bytes from the EOF */
    start = 0;
    if (size > 50)
        start = size - 50;
    loc = 0;
    found = FALSE;
    arrayFindSequence(data + start, size - start,
                      (l_uint8 *)"startxref\n", 10, &loc, &found);
    if (!found)
        return ERROR_INT("startxref not found!", procName, 1);
    if (sscanf((char *)(data + start + loc + 10), "%d\n", &xrefloc) != 1)
        return ERROR_INT("xrefloc not found!", procName, 1);
    if (xrefloc < 0 || (size_t)xrefloc >= size)
        return ERROR_INT("invalid xrefloc!", procName, 1);
    sa = sarrayCreateLinesFromString((char *)(data + xrefloc), 0);
    if (!sa)
        return ERROR_INT("sa not made", procName, 1);
    str = sarrayGetString(sa, 1, L_NOCOPY);
    if (!str || sscanf(str, "0 %d", &nobj) != 1) {
        sarrayDestroy(&sa);
        return ERROR_INT("nobj not found", procName, 1);
    }

        /* There cannot be more xref entries than bytes in the file */
    if (nobj < 0 || (size_t)nobj > size) {
        sarrayDestroy(&sa);
        return ERROR_INT("invalid nobj", procName, 1);
    }

        /* Get starting locations.  The dna index is the object
         * number; the xref location is added after the last object.
         * A missing or unparsable line marks the trailer as broken. */
    if ((da = l_dnaCreate(nobj + 1)) == NULL) {
        sarrayDestroy(&sa);
        return ERROR_INT("da not made", procName, 1);
    }
    *pda = da;
    trailer_ok = TRUE;
    for (i = 0; i < nobj; i++) {
        str = sarrayGetString(sa, i + 2, L_NOCOPY);
        if (!str || sscanf(str, "%d", &startloc) != 1) {
            L_ERROR("bad xref entry for object %d\n", procName, i);
            trailer_ok = FALSE;
            break;
        }
        l_dnaAddNumber(da, startloc);
    }
    l_dnaAddNumber(da, xrefloc);

#if  DEBUG_MULTIPAGE
    fprintf(stderr, "************** Trailer string ************\n");
    fprintf(stderr, "xrefloc = %d", xrefloc);
    sarrayWriteStream(stderr, sa);

    fprintf(stderr, "************** Object locations ************");
    l_dnaWriteStream(stderr, da);
#endif  /* DEBUG_MULTIPAGE */
    sarrayDestroy(&sa);

        /* Verify correct parsing.  A location outside the data is
         * rejected before it is used to read from the buffer. */
    for (i = 1; trailer_ok && i < nobj; i++) {
        l_dnaGetIValue(da, i, &startloc);
        if (startloc < 0 || (size_t)startloc >= size ||
            sscanf((char *)(data + startloc), "%d 0 obj", &objno) != 1) {
            L_ERROR("bad trailer for object %d\n", procName, i);
            trailer_ok = FALSE;
        }
    }

        /* If the trailer is broken, reconstruct the correct obj locations */
    if (!trailer_ok) {
        L_INFO("rebuilding pdf trailer\n", procName);
        daobj = NULL;
        daxref = NULL;
        l_dnaEmpty(da);
        l_dnaAddNumber(da, 0);
        l_byteaFindEachSequence(bas, (l_uint8 *)" 0 obj\n", 7, &daobj);
        nobj = (daobj) ? l_dnaGetCount(daobj) : 0;
        for (i = 0; i < nobj; i++) {
            l_dnaGetIValue(daobj, i, &loc);
                /* Back up to the newline that precedes the object
                 * number; the object starts just after it. */
            for (j = loc - 1; j > 0; j--) {
                if (data[j] == nl)
                    break;
            }
            l_dnaAddNumber(da, j + 1);
        }
        if (daobj) l_dnaDestroy(&daobj);

        l_byteaFindEachSequence(bas, (l_uint8 *)"xref", 4, &daxref);
        if (!daxref || l_dnaGetCount(daxref) == 0) {
            if (daxref) l_dnaDestroy(&daxref);
            l_dnaDestroy(&da);
            *pda = NULL;
            return ERROR_INT("xref not found; trailer not rebuilt",
                             procName, 1);
        }
        l_dnaGetIValue(daxref, 0, &loc);
        l_dnaAddNumber(da, loc);
        l_dnaDestroy(&daxref);
    }

    return 0;
}


/*!
 * \brief   generatePagesObjStringPdf()
 *
 * \param[in]    napage    object numbers of the page objects
 * \return  string for the Pages object (object 3), or NULL on error;
 *          the caller owns the returned string
 *
 * <pre>
 * Notes:
 *      (1) The Kids array gets one " <number> 0 R " reference for
 *          each value in napage, and Count is the number of values.
 *      (2) The output buffer is sized from the actual Kids string,
 *          so the result is never truncated, however large the
 *          object numbers are.
 *      (3) NOTE(review): the string written here ends after ">>\n"
 *          with no "endobj" line.  Confirm whether the caller is
 *          expected to supply it.
 * </pre>
 */
static char *
generatePagesObjStringPdf(NUMA  *napage)
{
char    *str;
char    *buf;
char     item[32];  /* holds one " <number> 0 R " reference */
l_int32  i, n, index;
size_t   bufsize;
SARRAY  *sa;

    PROCNAME("generatePagesObjStringPdf");

    if (!napage)
        return (char *)ERROR_PTR("napage not defined", procName, NULL);

    n = numaGetCount(napage);
    if ((sa = sarrayCreate(n)) == NULL)
        return (char *)ERROR_PTR("sa not made", procName, NULL);
    for (i = 0; i < n; i++) {
        numaGetIValue(napage, i, &index);
        snprintf(item, sizeof(item), " %d 0 R ", index);
        sarrayAddString(sa, item, L_COPY);
    }
    str = sarrayToString(sa, 0);
    sarrayDestroy(&sa);

        /* 100 bytes is more than the fixed text plus the count needs */
    bufsize = 100 + (str ? strlen(str) : 0);
    if ((buf = (char *)LEPT_CALLOC(bufsize, sizeof(char))) == NULL) {
        if (str) LEPT_FREE(str);
        return (char *)ERROR_PTR("calloc fail for buf", procName, NULL);
    }

        /* str may be NULL if no Kids string was made; use "" then */
    snprintf(buf, bufsize, "3 0 obj\n"
                           "<<\n"
                           "/Type /Pages\n"
                           "/Kids [%s]\n"
                           "/Count %d\n"
                           ">>\n", str ? str : "", n);
    if (str) LEPT_FREE(str);
    return buf;
}


/*!
 * \brief   substituteObjectNumbers()
 *
 * \param[in]   bas        lba of a pdf object
 * \param[in]   na_objs    object number mapping array
 * \return    bad   lba of rewritten pdf for the object
 *
 * <pre>
 * Notes:
 *      (1) Interpret the first set of bytes as the object number,
 *          map to the new number, and write it out.
 *      (2) Find all occurrences of this 4-byte sequence: " 0 R"
 *      (3) Find the location and value of the integer preceding this,
 *          and map it to the new value.
 *      (4) Rewrite the object with new object numbers.
 *      (5) The numbers come from the pdf data and are not trusted.
 *          A number that is negative or not less than the size of
 *          the mapping array is logged and written out unchanged;
 *          text that does not parse as an integer is copied through
 *          as is.  The mapping array is never indexed out of range.
 * </pre>
 */
static L_BYTEA *
substituteObjectNumbers(L_BYTEA  *bas,
                        NUMA     *na_objs)
{
l_uint8   space = ' ';
l_uint8  *datas;
l_uint8   buf[32];  /* only needs to hold one integer in ascii format */
l_int32   start, nrepl, nobjs, i, j, objin, objout, found;
l_int32  *objs, *matches;
size_t    size;
L_BYTEA  *bad;
L_DNA    *da_match;

    PROCNAME("substituteObjectNumbers");

    datas = l_byteaGetData(bas, &size);
    bad = l_byteaCreate(100);
    objs = numaGetIArray(na_objs);  /* object number mapper */
    nobjs = (objs) ? numaGetCount(na_objs) : 0;

        /* Substitute the object number on the first line */
    if (sscanf((char *)datas, "%d", &objin) == 1) {
        if (objin >= 0 && objin < nobjs) {
            objout = objs[objin];
        } else {
            L_ERROR("object number %d not in map\n", procName, objin);
            objout = objin;
        }
        snprintf((char *)buf, 32, "%d", objout);
        l_byteaAppendString(bad, (char *)buf);
    }

        /* Find the set of matching locations for object references */
    start = 0;
    found = FALSE;
    arrayFindSequence(datas, size, &space, 1, &start, &found);
    da_match = arrayFindEachSequence(datas, size, (l_uint8 *)" 0 R", 4);
    if (!da_match) {
        l_byteaAppendData(bad, datas + start, size - start);
        if (objs) LEPT_FREE(objs);
        return bad;
    }

        /* Substitute all the object reference numbers */
    matches = l_dnaGetIArray(da_match);
    nrepl = (matches) ? l_dnaGetCount(da_match) : 0;
    for (i = 0; i < nrepl; i++) {
            /* Find the first space before the object number */
        for (j = matches[i] - 1; j > 0; j--) {
            if (datas[j] == space)
                break;
        }
            /* Copy bytes from 'start' up to the object number.  Skip
             * the copy if that span is empty or would be negative. */
        if (j + 1 > start)
            l_byteaAppendData(bad, datas + start, j - start + 1);
        if (sscanf((char *)(datas + j + 1), "%d", &objin) == 1) {
            if (objin >= 0 && objin < nobjs) {
                objout = objs[objin];
            } else {
                L_ERROR("object number %d not in map\n", procName, objin);
                objout = objin;
            }
            snprintf((char *)buf, 32, "%d", objout);
            l_byteaAppendString(bad, (char *)buf);
        } else if (matches[i] - j - 1 > 0) {
                /* Not a number: keep the original bytes */
            l_byteaAppendData(bad, datas + j + 1, matches[i] - j - 1);
        }
        start = matches[i];
    }
    l_byteaAppendData(bad, datas + start, size - start);

    if (objs) LEPT_FREE(objs);
    if (matches) LEPT_FREE(matches);
    l_dnaDestroy(&da_match);
    return bad;
}


/*---------------------------------------------------------------------*
 *                     Create/destroy/access pdf data                  *
 *---------------------------------------------------------------------*/
static L_PDF_DATA *
pdfdataCreate(const char  *title)
{
L_PDF_DATA *lpd;

    lpd = (L_PDF_DATA *)LEPT_CALLOC(1, sizeof(L_PDF_DATA));
    if (title) lpd->title = stringNew(title);
    lpd->cida = ptraCreate(10);
    lpd->xy = ptaCreate(10);
    lpd->wh = ptaCreate(10);
    lpd->saprex = sarrayCreate(10);
    lpd->sacmap = sarrayCreate(10);
    lpd->objsize = l_dnaCreate(20);
    lpd->objloc = l_dnaCreate(20);
    return lpd;
}

/*!
 * \brief   pdfdataDestroy()
 *
 * \param[in,out]   plpd    pdf data to be destroyed; *plpd is set to
 *                          NULL on return
 * \return  void
 *
 * <pre>
 * Notes:
 *      (1) A null address is reported with a warning; a null *plpd
 *          is silently ignored.
 *      (2) The first lpd->n items are removed from the cida array and
 *          destroyed before the array itself is destroyed.  All
 *          owned strings and containers are then released.
 * </pre>
 */
static void
pdfdataDestroy(L_PDF_DATA  **plpd)
{
l_int32       index;
L_COMP_DATA  *cid;
L_PDF_DATA   *pdfdata;

    PROCNAME("pdfdataDestroy");

    if (!plpd) {
        L_WARNING("ptr address is null!\n", procName);
        return;
    }
    pdfdata = *plpd;
    if (!pdfdata)
        return;

    if (pdfdata->title != NULL)
        LEPT_FREE(pdfdata->title);

        /* Take each compressed-data item out of the array and
         * destroy it; then destroy the (now empty) array. */
    index = 0;
    while (index < pdfdata->n) {
        cid = (L_COMP_DATA *)ptraRemove(pdfdata->cida, index,
                                        L_NO_COMPACTION);
        l_CIDataDestroy(&cid);
        index++;
    }
    ptraDestroy(&pdfdata->cida, 0, 0);

        /* Strings for the header, fixed objects and trailer */
    if (pdfdata->id != NULL) LEPT_FREE(pdfdata->id);
    if (pdfdata->obj1 != NULL) LEPT_FREE(pdfdata->obj1);
    if (pdfdata->obj2 != NULL) LEPT_FREE(pdfdata->obj2);
    if (pdfdata->obj3 != NULL) LEPT_FREE(pdfdata->obj3);
    if (pdfdata->obj4 != NULL) LEPT_FREE(pdfdata->obj4);
    if (pdfdata->obj5 != NULL) LEPT_FREE(pdfdata->obj5);
    if (pdfdata->poststream != NULL) LEPT_FREE(pdfdata->poststream);
    if (pdfdata->trailer != NULL) LEPT_FREE(pdfdata->trailer);

        /* Containers */
    if (pdfdata->xy != NULL) ptaDestroy(&pdfdata->xy);
    if (pdfdata->wh != NULL) ptaDestroy(&pdfdata->wh);
    if (pdfdata->mediabox != NULL) boxDestroy(&pdfdata->mediabox);
    if (pdfdata->saprex != NULL) sarrayDestroy(&pdfdata->saprex);
    if (pdfdata->sacmap != NULL) sarrayDestroy(&pdfdata->sacmap);
    if (pdfdata->objsize != NULL) l_dnaDestroy(&pdfdata->objsize);
    if (pdfdata->objloc != NULL) l_dnaDestroy(&pdfdata->objloc);

    LEPT_FREE(pdfdata);
    *plpd = NULL;
}


/*!
 * \brief   pdfdataGetCid()
 *
 * \param[in]    lpd      pdf data
 * \param[in]    index    image index; valid range is 0 ... lpd->n - 1
 * \return  pointer to the item stored at %index in lpd->cida, or NULL
 *          if lpd is not defined or %index is out of range
 */
static L_COMP_DATA *
pdfdataGetCid(L_PDF_DATA  *lpd,
              l_int32      index)
{
    PROCNAME("pdfdataGetCid");

    if (lpd == NULL)
        return (L_COMP_DATA *)ERROR_PTR("lpd not defined", procName, NULL);

    if (index >= 0 && index < lpd->n)
        return (L_COMP_DATA *)ptraGetPtrToItem(lpd->cida, index);

    return (L_COMP_DATA *)ERROR_PTR("invalid image index", procName, NULL);
}


/*---------------------------------------------------------------------*
 *                       Set flags for special modes                   *
 *---------------------------------------------------------------------*/
/*!
 * \brief   l_pdfSetG4ImageMask()
 *
 * \param[in]    flag    1 to write g4 data as fg only, through a mask;
 *                       0 to write both fg and bg
 * \return  void
 *
 * <pre>
 * Notes:
 *      (1) This stores %flag in a file-level variable that is read
 *          when the g4 image dictionary is generated; a nonzero value
 *          adds an "/ImageMask true" entry.
 *      (2) The default is to write only the fg (through the mask).
 *          With the mask, the bg of a 1 bpp image is transparent, so
 *          an image written earlier remains visible behind it.
 * </pre>
 */
void
l_pdfSetG4ImageMask(l_int32  flag)
{
    var_WRITE_G4_IMAGE_MASK = flag;
}


/*!
 * \brief   l_pdfSetDateAndVersion()
 *
 * \param[in]    flag    1 to write the date/time and leptonica version
 *                       into the metadata; 0 to leave them out
 * \return  void
 *
 * <pre>
 * Notes:
 *      (1) This stores %flag in a file-level variable.
 *      (2) The default is to write this data.  Omitting it is useful
 *          for regression tests that compare the output against
 *          golden files.
 * </pre>
 */
void
l_pdfSetDateAndVersion(l_int32  flag)
{
    var_WRITE_DATE_AND_VERSION = flag;
}

/* --------------------------------------------*/
#endif  /* USE_PDFIO */
/* --------------------------------------------*/