001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.imaging.formats.jpeg.iptc;
019
020import static org.apache.commons.imaging.common.BinaryFunctions.read2Bytes;
021import static org.apache.commons.imaging.common.BinaryFunctions.read4Bytes;
022import static org.apache.commons.imaging.common.BinaryFunctions.readByte;
023import static org.apache.commons.imaging.common.BinaryFunctions.readBytes;
024import static org.apache.commons.imaging.common.BinaryFunctions.slice;
025import static org.apache.commons.imaging.common.BinaryFunctions.startsWith;
026
027import java.io.ByteArrayInputStream;
028import java.io.ByteArrayOutputStream;
029import java.io.IOException;
030import java.io.InputStream;
031import java.nio.ByteOrder;
032import java.nio.charset.StandardCharsets;
033import java.util.ArrayList;
034import java.util.Arrays;
035import java.util.Collections;
036import java.util.Comparator;
037import java.util.List;
038import java.util.Map;
039import java.util.logging.Level;
040import java.util.logging.Logger;
041
042import org.apache.commons.imaging.ImageReadException;
043import org.apache.commons.imaging.ImageWriteException;
044import org.apache.commons.imaging.ImagingConstants;
045import org.apache.commons.imaging.common.BinaryFileParser;
046import org.apache.commons.imaging.common.BinaryFunctions;
047import org.apache.commons.imaging.common.BinaryOutputStream;
048import org.apache.commons.imaging.common.ByteConversions;
049import org.apache.commons.imaging.formats.jpeg.JpegConstants;
050import org.apache.commons.imaging.internal.Debug;
051
052public class IptcParser extends BinaryFileParser {
053
054    private static final Logger LOGGER = Logger.getLogger(IptcParser.class.getName());
055
056    private static final ByteOrder APP13_BYTE_ORDER = ByteOrder.BIG_ENDIAN;
057
058    /**
059     * Block types (or Image Resource IDs) that are not recommended to be
060     * interpreted when libraries process Photoshop IPTC metadata.
061     *
062     * @see https://www.adobe.com/devnet-apps/photoshop/fileformatashtml/
063     * @see https://issues.apache.org/jira/browse/IMAGING-246
064     * @since 1.0-alpha2
065     */
066    private static final List<Integer> PHOTOSHOP_IGNORED_BLOCK_TYPE = Arrays.asList(1084, 1085, 1086, 1087);
067
068    public IptcParser() {
069        setByteOrder(ByteOrder.BIG_ENDIAN);
070    }
071
072    public boolean isPhotoshopJpegSegment(final byte[] segmentData) {
073        if (!startsWith(segmentData,
074                JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING)) {
075            return false;
076        }
077
078        final int index = JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.size();
079        return (index + 4) <= segmentData.length
080                && ByteConversions.toInt(segmentData, index, APP13_BYTE_ORDER) == JpegConstants.CONST_8BIM;
081    }
082
    /*
     * In practice, App13 segments are only used for Photoshop/IPTC metadata.
     * However, we should not treat App13 segments without Photoshop's
     * signature as Photoshop/IPTC segments.
     *
     * A Photoshop/IPTC App13 segment begins with the Photoshop Identification
     * string.
     *
     * There follow 0-N blocks (Photoshop calls them "Image Resource Blocks").
     *
     * Each block has the following structure:
     *
     * 1. 4-byte type. This is always "8BIM" for blocks in a Photoshop App13
     *    segment.
     * 2. 2-byte id. IPTC data is stored in blocks with id 0x0404, aka.
     *    IPTC_NAA_RECORD_IMAGE_RESOURCE_ID.
     * 3. Block name as a Pascal String. This is padded to have an even
     *    length.
     * 4. 4-byte size (in bytes).
     * 5. Block data. This is also padded to have an even length.
     *
     * The block data consists of 0-N records. A record has the following
     * structure:
     *
     * 1. 2-byte prefix. The value is always 0x1C02.
     * 2. 1-byte record type. The record types are documented by the IPTC.
     *    See IptcConstants.
     * 3. 2-byte record size (in bytes).
     * 4. Record data, "record size" bytes long.
     *
     * Record data (unlike block data) is NOT padded to have an even length.
     *
     * Record data, for IPTC records, should always be ISO-8859-1. But
     * according to SANSELAN-33, this isn't always the case.
     *
     * The exception is the first record in the block, which must always be a
     * record version record, whose value is a two-byte number; the value is
     * 0x02.
     *
     * Some IPTC blocks are missing this first "record version" record, so we
     * don't require it.
     */
120    public PhotoshopApp13Data parsePhotoshopSegment(final byte[] bytes, final Map<String, Object> params)
121            throws ImageReadException, IOException {
122        final boolean strict =  params != null && Boolean.TRUE.equals(params.get(ImagingConstants.PARAM_KEY_STRICT));
123
124        return parsePhotoshopSegment(bytes, strict);
125    }
126
127    public PhotoshopApp13Data parsePhotoshopSegment(final byte[] bytes, final boolean strict) throws ImageReadException,
128            IOException {
129        final List<IptcRecord> records = new ArrayList<>();
130
131        final List<IptcBlock> blocks = parseAllBlocks(bytes, strict);
132
133        for (final IptcBlock block : blocks) {
134            // Ignore everything but IPTC data.
135            if (!block.isIPTCBlock()) {
136                continue;
137            }
138
139            records.addAll(parseIPTCBlock(block.getBlockData()));
140        }
141
142        return new PhotoshopApp13Data(records, blocks);
143    }
144
145    protected List<IptcRecord> parseIPTCBlock(final byte[] bytes)
146            throws IOException {
147        final List<IptcRecord> elements = new ArrayList<>();
148
149        int index = 0;
150        // Integer recordVersion = null;
151        while (index + 1 < bytes.length) {
152            final int tagMarker = 0xff & bytes[index++];
153            Debug.debug("tagMarker: " + tagMarker + " (0x" + Integer.toHexString(tagMarker) + ")");
154
155            if (tagMarker != IptcConstants.IPTC_RECORD_TAG_MARKER) {
156                if (LOGGER.isLoggable(Level.FINE)) {
157                    LOGGER.fine("Unexpected record tag marker in IPTC data.");
158                }
159                return elements;
160            }
161
162            final int recordNumber = 0xff & bytes[index++];
163            Debug.debug("recordNumber: " + recordNumber + " (0x" + Integer.toHexString(recordNumber) + ")");
164
165            // int recordPrefix = convertByteArrayToShort("recordPrefix", index,
166            // bytes);
167            // if (verbose)
168            // Debug.debug("recordPrefix", recordPrefix + " (0x"
169            // + Integer.toHexString(recordPrefix) + ")");
170            // index += 2;
171            //
172            // if (recordPrefix != IPTC_RECORD_PREFIX)
173            // {
174            // if (verbose)
175            // System.out
176            // .println("Unexpected record prefix in IPTC data!");
177            // return elements;
178            // }
179
180            // throw new ImageReadException(
181            // "Unexpected record prefix in IPTC data.");
182
183            final int recordType = 0xff & bytes[index];
184            Debug.debug("recordType: " + recordType + " (0x" + Integer.toHexString(recordType) + ")");
185            index++;
186
187            final int recordSize = ByteConversions.toUInt16(bytes, index, getByteOrder());
188            index += 2;
189
190            final boolean extendedDataset = recordSize > IptcConstants.IPTC_NON_EXTENDED_RECORD_MAXIMUM_SIZE;
191            final int dataFieldCountLength = recordSize & 0x7fff;
192            if (extendedDataset) {
193                Debug.debug("extendedDataset. dataFieldCountLength: " + dataFieldCountLength);
194            }
195            if (extendedDataset) {
196                // ignore extended dataset and everything after.
197                return elements;
198            }
199
200            final byte[] recordData = slice(bytes, index, recordSize);
201            index += recordSize;
202
203            // Debug.debug("recordSize", recordSize + " (0x"
204            // + Integer.toHexString(recordSize) + ")");
205
206            if (recordNumber != IptcConstants.IPTC_APPLICATION_2_RECORD_NUMBER) {
207                continue;
208            }
209
210            if (recordType == 0) {
211                if (LOGGER.isLoggable(Level.FINE)) {
212                    LOGGER.fine("ignore record version record! " + elements.size());
213                }
214                // ignore "record version" record;
215                continue;
216            }
217            // if (recordVersion == null)
218            // {
219            // // The first record in a JPEG/Photoshop IPTC block must be
220            // // the record version.
221            // if (recordType != 0)
222            // throw new ImageReadException("Missing record version: "
223            // + recordType);
224            // recordVersion = new Integer(convertByteArrayToShort(
225            // "recordNumber", recordData));
226            //
227            // if (recordSize != 2)
228            // throw new ImageReadException(
229            // "Invalid record version record size: " + recordSize);
230            //
231            // // JPEG/Photoshop IPTC metadata is always in Record version
232            // // 2
233            // if (recordVersion.intValue() != 2)
234            // throw new ImageReadException(
235            // "Invalid IPTC record version: " + recordVersion);
236            //
237            // // Debug.debug("recordVersion", recordVersion);
238            // continue;
239            // }
240
241            final String value = new String(recordData, StandardCharsets.ISO_8859_1);
242
243            final IptcType iptcType = IptcTypeLookup.getIptcType(recordType);
244
245            // Debug.debug("iptcType", iptcType);
246            // debugByteArray("iptcData", iptcData);
247            // Debug.debug();
248
249            // if (recordType == IPTC_TYPE_CREDIT.type
250            // || recordType == IPTC_TYPE_OBJECT_NAME.type)
251            // {
252            // this.debugByteArray("recordData", recordData);
253            // Debug.debug("index", IPTC_TYPE_CREDIT.name);
254            // }
255
256            final IptcRecord element = new IptcRecord(iptcType, value);
257            elements.add(element);
258        }
259
260        return elements;
261    }
262
263    protected List<IptcBlock> parseAllBlocks(final byte[] bytes,
264            final boolean strict) throws ImageReadException, IOException {
265        final List<IptcBlock> blocks = new ArrayList<>();
266
267        try (InputStream bis = new ByteArrayInputStream(bytes)) {
268
269            // Note that these are unsigned quantities. Name is always an even
270            // number of bytes (including the 1st byte, which is the size.)
271
272            final byte[] idString = readBytes("", bis,
273                    JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.size(),
274                    "App13 Segment missing identification string");
275            if (!JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.equals(idString)) {
276                throw new ImageReadException("Not a Photoshop App13 Segment");
277            }
278
279            // int index = PHOTOSHOP_IDENTIFICATION_STRING.length;
280
281            while (true) {
282                final int imageResourceBlockSignature;
283                try {
284                    imageResourceBlockSignature = read4Bytes("", bis,
285                            "Image Resource Block missing identification string", APP13_BYTE_ORDER);
286                } catch (final IOException ioEx) {
287                    break;
288                }
289                if (imageResourceBlockSignature != JpegConstants.CONST_8BIM) {
290                    throw new ImageReadException(
291                            "Invalid Image Resource Block Signature");
292                }
293
294                final int blockType = read2Bytes("", bis, "Image Resource Block missing type", APP13_BYTE_ORDER);
295                Debug.debug("blockType: " + blockType + " (0x" + Integer.toHexString(blockType) + ")");
296
297                // skip blocks that the photoshop spec recommends to, see IMAGING-246
298                if (PHOTOSHOP_IGNORED_BLOCK_TYPE.contains(blockType)) {
299                    Debug.debug("Skipping blockType: " + blockType + " (0x" + Integer.toHexString(blockType) + ")");
300                    // if there is still data in this block, before the next image resource block
301                    // (8BIM), then we must consume these bytes to leave a pointer ready to read
302                    // the next block
303                    BinaryFunctions.searchQuad(JpegConstants.CONST_8BIM, bis);
304                    continue;
305                }
306
307                final int blockNameLength = readByte("Name length", bis, "Image Resource Block missing name length");
308                if (blockNameLength > 0) {
309                    Debug.debug("blockNameLength: " + blockNameLength + " (0x"
310                            + Integer.toHexString(blockNameLength) + ")");
311                }
312                byte[] blockNameBytes;
313                if (blockNameLength == 0) {
314                    readByte("Block name bytes", bis, "Image Resource Block has invalid name");
315                    blockNameBytes = new byte[0];
316                } else {
317                    try {
318                        blockNameBytes = readBytes("", bis, blockNameLength,
319                                "Invalid Image Resource Block name");
320                    } catch (final IOException ioEx) {
321                        if (strict) {
322                            throw ioEx;
323                        }
324                        break;
325                    }
326
327                    if (blockNameLength % 2 == 0) {
328                        readByte("Padding byte", bis, "Image Resource Block missing padding byte");
329                    }
330                }
331
332                final int blockSize = read4Bytes("", bis, "Image Resource Block missing size", APP13_BYTE_ORDER);
333                Debug.debug("blockSize: " + blockSize + " (0x" + Integer.toHexString(blockSize) + ")");
334
335                /*
336                 * doesn't catch cases where blocksize is invalid but is still less
337                 * than bytes.length but will at least prevent OutOfMemory errors
338                 */
339                if (blockSize > bytes.length) {
340                    throw new ImageReadException("Invalid Block Size : " + blockSize + " > " + bytes.length);
341                }
342
343                final byte[] blockData;
344                try {
345                    blockData = readBytes("", bis, blockSize, "Invalid Image Resource Block data");
346                } catch (final IOException ioEx) {
347                    if (strict) {
348                        throw ioEx;
349                    }
350                    break;
351                }
352
353                blocks.add(new IptcBlock(blockType, blockNameBytes, blockData));
354
355                if ((blockSize % 2) != 0) {
356                    readByte("Padding byte", bis, "Image Resource Block missing padding byte");
357                }
358            }
359
360            return blocks;
361        }
362    }
363
364    // private void writeIPTCRecord(BinaryOutputStream bos, )
365
366    public byte[] writePhotoshopApp13Segment(final PhotoshopApp13Data data)
367            throws IOException, ImageWriteException {
368        final ByteArrayOutputStream os = new ByteArrayOutputStream();
369        final BinaryOutputStream bos = new BinaryOutputStream(os);
370
371        JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.writeTo(bos);
372
373        final List<IptcBlock> blocks = data.getRawBlocks();
374        for (final IptcBlock block : blocks) {
375            bos.write4Bytes(JpegConstants.CONST_8BIM);
376
377            if (block.getBlockType() < 0 || block.getBlockType() > 0xffff) {
378                throw new ImageWriteException("Invalid IPTC block type.");
379            }
380            bos.write2Bytes(block.getBlockType());
381
382            final byte[] blockNameBytes = block.getBlockNameBytes();
383            if (blockNameBytes.length > 255) {
384                throw new ImageWriteException("IPTC block name is too long: " + blockNameBytes.length);
385            }
386            bos.write(blockNameBytes.length);
387            bos.write(blockNameBytes);
388            if (blockNameBytes.length % 2 == 0) {
389                bos.write(0); // pad to even size, including length byte.
390            }
391
392            final byte[] blockData = block.getBlockData();
393            if (blockData.length > IptcConstants.IPTC_NON_EXTENDED_RECORD_MAXIMUM_SIZE) {
394                throw new ImageWriteException("IPTC block data is too long: " + blockData.length);
395            }
396            bos.write4Bytes(blockData.length);
397            bos.write(blockData);
398            if (blockData.length % 2 == 1) {
399                bos.write(0); // pad to even size
400            }
401        }
402
403        bos.flush();
404        return os.toByteArray();
405    }
406
407    public byte[] writeIPTCBlock(List<IptcRecord> elements)
408            throws ImageWriteException, IOException {
409        byte[] blockData;
410        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
411        try (BinaryOutputStream bos = new BinaryOutputStream(baos, getByteOrder())) {
412
413            // first, right record version record
414            bos.write(IptcConstants.IPTC_RECORD_TAG_MARKER);
415            bos.write(IptcConstants.IPTC_APPLICATION_2_RECORD_NUMBER);
416            bos.write(IptcTypes.RECORD_VERSION.type); // record version record
417                                                      // type.
418            bos.write2Bytes(2); // record version record size
419            bos.write2Bytes(2); // record version value
420
421            // make a copy of the list.
422            elements = new ArrayList<>(elements);
423
424            // sort the list. Records must be in numerical order.
425            final Comparator<IptcRecord> comparator = (e1, e2) -> e2.iptcType.getType() - e1.iptcType.getType();
426            Collections.sort(elements, comparator);
427            // TODO: make sure order right
428
429            // write the list.
430            for (final IptcRecord element : elements) {
431                if (element.iptcType == IptcTypes.RECORD_VERSION) {
432                    continue; // ignore
433                }
434
435                bos.write(IptcConstants.IPTC_RECORD_TAG_MARKER);
436                bos.write(IptcConstants.IPTC_APPLICATION_2_RECORD_NUMBER);
437                if (element.iptcType.getType() < 0
438                        || element.iptcType.getType() > 0xff) {
439                    throw new ImageWriteException("Invalid record type: "
440                            + element.iptcType.getType());
441                }
442                bos.write(element.iptcType.getType());
443
444                final byte[] recordData = element.getValue().getBytes(StandardCharsets.ISO_8859_1);
445                if (!new String(recordData, StandardCharsets.ISO_8859_1).equals(element.getValue())) {
446                    throw new ImageWriteException(
447                            "Invalid record value, not ISO-8859-1");
448                }
449
450                bos.write2Bytes(recordData.length);
451                bos.write(recordData);
452            }
453        }
454
455        blockData = baos.toByteArray();
456
457        return blockData;
458    }
459
460}