/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.imaging.formats.jpeg.iptc;

import static org.apache.commons.imaging.common.BinaryFunctions.read2Bytes;
import static org.apache.commons.imaging.common.BinaryFunctions.read4Bytes;
import static org.apache.commons.imaging.common.BinaryFunctions.readByte;
import static org.apache.commons.imaging.common.BinaryFunctions.readBytes;
import static org.apache.commons.imaging.common.BinaryFunctions.slice;
import static org.apache.commons.imaging.common.BinaryFunctions.startsWith;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.commons.imaging.ImageReadException;
import org.apache.commons.imaging.ImageWriteException;
import org.apache.commons.imaging.ImagingConstants;
import org.apache.commons.imaging.common.BinaryFileParser;
import org.apache.commons.imaging.common.BinaryFunctions;
import org.apache.commons.imaging.common.BinaryOutputStream;
import org.apache.commons.imaging.common.ByteConversions;
import org.apache.commons.imaging.formats.jpeg.JpegConstants;
import org.apache.commons.imaging.internal.Debug;

/**
 * Reads and writes Photoshop App13 segments (sequences of "Image Resource
 * Blocks") and the IPTC records stored inside IPTC blocks.
 */
public class IptcParser extends BinaryFileParser {

    private static final Logger LOGGER = Logger.getLogger(IptcParser.class.getName());

    private static final ByteOrder APP13_BYTE_ORDER = ByteOrder.BIG_ENDIAN;

    /**
     * Number of header bytes that follow the 1-byte tag marker of an IPTC
     * record: record number (1), record type (1) and record size (2).
     */
    private static final int RECORD_HEADER_BYTES_AFTER_MARKER = 4;

    /**
     * Block types (or Image Resource IDs) that are not recommended to be
     * interpreted when libraries process Photoshop IPTC metadata.
     *
     * @see <a href="https://www.adobe.com/devnet-apps/photoshop/fileformatashtml/">Adobe Photoshop File Formats Specification</a>
     * @see <a href="https://issues.apache.org/jira/browse/IMAGING-246">IMAGING-246</a>
     * @since 1.0-alpha2
     */
    private static final List<Integer> PHOTOSHOP_IGNORED_BLOCK_TYPE = Arrays.asList(1084, 1085, 1086, 1087);

    /**
     * Creates a parser whose byte order is big-endian.
     */
    public IptcParser() {
        setByteOrder(ByteOrder.BIG_ENDIAN);
    }

    /**
     * Tests whether the given segment data starts with the Photoshop
     * identification string immediately followed by the "8BIM" signature.
     *
     * @param segmentData the raw segment bytes
     * @return {@code true} if both the identification string and the 4-byte
     *         signature are present, {@code false} otherwise
     */
    public boolean isPhotoshopJpegSegment(final byte[] segmentData) {
        if (!startsWith(segmentData,
                JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING)) {
            return false;
        }

        final int index = JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.size();
        return (index + 4) <= segmentData.length
                && ByteConversions.toInt(segmentData, index, APP13_BYTE_ORDER) == JpegConstants.CONST_8BIM;
    }

    /*
     * In practice, App13 segments are only used for Photoshop/IPTC metadata.
     * However, we should not treat App13 segments without Photoshop's
     * signature as Photoshop/IPTC segments.
     *
     * A Photoshop/IPTC App13 segment begins with the Photoshop Identification
     * string.
     *
     * There follow 0-N blocks (Photoshop calls them "Image Resource Blocks").
     *
     * Each block has the following structure:
     *
     * 1. 4-byte type. This is always "8BIM" for blocks in a Photoshop App13
     *    segment.
     * 2. 2-byte id. IPTC data is stored in blocks with id 0x0404, aka.
     *    IPTC_NAA_RECORD_IMAGE_RESOURCE_ID.
     * 3. Block name as a Pascal String. This is padded to have an even
     *    length.
     * 4. 4-byte size (in bytes).
     * 5. Block data. This is also padded to have an even length.
     *
     * The block data consists of 0-N records. A record has the following
     * structure:
     *
     * 1. 2-byte prefix. The value is always 0x1C02.
     * 2. 1-byte record type. The record types are documented by the IPTC.
     *    See IptcConstants.
     * 3. 2-byte record size (in bytes).
     * 4. Record data, "record size" bytes long.
     *
     * Record data (unlike block data) is NOT padded to have an even length.
     *
     * Record data, for IPTC records, should always be ISO-8859-1. But
     * according to SANSELAN-33, this isn't always the case.
     *
     * The exception is the first record in the block, which must always be a
     * record version record, whose value is a two-byte number; the value is
     * 0x02.
     *
     * Some IPTC blocks are missing this first "record version" record, so we
     * don't require it.
     */

    /**
     * Parses a Photoshop App13 segment, taking the strictness flag from the
     * parameter map.
     *
     * @param bytes  the raw segment bytes, starting with the Photoshop
     *               identification string
     * @param params optional parameters, may be {@code null}; strict mode is
     *               enabled only when {@link ImagingConstants#PARAM_KEY_STRICT}
     *               maps to {@link Boolean#TRUE}
     * @return the parsed records together with all raw blocks
     * @throws ImageReadException if the segment is malformed
     * @throws IOException        if the data ends unexpectedly
     */
    public PhotoshopApp13Data parsePhotoshopSegment(final byte[] bytes, final Map<String, Object> params)
            throws ImageReadException, IOException {
        final boolean strict = params != null && Boolean.TRUE.equals(params.get(ImagingConstants.PARAM_KEY_STRICT));

        return parsePhotoshopSegment(bytes, strict);
    }

    /**
     * Parses a Photoshop App13 segment: reads every block, then extracts the
     * IPTC records from the blocks that report themselves as IPTC blocks.
     *
     * @param bytes  the raw segment bytes, starting with the Photoshop
     *               identification string
     * @param strict passed through to {@link #parseAllBlocks(byte[], boolean)}
     * @return the parsed records together with all raw blocks
     * @throws ImageReadException if the segment is malformed
     * @throws IOException        if the data ends unexpectedly
     */
    public PhotoshopApp13Data parsePhotoshopSegment(final byte[] bytes, final boolean strict) throws ImageReadException,
            IOException {
        final List<IptcRecord> records = new ArrayList<>();

        final List<IptcBlock> blocks = parseAllBlocks(bytes, strict);

        for (final IptcBlock block : blocks) {
            // Ignore everything but IPTC data.
            if (!block.isIPTCBlock()) {
                continue;
            }

            records.addAll(parseIPTCBlock(block.getBlockData()));
        }

        return new PhotoshopApp13Data(records, blocks);
    }

    /**
     * Parses the data of one IPTC block into records.
     *
     * <p>Parsing is lenient: it stops and returns the records collected so far
     * when it meets an unexpected tag marker, an extended dataset, or a record
     * whose header or data runs past the end of {@code bytes}. Records whose
     * record number is not the application-2 record number, and "record
     * version" records (type 0), are skipped.</p>
     *
     * @param bytes the block data
     * @return the records found, possibly empty, never {@code null}
     * @throws IOException declared for subclasses and helpers; truncated input
     *                     is handled by returning early rather than throwing
     */
    protected List<IptcRecord> parseIPTCBlock(final byte[] bytes)
            throws IOException {
        final List<IptcRecord> elements = new ArrayList<>();

        int index = 0;
        while (index + 1 < bytes.length) {
            final int tagMarker = 0xff & bytes[index++];
            Debug.debug("tagMarker: " + tagMarker + " (0x" + Integer.toHexString(tagMarker) + ")");

            if (tagMarker != IptcConstants.IPTC_RECORD_TAG_MARKER) {
                if (LOGGER.isLoggable(Level.FINE)) {
                    LOGGER.fine("Unexpected record tag marker in IPTC data.");
                }
                return elements;
            }

            // The loop condition only guarantees two bytes; make sure the rest
            // of the header is really there before indexing into the array.
            if (bytes.length - index < RECORD_HEADER_BYTES_AFTER_MARKER) {
                if (LOGGER.isLoggable(Level.FINE)) {
                    LOGGER.fine("Truncated record header in IPTC data.");
                }
                return elements;
            }

            final int recordNumber = 0xff & bytes[index++];
            Debug.debug("recordNumber: " + recordNumber + " (0x" + Integer.toHexString(recordNumber) + ")");

            final int recordType = 0xff & bytes[index++];
            Debug.debug("recordType: " + recordType + " (0x" + Integer.toHexString(recordType) + ")");

            final int recordSize = ByteConversions.toUInt16(bytes, index, getByteOrder());
            index += 2;

            if (recordSize > IptcConstants.IPTC_NON_EXTENDED_RECORD_MAXIMUM_SIZE) {
                final int dataFieldCountLength = recordSize & 0x7fff;
                Debug.debug("extendedDataset. dataFieldCountLength: " + dataFieldCountLength);
                // ignore extended dataset and everything after.
                return elements;
            }

            // A record that claims more data than is left is truncated.
            if (recordSize > bytes.length - index) {
                if (LOGGER.isLoggable(Level.FINE)) {
                    LOGGER.fine("Truncated record data in IPTC data.");
                }
                return elements;
            }

            final byte[] recordData = slice(bytes, index, recordSize);
            index += recordSize;

            if (recordNumber != IptcConstants.IPTC_APPLICATION_2_RECORD_NUMBER) {
                continue;
            }

            if (recordType == 0) {
                if (LOGGER.isLoggable(Level.FINE)) {
                    LOGGER.fine("ignore record version record! " + elements.size());
                }
                // ignore "record version" record; it is neither required nor
                // validated.
                continue;
            }

            final String value = new String(recordData, StandardCharsets.ISO_8859_1);

            final IptcType iptcType = IptcTypeLookup.getIptcType(recordType);

            elements.add(new IptcRecord(iptcType, value));
        }

        return elements;
    }

    /**
     * Reads every Image Resource Block of a Photoshop App13 segment.
     *
     * @param bytes  the raw segment bytes, starting with the Photoshop
     *               identification string
     * @param strict if {@code true}, an {@link IOException} raised while
     *               reading a block name or block data is rethrown; if
     *               {@code false}, parsing stops and the blocks read so far
     *               are returned
     * @return the blocks read, possibly empty, never {@code null}
     * @throws ImageReadException if the identification string or a block
     *                            signature is wrong, or a block size is
     *                            negative or larger than {@code bytes}
     * @throws IOException        if the data ends unexpectedly
     */
    protected List<IptcBlock> parseAllBlocks(final byte[] bytes,
            final boolean strict) throws ImageReadException, IOException {
        final List<IptcBlock> blocks = new ArrayList<>();

        try (InputStream bis = new ByteArrayInputStream(bytes)) {

            // Note that these are unsigned quantities. Name is always an even
            // number of bytes (including the 1st byte, which is the size.)

            final byte[] idString = readBytes("", bis,
                    JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.size(),
                    "App13 Segment missing identification string");
            if (!JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.equals(idString)) {
                throw new ImageReadException("Not a Photoshop App13 Segment");
            }

            while (true) {
                final int imageResourceBlockSignature;
                try {
                    imageResourceBlockSignature = read4Bytes("", bis,
                            "Image Resource Block missing identification string", APP13_BYTE_ORDER);
                } catch (final IOException ignored) {
                    // Deliberate: failing to read another signature is how the
                    // end of the block list is detected.
                    break;
                }
                if (imageResourceBlockSignature != JpegConstants.CONST_8BIM) {
                    throw new ImageReadException(
                            "Invalid Image Resource Block Signature");
                }

                final int blockType = read2Bytes("", bis, "Image Resource Block missing type", APP13_BYTE_ORDER);
                Debug.debug("blockType: " + blockType + " (0x" + Integer.toHexString(blockType) + ")");

                // skip blocks that the photoshop spec recommends to, see IMAGING-246
                if (PHOTOSHOP_IGNORED_BLOCK_TYPE.contains(blockType)) {
                    Debug.debug("Skipping blockType: " + blockType + " (0x" + Integer.toHexString(blockType) + ")");
                    // if there is still data in this block, before the next image resource block
                    // (8BIM), then we must consume these bytes to leave a pointer ready to read
                    // the next block
                    // NOTE(review): where exactly searchQuad leaves the stream relative to the
                    // signature is not visible here - confirm against BinaryFunctions.
                    BinaryFunctions.searchQuad(JpegConstants.CONST_8BIM, bis);
                    continue;
                }

                final int blockNameLength = readByte("Name length", bis, "Image Resource Block missing name length");
                if (blockNameLength > 0) {
                    Debug.debug("blockNameLength: " + blockNameLength + " (0x"
                            + Integer.toHexString(blockNameLength) + ")");
                }
                final byte[] blockNameBytes;
                if (blockNameLength == 0) {
                    // Empty name: consume the byte that pads the length byte to an even size.
                    readByte("Block name bytes", bis, "Image Resource Block has invalid name");
                    blockNameBytes = new byte[0];
                } else {
                    try {
                        blockNameBytes = readBytes("", bis, blockNameLength,
                                "Invalid Image Resource Block name");
                    } catch (final IOException ioEx) {
                        if (strict) {
                            throw ioEx;
                        }
                        break;
                    }

                    // Length byte + name must be even, so an even-length name needs one padding byte.
                    if (blockNameLength % 2 == 0) {
                        readByte("Padding byte", bis, "Image Resource Block missing padding byte");
                    }
                }

                final int blockSize = read4Bytes("", bis, "Image Resource Block missing size", APP13_BYTE_ORDER);
                Debug.debug("blockSize: " + blockSize + " (0x" + Integer.toHexString(blockSize) + ")");

                // blockSize is a signed int; a negative value must never be used as an array length.
                if (blockSize < 0) {
                    throw new ImageReadException("Invalid Block Size : " + blockSize);
                }

                /*
                 * doesn't catch cases where blocksize is invalid but is still less
                 * than bytes.length but will at least prevent OutOfMemory errors
                 */
                if (blockSize > bytes.length) {
                    throw new ImageReadException("Invalid Block Size : " + blockSize + " > " + bytes.length);
                }

                final byte[] blockData;
                try {
                    blockData = readBytes("", bis, blockSize, "Invalid Image Resource Block data");
                } catch (final IOException ioEx) {
                    if (strict) {
                        throw ioEx;
                    }
                    break;
                }

                blocks.add(new IptcBlock(blockType, blockNameBytes, blockData));

                // Block data is padded to an even length.
                if ((blockSize % 2) != 0) {
                    readByte("Padding byte", bis, "Image Resource Block missing padding byte");
                }
            }

            return blocks;
        }
    }

    /**
     * Serializes the raw blocks of the given data into a Photoshop App13
     * segment: the identification string followed by each block (signature,
     * type, padded name, size, padded data).
     *
     * @param data the data whose raw blocks are written
     * @return the segment bytes
     * @throws IOException         if writing to the in-memory stream fails
     * @throws ImageWriteException if a block type is outside 0..0xffff, a
     *                             block name is longer than 255 bytes, or
     *                             block data exceeds
     *                             {@code IptcConstants.IPTC_NON_EXTENDED_RECORD_MAXIMUM_SIZE}
     */
    public byte[] writePhotoshopApp13Segment(final PhotoshopApp13Data data)
            throws IOException, ImageWriteException {
        final ByteArrayOutputStream os = new ByteArrayOutputStream();
        try (BinaryOutputStream bos = new BinaryOutputStream(os)) {

            JpegConstants.PHOTOSHOP_IDENTIFICATION_STRING.writeTo(bos);

            final List<IptcBlock> blocks = data.getRawBlocks();
            for (final IptcBlock block : blocks) {
                bos.write4Bytes(JpegConstants.CONST_8BIM);

                if (block.getBlockType() < 0 || block.getBlockType() > 0xffff) {
                    throw new ImageWriteException("Invalid IPTC block type.");
                }
                bos.write2Bytes(block.getBlockType());

                final byte[] blockNameBytes = block.getBlockNameBytes();
                if (blockNameBytes.length > 255) {
                    throw new ImageWriteException("IPTC block name is too long: " + blockNameBytes.length);
                }
                bos.write(blockNameBytes.length);
                bos.write(blockNameBytes);
                if (blockNameBytes.length % 2 == 0) {
                    bos.write(0); // pad to even size, including length byte.
                }

                final byte[] blockData = block.getBlockData();
                if (blockData.length > IptcConstants.IPTC_NON_EXTENDED_RECORD_MAXIMUM_SIZE) {
                    throw new ImageWriteException("IPTC block data is too long: " + blockData.length);
                }
                bos.write4Bytes(blockData.length);
                bos.write(blockData);
                if (blockData.length % 2 == 1) {
                    bos.write(0); // pad to even size
                }
            }

            bos.flush();
        }
        return os.toByteArray();
    }

    /**
     * Serializes IPTC records into the data of one IPTC block. A record
     * version record is always written first; any record version entries in
     * {@code elements} are skipped. The caller's list is not modified.
     *
     * @param elements the records to write
     * @return the block data
     * @throws ImageWriteException if a record type is outside 0..0xff, a value
     *                             cannot be represented in ISO-8859-1, or a
     *                             value is longer than
     *                             {@code IptcConstants.IPTC_NON_EXTENDED_RECORD_MAXIMUM_SIZE}
     *                             bytes
     * @throws IOException         if writing to the in-memory stream fails
     */
    public byte[] writeIPTCBlock(List<IptcRecord> elements)
            throws ImageWriteException, IOException {
        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
        try (BinaryOutputStream bos = new BinaryOutputStream(baos, getByteOrder())) {

            // first, write the record version record
            bos.write(IptcConstants.IPTC_RECORD_TAG_MARKER);
            bos.write(IptcConstants.IPTC_APPLICATION_2_RECORD_NUMBER);
            bos.write(IptcTypes.RECORD_VERSION.type); // record version record type.
            bos.write2Bytes(2); // record version record size
            bos.write2Bytes(2); // record version value

            // work on a copy so the caller's list keeps its order.
            final List<IptcRecord> sorted = new ArrayList<>(elements);

            // sort the list by record type. The existing order (highest type
            // first) is kept as is; Integer.compare avoids the overflow that
            // int subtraction can produce.
            // TODO: make sure order right
            final Comparator<IptcRecord> comparator =
                    (e1, e2) -> Integer.compare(e2.iptcType.getType(), e1.iptcType.getType());
            Collections.sort(sorted, comparator);

            // write the list.
            for (final IptcRecord element : sorted) {
                if (element.iptcType == IptcTypes.RECORD_VERSION) {
                    continue; // ignore
                }

                bos.write(IptcConstants.IPTC_RECORD_TAG_MARKER);
                bos.write(IptcConstants.IPTC_APPLICATION_2_RECORD_NUMBER);
                if (element.iptcType.getType() < 0
                        || element.iptcType.getType() > 0xff) {
                    throw new ImageWriteException("Invalid record type: "
                            + element.iptcType.getType());
                }
                bos.write(element.iptcType.getType());

                final byte[] recordData = element.getValue().getBytes(StandardCharsets.ISO_8859_1);
                // Round-trip check: unmappable characters do not survive encoding.
                if (!new String(recordData, StandardCharsets.ISO_8859_1).equals(element.getValue())) {
                    throw new ImageWriteException(
                            "Invalid record value, not ISO-8859-1");
                }

                // A larger size would be read back by parseIPTCBlock as an
                // extended dataset and discarded, so refuse to write it.
                if (recordData.length > IptcConstants.IPTC_NON_EXTENDED_RECORD_MAXIMUM_SIZE) {
                    throw new ImageWriteException("IPTC record data is too long: " + recordData.length);
                }

                bos.write2Bytes(recordData.length);
                bos.write(recordData);
            }
        }

        return baos.toByteArray();
    }

}