001/* 002 * Copyright 2023 the original author or authors. 003 * <p> 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * <p> 008 * https://www.apache.org/licenses/LICENSE-2.0 009 * <p> 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package de.cuioss.tools.io; 017 018import static de.cuioss.tools.string.MoreStrings.nullToEmpty; 019 020import java.io.BufferedInputStream; 021import java.io.BufferedReader; 022import java.io.ByteArrayInputStream; 023import java.io.ByteArrayOutputStream; 024import java.io.EOFException; 025import java.io.IOException; 026import java.io.InputStream; 027import java.io.InputStreamReader; 028import java.io.OutputStream; 029import java.io.OutputStreamWriter; 030import java.io.Reader; 031import java.io.StringWriter; 032import java.io.Writer; 033import java.nio.charset.Charset; 034import java.nio.charset.StandardCharsets; 035 036import de.cuioss.tools.base.Preconditions; 037import lombok.experimental.UtilityClass; 038 039/** 040 * Provides a number of utilities in context {@link InputStream} and 041 * {@link OutputStream}s. The content is inspired / copied from 042 * org.apache.commons.io.IOUtils 043 * 044 * @author Oliver Wolff 045 * 046 */ 047@UtilityClass 048public class IOStreams { 049 050 private static final String ACTUAL = " actual: "; 051 052 private static final String SKIP_COUNT_MUST_BE_NON_NEGATIVE_ACTUAL = "Skip count must be non-negative, actual: "; 053 054 /** 055 * Represents the end-of-file (or stream). 056 */ 057 public static final int EOF = -1; 058 059 /** 060 * The default buffer size ({@value}) to use in copy methods. 061 */ 062 public static final int DEFAULT_BUFFER_SIZE = 1024 * 4; 063 064 /** 065 * The default buffer size to use for the skip() methods. 066 */ 067 private static final int SKIP_BUFFER_SIZE = 2048; 068 069 // Allocated in the relevant skip method if necessary. 070 /* 071 * These buffers are static and are shared between threads. This is possible 072 * because the buffers are write-only - the contents are never read. 073 * 074 * N.B. there is no need to synchronize when creating these because: - we don't 075 * care if the buffer is created multiple times (the data is ignored) - we 076 * always use the same size buffer, so if it is recreated it will still be OK 077 * (if the buffer size were variable, we would need to sync. to ensure some 078 * other thread did not create a smaller one) 079 */ 080 private static char[] skipCharBuffer; 081 private static byte[] skipByteBuffer; 082 083 /** 084 * Compares the contents of two Streams to determine if they are equal or not. 085 * <p> 086 * This method buffers the input internally using 087 * <code>BufferedInputStream</code> if they are not already buffered. 088 * 089 * @param input1 the first stream 090 * @param input2 the second stream 091 * @return true if the content of the streams are equal or they both don't 092 * exist, false otherwise 093 * @throws NullPointerException if either input is null 094 * @throws IOException if an I/O error occurs 095 */ 096 public static boolean contentEquals(InputStream input1, InputStream input2) throws IOException { 097 if (input1 == input2) { 098 return true; 099 } 100 if (!(input1 instanceof BufferedInputStream)) { 101 input1 = new BufferedInputStream(input1); 102 } 103 if (!(input2 instanceof BufferedInputStream)) { 104 input2 = new BufferedInputStream(input2); 105 } 106 107 var ch = input1.read(); 108 while (EOF != ch) { 109 final var ch2 = input2.read(); 110 if (ch != ch2) { 111 return false; 112 } 113 ch = input1.read(); 114 } 115 116 final var ch2 = input2.read(); 117 return ch2 == EOF; 118 } 119 120 /** 121 * Compares the contents of two Readers to determine if they are equal or not. 122 * <p> 123 * This method buffers the input internally using <code>BufferedReader</code> if 124 * they are not already buffered. 125 * 126 * @param input1 the first reader 127 * @param input2 the second reader 128 * @return true if the content of the readers are equal or they both don't 129 * exist, false otherwise 130 * @throws NullPointerException if either input is null 131 * @throws IOException if an I/O error occurs 132 */ 133 public static boolean contentEquals(Reader input1, Reader input2) throws IOException { 134 if (input1 == input2) { 135 return true; 136 } 137 138 input1 = toBufferedReader(input1); 139 input2 = toBufferedReader(input2); 140 141 var ch = input1.read(); 142 while (EOF != ch) { 143 final var ch2 = input2.read(); 144 if (ch != ch2) { 145 return false; 146 } 147 ch = input1.read(); 148 } 149 150 final var ch2 = input2.read(); 151 return ch2 == EOF; 152 } 153 154 /** 155 * Wraps an {@link ByteArrayInputStream} around a given {@link String} assuming 156 * {@link StandardCharsets#UTF_8} 157 * 158 * @param input to be wrapped, may be null or empty 159 * @return the created {@link InputStream} 160 */ 161 public static InputStream toInputStream(String input) { 162 return new ByteArrayInputStream(nullToEmpty(input).getBytes(StandardCharsets.UTF_8)); 163 } 164 165 /** 166 * Gets the contents of an <code>InputStream</code> as a String using the 167 * specified character encoding. 168 * <p> 169 * This method buffers the input internally, so there is no need to use a 170 * <code>BufferedInputStream</code>. 171 * </p> 172 * 173 * @param input the <code>InputStream</code> to read from, using UTF-8 encoding. 174 * @return the requested String 175 * @throws NullPointerException if the input is null 176 * @throws IOException if an I/O error occurs 177 */ 178 public static String toString(final InputStream input) throws IOException { 179 return toString(input, StandardCharsets.UTF_8); 180 } 181 182 /** 183 * Gets the contents of an <code>InputStream</code> as a String using the 184 * specified character encoding. 185 * <p> 186 * This method buffers the input internally, so there is no need to use a 187 * <code>BufferedInputStream</code>. 188 * </p> 189 * 190 * @param input the <code>InputStream</code> to read from 191 * @param encoding the encoding to use, null means platform default 192 * @return the requested String 193 * @throws IllegalArgumentException if the input is null 194 * @throws IOException if an I/O error occurs 195 */ 196 public static String toString(final InputStream input, final Charset encoding) throws IOException { 197 Preconditions.checkArgument(null != input, "InputStream must not be null"); 198 try (final var sw = new StringWriter()) { 199 copy(input, sw, encoding); 200 return sw.toString(); 201 } 202 } 203 204 /** 205 * Returns the given reader if it is a {@link BufferedReader}, otherwise creates 206 * a BufferedReader from the given reader. 207 * 208 * @param reader the reader to wrap or return (not null) 209 * @return the given reader or a new {@link BufferedReader} for the given reader 210 * @throws NullPointerException if the input parameter is null 211 */ 212 public static BufferedReader toBufferedReader(final Reader reader) { 213 return reader instanceof BufferedReader br ? br : new BufferedReader(reader); 214 } 215 216 // copy from InputStream 217 // ----------------------------------------------------------------------- 218 219 /** 220 * Copies bytes from an <code>InputStream</code> to an 221 * <code>OutputStream</code>. 222 * <p> 223 * This method buffers the input internally, so there is no need to use a 224 * <code>BufferedInputStream</code>. 225 * <p> 226 * Large streams (over 2GB) will return a bytes copied value of <code>-1</code> 227 * after the copy has completed since the correct number of bytes cannot be 228 * returned as an int. For large streams use the 229 * <code>copyLarge(InputStream, OutputStream)</code> method. 230 * 231 * @param input the <code>InputStream</code> to read from 232 * @param output the <code>OutputStream</code> to write to 233 * @return the number of bytes copied, or -1 if > Integer.MAX_VALUE 234 * @throws NullPointerException if the input or output is null 235 * @throws IOException if an I/O error occurs 236 * 237 */ 238 public static int copy(final InputStream input, final OutputStream output) throws IOException { 239 final var count = copyLarge(input, output); 240 if (count > Integer.MAX_VALUE) { 241 return -1; 242 } 243 return (int) count; 244 } 245 246 /** 247 * Copies bytes from an <code>InputStream</code> to an <code>OutputStream</code> 248 * using an internal buffer of the given size. 249 * <p> 250 * This method buffers the input internally, so there is no need to use a 251 * <code>BufferedInputStream</code>. 252 * 253 * @param input the <code>InputStream</code> to read from 254 * @param output the <code>OutputStream</code> to write to 255 * @param bufferSize the bufferSize used to copy from the input to the output 256 * @return the number of bytes copied 257 * @throws NullPointerException if the input or output is null 258 * @throws IOException if an I/O error occurs 259 * 260 */ 261 public static long copy(final InputStream input, final OutputStream output, final int bufferSize) 262 throws IOException { 263 return copyLarge(input, output, new byte[bufferSize]); 264 } 265 266 /** 267 * Copies bytes from a large (over 2GB) <code>InputStream</code> to an 268 * <code>OutputStream</code>. 269 * <p> 270 * This method buffers the input internally, so there is no need to use a 271 * <code>BufferedInputStream</code>. 272 * <p> 273 * The buffer size is given by {@link #DEFAULT_BUFFER_SIZE}. 274 * 275 * @param input the <code>InputStream</code> to read from 276 * @param output the <code>OutputStream</code> to write to 277 * @return the number of bytes copied 278 * @throws NullPointerException if the input or output is null 279 * @throws IOException if an I/O error occurs 280 * 281 */ 282 public static long copyLarge(final InputStream input, final OutputStream output) throws IOException { 283 return copy(input, output, DEFAULT_BUFFER_SIZE); 284 } 285 286 /** 287 * Copies bytes from a large (over 2GB) <code>InputStream</code> to an 288 * <code>OutputStream</code>. 289 * <p> 290 * This method uses the provided buffer, so there is no need to use a 291 * <code>BufferedInputStream</code>. 292 * 293 * @param input the <code>InputStream</code> to read from 294 * @param output the <code>OutputStream</code> to write to 295 * @param buffer the buffer to use for the copy 296 * @return the number of bytes copied 297 * @throws NullPointerException if the input or output is null 298 * @throws IOException if an I/O error occurs 299 * 300 */ 301 public static long copyLarge(final InputStream input, final OutputStream output, final byte[] buffer) 302 throws IOException { 303 var count = 0L; 304 int n; 305 while (EOF != (n = input.read(buffer))) { 306 output.write(buffer, 0, n); 307 count += n; 308 } 309 return count; 310 } 311 312 /** 313 * Copies some or all bytes from a large (over 2GB) <code>InputStream</code> to 314 * an <code>OutputStream</code>, optionally skipping input bytes. 315 * <p> 316 * This method buffers the input internally, so there is no need to use a 317 * <code>BufferedInputStream</code>. 318 * </p> 319 * <p> 320 * Note that the implementation uses {@link #skip(InputStream, long)}. This 321 * means that the method may be considerably less efficient than using the 322 * actual skip implementation, this is done to guarantee that the correct number 323 * of characters are skipped. 324 * </p> 325 * The buffer size is given by {@link #DEFAULT_BUFFER_SIZE}. 326 * 327 * @param input the <code>InputStream</code> to read from 328 * @param output the <code>OutputStream</code> to write to 329 * @param inputOffset number of bytes to skip from input before copying -ve 330 * values are ignored 331 * @param length number of bytes to copy. -ve means all 332 * @return the number of bytes copied 333 * @throws NullPointerException if the input or output is null 334 * @throws IOException if an I/O error occurs 335 * 336 */ 337 public static long copyLarge(final InputStream input, final OutputStream output, final long inputOffset, 338 final long length) throws IOException { 339 return copyLarge(input, output, inputOffset, length, new byte[DEFAULT_BUFFER_SIZE]); 340 } 341 342 /** 343 * Copies some or all bytes from a large (over 2GB) <code>InputStream</code> to 344 * an <code>OutputStream</code>, optionally skipping input bytes. 345 * <p> 346 * This method uses the provided buffer, so there is no need to use a 347 * <code>BufferedInputStream</code>. 348 * </p> 349 * <p> 350 * Note that the implementation uses {@link #skip(InputStream, long)}. This 351 * means that the method may be considerably less efficient than using the 352 * actual skip implementation, this is done to guarantee that the correct number 353 * of characters are skipped. 354 * </p> 355 * 356 * @param input the <code>InputStream</code> to read from 357 * @param output the <code>OutputStream</code> to write to 358 * @param inputOffset number of bytes to skip from input before copying -ve 359 * values are ignored 360 * @param length number of bytes to copy. -ve means all 361 * @param buffer the buffer to use for the copy 362 * @return the number of bytes copied 363 * @throws NullPointerException if the input or output is null 364 * @throws IOException if an I/O error occurs 365 * 366 */ 367 public static long copyLarge(final InputStream input, final OutputStream output, final long inputOffset, 368 final long length, final byte[] buffer) throws IOException { 369 if (inputOffset > 0) { 370 skipFully(input, inputOffset); 371 } 372 if (length == 0) { 373 return 0; 374 } 375 final var bufferLength = buffer.length; 376 var bytesToRead = bufferLength; 377 if (length > 0 && length < bufferLength) { 378 bytesToRead = (int) length; 379 } 380 int read; 381 var totalRead = 0L; 382 while (bytesToRead > 0 && EOF != (read = input.read(buffer, 0, bytesToRead))) { 383 output.write(buffer, 0, read); 384 totalRead += read; 385 if (length > 0) { // only adjust length if not reading to the end 386 // Note the cast must work because buffer.length is an integer 387 bytesToRead = (int) Math.min(length - totalRead, bufferLength); 388 } 389 } 390 return totalRead; 391 } 392 393 /** 394 * Copies bytes from an <code>InputStream</code> to chars on a 395 * <code>Writer</code> using the specified character encoding. 396 * <p> 397 * This method buffers the input internally, so there is no need to use a 398 * <code>BufferedInputStream</code>. 399 * <p> 400 * This method uses {@link InputStreamReader}. 401 * 402 * @param input the <code>InputStream</code> to read from 403 * @param output the <code>Writer</code> to write to 404 * @param inputEncoding the encoding to use for the input stream, null means 405 * platform default 406 * @throws NullPointerException if the input or output is null 407 * @throws IOException if an I/O error occurs 408 * 409 */ 410 public static void copy(final InputStream input, final Writer output, final Charset inputEncoding) 411 throws IOException { 412 final var in = new InputStreamReader(input, toCharset(inputEncoding)); 413 copy(in, output); 414 } 415 416 /** 417 * Returns the given Charset or the default Charset if the given Charset is 418 * null. 419 * 420 * @param charset A charset or null. 421 * @return the given Charset or the default Charset if the given Charset is null 422 */ 423 public static Charset toCharset(final Charset charset) { 424 return charset == null ? Charset.defaultCharset() : charset; 425 } 426 427 /** 428 * Copies bytes from an <code>InputStream</code> to chars on a 429 * <code>Writer</code> using the specified character encoding. 430 * <p> 431 * This method buffers the input internally, so there is no need to use a 432 * <code>BufferedInputStream</code>. 433 * <p> 434 * Character encoding names can be found at 435 * <a href="http://www.iana.org/assignments/character-sets">IANA</a>. 436 * <p> 437 * This method uses {@link InputStreamReader}. 438 * 439 * @param input the <code>InputStream</code> to read from 440 * @param output the <code>Writer</code> to write to 441 * @param inputEncoding the encoding to use for the InputStream, null means 442 * platform default 443 * @throws NullPointerException if the input or output 444 * is null 445 * @throws IOException if an I/O error occurs 446 * @throws java.nio.charset.UnsupportedCharsetException thrown instead of 447 * {@link java.io 448 * .UnsupportedEncodingException} 449 * in version 2.2 if the 450 * encoding is not 451 * supported. 452 * 453 */ 454 public static void copy(final InputStream input, final Writer output, final String inputEncoding) 455 throws IOException { 456 copy(input, output, toCharset(inputEncoding)); 457 } 458 459 /** 460 * Returns a Charset for the named charset. If the name is null, return the 461 * default Charset. 462 * 463 * @param charset The name of the requested charset, may be null. 464 * @return a Charset for the named charset 465 * @throws java.nio.charset.UnsupportedCharsetException If the named charset is 466 * unavailable 467 */ 468 public static Charset toCharset(final String charset) { 469 return charset == null ? Charset.defaultCharset() : Charset.forName(charset); 470 } 471 472 // copy from Reader 473 // ----------------------------------------------------------------------- 474 475 /** 476 * Copies chars from a <code>Reader</code> to a <code>Writer</code>. 477 * <p> 478 * This method buffers the input internally, so there is no need to use a 479 * <code>BufferedReader</code>. 480 * <p> 481 * Large streams (over 2GB) will return a chars copied value of <code>-1</code> 482 * after the copy has completed since the correct number of chars cannot be 483 * returned as an int. For large streams use the 484 * <code>copyLarge(Reader, Writer)</code> method. 485 * 486 * @param input the <code>Reader</code> to read from 487 * @param output the <code>Writer</code> to write to 488 * @return the number of characters copied, or -1 if > Integer.MAX_VALUE 489 * @throws NullPointerException if the input or output is null 490 * @throws IOException if an I/O error occurs 491 * 492 */ 493 public static int copy(final Reader input, final Writer output) throws IOException { 494 final var count = copyLarge(input, output); 495 if (count > Integer.MAX_VALUE) { 496 return -1; 497 } 498 return (int) count; 499 } 500 501 /** 502 * Copies chars from a large (over 2GB) <code>Reader</code> to a 503 * <code>Writer</code>. 504 * <p> 505 * This method buffers the input internally, so there is no need to use a 506 * <code>BufferedReader</code>. 507 * <p> 508 * The buffer size is given by {@link #DEFAULT_BUFFER_SIZE}. 509 * 510 * @param input the <code>Reader</code> to read from 511 * @param output the <code>Writer</code> to write to 512 * @return the number of characters copied 513 * @throws NullPointerException if the input or output is null 514 * @throws IOException if an I/O error occurs 515 * 516 */ 517 public static long copyLarge(final Reader input, final Writer output) throws IOException { 518 return copyLarge(input, output, new char[DEFAULT_BUFFER_SIZE]); 519 } 520 521 /** 522 * Copies chars from a large (over 2GB) <code>Reader</code> to a 523 * <code>Writer</code>. 524 * <p> 525 * This method uses the provided buffer, so there is no need to use a 526 * <code>BufferedReader</code>. 527 * 528 * @param input the <code>Reader</code> to read from 529 * @param output the <code>Writer</code> to write to 530 * @param buffer the buffer to be used for the copy 531 * @return the number of characters copied 532 * @throws NullPointerException if the input or output is null 533 * @throws IOException if an I/O error occurs 534 * 535 */ 536 public static long copyLarge(final Reader input, final Writer output, final char[] buffer) throws IOException { 537 var count = 0L; 538 int n; 539 while (EOF != (n = input.read(buffer))) { 540 output.write(buffer, 0, n); 541 count += n; 542 } 543 return count; 544 } 545 546 /** 547 * Copies some or all chars from a large (over 2GB) <code>InputStream</code> to 548 * an <code>OutputStream</code>, optionally skipping input chars. 549 * <p> 550 * This method buffers the input internally, so there is no need to use a 551 * <code>BufferedReader</code>. 552 * <p> 553 * The buffer size is given by {@link #DEFAULT_BUFFER_SIZE}. 554 * 555 * @param input the <code>Reader</code> to read from 556 * @param output the <code>Writer</code> to write to 557 * @param inputOffset number of chars to skip from input before copying -ve 558 * values are ignored 559 * @param length number of chars to copy. -ve means all 560 * @return the number of chars copied 561 * @throws NullPointerException if the input or output is null 562 * @throws IOException if an I/O error occurs 563 * 564 */ 565 public static long copyLarge(final Reader input, final Writer output, final long inputOffset, final long length) 566 throws IOException { 567 return copyLarge(input, output, inputOffset, length, new char[DEFAULT_BUFFER_SIZE]); 568 } 569 570 /** 571 * Copies some or all chars from a large (over 2GB) <code>InputStream</code> to 572 * an <code>OutputStream</code>, optionally skipping input chars. 573 * <p> 574 * This method uses the provided buffer, so there is no need to use a 575 * <code>BufferedReader</code>. 576 * 577 * @param input the <code>Reader</code> to read from 578 * @param output the <code>Writer</code> to write to 579 * @param inputOffset number of chars to skip from input before copying -ve 580 * values are ignored 581 * @param length number of chars to copy. -ve means all 582 * @param buffer the buffer to be used for the copy 583 * @return the number of chars copied 584 * @throws NullPointerException if the input or output is null 585 * @throws IOException if an I/O error occurs 586 * 587 */ 588 public static long copyLarge(final Reader input, final Writer output, final long inputOffset, final long length, 589 final char[] buffer) throws IOException { 590 if (inputOffset > 0) { 591 skipFully(input, inputOffset); 592 } 593 if (length == 0) { 594 return 0; 595 } 596 var bytesToRead = buffer.length; 597 if (length > 0 && length < buffer.length) { 598 bytesToRead = (int) length; 599 } 600 int read; 601 var totalRead = 0L; 602 while (bytesToRead > 0 && EOF != (read = input.read(buffer, 0, bytesToRead))) { 603 output.write(buffer, 0, read); 604 totalRead += read; 605 if (length > 0) { // only adjust length if not reading to the end 606 // Note the cast must work because buffer.length is an integer 607 bytesToRead = (int) Math.min(length - totalRead, buffer.length); 608 } 609 } 610 return totalRead; 611 } 612 613 /** 614 * Copies chars from a <code>Reader</code> to bytes on an 615 * <code>OutputStream</code> using the specified character encoding, and calling 616 * flush. 617 * <p> 618 * This method buffers the input internally, so there is no need to use a 619 * <code>BufferedReader</code>. 620 * </p> 621 * <p> 622 * Due to the implementation of OutputStreamWriter, this method performs a 623 * flush. 624 * </p> 625 * <p> 626 * This method uses {@link OutputStreamWriter}. 627 * </p> 628 * 629 * @param input the <code>Reader</code> to read from 630 * @param output the <code>OutputStream</code> to write to 631 * @param outputEncoding the encoding to use for the OutputStream, null means 632 * platform default 633 * @throws NullPointerException if the input or output is null 634 * @throws IOException if an I/O error occurs 635 * 636 */ 637 public static void copy(final Reader input, final OutputStream output, final Charset outputEncoding) 638 throws IOException { 639 final var out = new OutputStreamWriter(output, toCharset(outputEncoding)); 640 copy(input, out); 641 // Unless anyone is planning on rewriting OutputStreamWriter, 642 // we have to flush here. 643 out.flush(); 644 } 645 646 /** 647 * Copies chars from a <code>Reader</code> to bytes on an 648 * <code>OutputStream</code> using the specified character encoding, and calling 649 * flush. 650 * <p> 651 * This method buffers the input internally, so there is no need to use a 652 * <code>BufferedReader</code>. 653 * <p> 654 * Character encoding names can be found at 655 * <a href="http://www.iana.org/assignments/character-sets">IANA</a>. 656 * <p> 657 * Due to the implementation of OutputStreamWriter, this method performs a 658 * flush. 659 * <p> 660 * This method uses {@link OutputStreamWriter}. 661 * 662 * @param input the <code>Reader</code> to read from 663 * @param output the <code>OutputStream</code> to write to 664 * @param outputEncoding the encoding to use for the OutputStream, null means 665 * platform default 666 * @throws NullPointerException if the input or output 667 * is null 668 * @throws IOException if an I/O error occurs 669 * @throws java.nio.charset.UnsupportedCharsetException thrown instead of 670 * {@link java.io 671 * .UnsupportedEncodingException} 672 * in version 2.2 if the 673 * encoding is not 674 * supported. 675 * 676 */ 677 public static void copy(final Reader input, final OutputStream output, final String outputEncoding) 678 throws IOException { 679 copy(input, output, toCharset(outputEncoding)); 680 } 681 682 /** 683 * Skips bytes from an input byte stream. This implementation guarantees that it 684 * will read as many bytes as possible before giving up; this may not always be 685 * the case for skip() implementations in subclasses of {@link InputStream}. 686 * <p> 687 * Note that the implementation uses {@link InputStream#read(byte[], int, int)} 688 * rather than delegating to {@link InputStream#skip(long)}. This means that the 689 * method may be considerably less efficient than using the actual skip 690 * implementation, this is done to guarantee that the correct number of bytes 691 * are skipped. 692 * </p> 693 * 694 * @param input byte stream to skip 695 * @param toSkip number of bytes to skip. 696 * @return number of bytes actually skipped. 697 * @throws IOException if there is a problem reading the file 698 * @throws IllegalArgumentException if toSkip is negative 699 * @see InputStream#skip(long) 700 * @see <a href="https://issues.apache.org/jira/browse/IO-203">IO-203 - Add 701 * skipFully() method for InputStreams</a> 702 * 703 */ 704 public static long skip(final InputStream input, final long toSkip) throws IOException { 705 if (toSkip < 0) { 706 throw new IllegalArgumentException(SKIP_COUNT_MUST_BE_NON_NEGATIVE_ACTUAL + toSkip); 707 } 708 /* 709 * N.B. no need to synchronize this because: - we don't care if the buffer is 710 * created multiple times (the data is ignored) - we always use the same size 711 * buffer, so if it is recreated it will still be OK (if the buffer size were 712 * variable, we would need to synch. to ensure some other thread did not create 713 * a smaller one) 714 */ 715 if (skipByteBuffer == null) { 716 skipByteBuffer = new byte[SKIP_BUFFER_SIZE]; 717 } 718 var remain = toSkip; 719 while (remain > 0) { 720 // See https://issues.apache.org/jira/browse/IO-203 for why we use read() rather 721 // than 722 // delegating to skip() 723 final long n = input.read(skipByteBuffer, 0, (int) Math.min(remain, SKIP_BUFFER_SIZE)); 724 if (n < 0) { // EOF 725 break; 726 } 727 remain -= n; 728 } 729 return toSkip - remain; 730 } 731 732 /** 733 * Skips characters from an input character stream. This implementation 734 * guarantees that it will read as many characters as possible before giving up; 735 * this may not always be the case for skip() implementations in subclasses of 736 * {@link Reader}. 737 * <p> 738 * Note that the implementation uses {@link Reader#read(char[], int, int)} 739 * rather than delegating to {@link Reader#skip(long)}. This means that the 740 * method may be considerably less efficient than using the actual skip 741 * implementation, this is done to guarantee that the correct number of 742 * characters are skipped. 743 * </p> 744 * 745 * @param input character stream to skip 746 * @param toSkip number of characters to skip. 747 * @return number of characters actually skipped. 748 * @throws IOException if there is a problem reading the file 749 * @throws IllegalArgumentException if toSkip is negative 750 * @see Reader#skip(long) 751 * @see <a href="https://issues.apache.org/jira/browse/IO-203">IO-203 - Add 752 * skipFully() method for InputStreams</a> 753 * 754 */ 755 public static long skip(final Reader input, final long toSkip) throws IOException { 756 if (toSkip < 0) { 757 throw new IllegalArgumentException(SKIP_COUNT_MUST_BE_NON_NEGATIVE_ACTUAL + toSkip); 758 } 759 /* 760 * N.B. no need to synchronize this because: - we don't care if the buffer is 761 * created multiple times (the data is ignored) - we always use the same size 762 * buffer, so if it is recreated it will still be OK (if the buffer size were 763 * variable, we would need to sync. to ensure some other thread did not create a 764 * smaller one) 765 */ 766 if (skipCharBuffer == null) { 767 skipCharBuffer = new char[SKIP_BUFFER_SIZE]; 768 } 769 var remain = toSkip; 770 while (remain > 0) { 771 // See https://issues.apache.org/jira/browse/IO-203 for why we use read() rather 772 // than 773 // delegating to skip() 774 final long n = input.read(skipCharBuffer, 0, (int) Math.min(remain, SKIP_BUFFER_SIZE)); 775 if (n < 0) { // EOF 776 break; 777 } 778 remain -= n; 779 } 780 return toSkip - remain; 781 } 782 783 /** 784 * Skips the requested number of bytes or fail if there are not enough left. 785 * <p> 786 * This allows for the possibility that {@link InputStream#skip(long)} may not 787 * skip as many bytes as requested (most likely because of reaching EOF). 788 * <p> 789 * Note that the implementation uses {@link #skip(InputStream, long)}. This 790 * means that the method may be considerably less efficient than using the 791 * actual skip implementation, this is done to guarantee that the correct number 792 * of characters are skipped. 793 * </p> 794 * 795 * @param input stream to skip 796 * @param toSkip the number of bytes to skip 797 * @throws IOException if there is a problem reading the file 798 * @throws IllegalArgumentException if toSkip is negative 799 * @throws EOFException if the number of bytes skipped was incorrect 800 * @see InputStream#skip(long) 801 * 802 */ 803 public static void skipFully(final InputStream input, final long toSkip) throws IOException { 804 if (toSkip < 0) { 805 throw new IllegalArgumentException("Bytes to skip must not be negative: " + toSkip); 806 } 807 final var skipped = skip(input, toSkip); 808 if (skipped != toSkip) { 809 throw new EOFException("Bytes to skip: " + toSkip + ACTUAL + skipped); 810 } 811 } 812 813 /** 814 * Skips the requested number of characters or fail if there are not enough 815 * left. 816 * <p> 817 * This allows for the possibility that {@link Reader#skip(long)} may not skip 818 * as many characters as requested (most likely because of reaching EOF). 819 * <p> 820 * Note that the implementation uses {@link #skip(Reader, long)}. This means 821 * that the method may be considerably less efficient than using the actual skip 822 * implementation, this is done to guarantee that the correct number of 823 * characters are skipped. 824 * </p> 825 * 826 * @param input stream to skip 827 * @param toSkip the number of characters to skip 828 * @throws IOException if there is a problem reading the file 829 * @throws IllegalArgumentException if toSkip is negative 830 * @throws EOFException if the number of characters skipped was 831 * incorrect 832 * @see Reader#skip(long) 833 * 834 */ 835 public static void skipFully(final Reader input, final long toSkip) throws IOException { 836 final var skipped = skip(input, toSkip); 837 if (skipped != toSkip) { 838 throw new EOFException("Chars to skip: " + toSkip + ACTUAL + skipped); 839 } 840 } 841 842 // InputStream to byte array 843 844 /** 845 * Gets the contents of an <code>InputStream</code> as a <code>byte[]</code>. 846 * <p> 847 * This method buffers the input internally, so there is no need to use a 848 * <code>BufferedInputStream</code>. 849 * 850 * @param input the <code>InputStream</code> to read from 851 * @return the requested byte array 852 * @throws NullPointerException if the input is null 853 * @throws IOException if an I/O error occurs 854 */ 855 public static byte[] toByteArray(final InputStream input) throws IOException { 856 try (final var output = new ByteArrayOutputStream()) { 857 copy(input, output); 858 return output.toByteArray(); 859 } 860 } 861 862}