001/*- 002 ******************************************************************************* 003 * Copyright (c) 2011, 2016 Diamond Light Source Ltd. 004 * All rights reserved. This program and the accompanying materials 005 * are made available under the terms of the Eclipse Public License v1.0 006 * which accompanies this distribution, and is available at 007 * http://www.eclipse.org/legal/epl-v10.html 008 * 009 * Contributors: 010 * Peter Chang - initial API and implementation and/or initial documentation 011 *******************************************************************************/ 012 013package org.eclipse.january.dataset; 014 015import java.io.IOException; 016import java.io.Serializable; 017import java.lang.annotation.Annotation; 018import java.lang.reflect.Field; 019import java.util.ArrayList; 020import java.util.Arrays; 021import java.util.HashMap; 022import java.util.LinkedList; 023import java.util.List; 024import java.util.Map; 025 026import org.eclipse.january.DatasetException; 027import org.eclipse.january.IMonitor; 028import org.eclipse.january.io.ILazyLoader; 029import org.eclipse.january.metadata.MetadataFactory; 030import org.eclipse.january.metadata.MetadataType; 031import org.eclipse.january.metadata.OriginMetadata; 032import org.eclipse.january.metadata.Reshapeable; 033import org.eclipse.january.metadata.Sliceable; 034import org.eclipse.january.metadata.Transposable; 035 036public class LazyDataset extends LazyDatasetBase implements Serializable, Cloneable { 037 private static final long serialVersionUID = 2467865859867440242L; 038 039 protected Map<Class<? extends MetadataType>, List<MetadataType>> oMetadata = null; 040 protected int[] oShape; // original shape 041 protected long size; // number of items 042 protected int dtype; // dataset type 043 protected int isize; // number of elements per item 044 045 protected ILazyLoader loader; 046 047 // relative to loader 048 protected int[] begSlice = null; // slice begin 049 protected int[] delSlice = null; // slice delta 050 /** 051 * @since 2.2 052 */ 053 protected int[] sShape = null; // sliced shape 054 055 /** 056 * @since 2.2 057 */ 058 protected int[] padding = null; // differences in shape from original (or sliced) shape 059 protected int[] map; // transposition map (same length as current shape) 060 061 /** 062 * Create a lazy dataset 063 * @param name 064 * @param dtype dataset type 065 * @param elements 066 * @param shape 067 * @param loader 068 */ 069 public LazyDataset(String name, int dtype, int elements, int[] shape, ILazyLoader loader) { 070 this.name = name; 071 this.shape = shape.clone(); 072 this.oShape = this.shape; 073 this.loader = loader; 074 this.dtype = dtype; 075 this.isize = elements; 076 try { 077 size = ShapeUtils.calcLongSize(shape); 078 } catch (IllegalArgumentException e) { 079 size = Long.MAX_VALUE; // this indicates that the entire dataset cannot be read in! 080 } 081 } 082 083 /** 084 * Create a lazy dataset 085 * @param name 086 * @param dtype dataset type 087 * @param shape 088 * @param loader 089 */ 090 public LazyDataset(String name, int dtype, int[] shape, ILazyLoader loader) { 091 this(name, dtype, 1, shape, loader); 092 } 093 094 LazyDataset(LazyDataset other) { 095 name = other.name; 096 shape = other.shape.clone(); 097 metadata = other.copyMetadata(); 098 oMetadata = other.oMetadata; 099 oShape = other.oShape; 100 size = other.size; 101 dtype = other.dtype; 102 isize = other.isize; 103 loader = other.loader; 104 105 begSlice = other.begSlice; 106 delSlice = other.delSlice; 107 sShape = other.sShape; 108 padding = other.padding; 109 map = other.map; 110 } 111 112 /** 113 * Create a lazy dataset based on in-memory data (handy for testing) 114 * @param dataset 115 */ 116 public static LazyDataset createLazyDataset(final Dataset dataset) { 117 return new LazyDataset(dataset.getName(), dataset.getDType(), dataset.getElementsPerItem(), dataset.getShape(), 118 new ILazyLoader() { 119 private static final long serialVersionUID = -6725268922780517523L; 120 121 final Dataset d = dataset; 122 @Override 123 public boolean isFileReadable() { 124 return true; 125 } 126 127 @Override 128 public Dataset getDataset(IMonitor mon, SliceND slice) throws IOException { 129 return d.getSlice(mon, slice); 130 } 131 }); 132 } 133 134 /** 135 * Can return -1 for unknown 136 */ 137 @Override 138 public int getDType() { 139 return dtype; 140 } 141 142 /** 143 * Can return -1 for unknown 144 */ 145 @Override 146 public int getElementsPerItem() { 147 return isize; 148 } 149 150 @Override 151 public int getSize() { 152 return (int) size; 153 } 154 155 @Override 156 public String toString() { 157 StringBuilder out = new StringBuilder(); 158 159 if (name != null && name.length() > 0) { 160 out.append("Lazy dataset '"); 161 out.append(name); 162 out.append("' has shape ["); 163 } else { 164 out.append("Lazy dataset shape is ["); 165 } 166 int rank = shape == null ? 0 : shape.length; 167 168 if (rank > 0 && shape[0] >= 0) { 169 out.append(shape[0]); 170 } 171 for (int i = 1; i < rank; i++) { 172 out.append(", " + shape[i]); 173 } 174 out.append(']'); 175 176 return out.toString(); 177 } 178 179 @Override 180 public int hashCode() { 181 final int prime = 31; 182 int result = super.hashCode(); 183 result = prime * result + Arrays.hashCode(oShape); 184 result = prime * result + (int) (size ^ (size >>> 32)); 185 result = prime * result + dtype; 186 result = prime * result + isize; 187 result = prime * result + ((loader == null) ? 0 : loader.hashCode()); 188 result = prime * result + Arrays.hashCode(begSlice); 189 result = prime * result + Arrays.hashCode(delSlice); 190 result = prime * result + Arrays.hashCode(sShape); 191 result = prime * result + Arrays.hashCode(padding); 192 result = prime * result + Arrays.hashCode(map); 193 return result; 194 } 195 196 @Override 197 public boolean equals(Object obj) { 198 if (!super.equals(obj)) { 199 return false; 200 } 201 202 LazyDataset other = (LazyDataset) obj; 203 if (!Arrays.equals(oShape, other.oShape)) { 204 return false; 205 } 206 if (size != other.size) { 207 return false; 208 } 209 if (dtype != other.dtype) { 210 return false; 211 } 212 if (isize != other.isize) { 213 return false; 214 } 215 216 if (loader != other.loader) { 217 return false; 218 } 219 220 if (!Arrays.equals(begSlice, other.begSlice)) { 221 return false; 222 } 223 if (!Arrays.equals(delSlice, other.delSlice)) { 224 return false; 225 } 226 if (!Arrays.equals(sShape, other.sShape)) { 227 return false; 228 } 229 if (!Arrays.equals(padding, other.padding)) { 230 return false; 231 } 232 if (!Arrays.equals(map, other.map)) { 233 return false; 234 } 235 236 return true; 237 } 238 239 @Override 240 public LazyDataset clone() { 241 return new LazyDataset(this); 242 } 243 244 @Override 245 public void setShape(int... shape) { 246 setShapeInternal(shape); 247 } 248 249 @Override 250 public LazyDataset squeezeEnds() { 251 setShapeInternal(ShapeUtils.squeezeShape(shape, true)); 252 return this; 253 } 254 255 @Override 256 public Dataset getSlice(int[] start, int[] stop, int[] step) throws DatasetException { 257 return getSlice(null, start, stop, step); 258 } 259 260 @Override 261 public Dataset getSlice(Slice... slice) throws DatasetException { 262 if (slice == null || slice.length == 0) { 263 return getSlice(null, new SliceND(shape)); 264 } 265 return getSlice(null, new SliceND(shape, slice)); 266 } 267 268 @Override 269 public Dataset getSlice(SliceND slice) throws DatasetException { 270 return getSlice(null, slice); 271 } 272 273 @Override 274 public Dataset getSlice(IMonitor monitor, Slice... slice) throws DatasetException { 275 if (slice == null || slice.length == 0) { 276 return getSlice(monitor, new SliceND(shape)); 277 } 278 return getSlice(monitor, new SliceND(shape, slice)); 279 } 280 281 @Override 282 public LazyDataset getSliceView(Slice... slice) { 283 if (slice == null || slice.length == 0) { 284 return getSliceView(new SliceND(shape)); 285 } 286 return getSliceView(new SliceND(shape, slice)); 287 } 288 289 /** 290 * @param nShape 291 */ 292 private void setShapeInternal(int... nShape) { 293 // work out transposed (sliced) shape (instead of removing padding from current shape) 294 if (size != 0) { 295 int[] pShape = calcTransposed(map, sShape == null ? oShape : sShape); 296 padding = ShapeUtils.calcShapePadding(pShape, nShape); 297 } 298 299 if (metadata != null) { 300 storeMetadata(metadata, Reshapeable.class); 301 metadata = copyMetadata(); 302 reshapeMetadata(shape, nShape); 303 } 304 shape = nShape; 305 } 306 307 @Override 308 public LazyDataset getSliceView(int[] start, int[] stop, int[] step) { 309 return getSliceView(new SliceND(shape, start, stop, step)); 310 } 311 312 @Override 313 public LazyDataset getSliceView(SliceND slice) { 314 LazyDataset view = clone(); 315 if (slice.isAll()) { 316 return view; 317 } 318 319 SliceND nslice = calcTrueSlice(slice); 320 if (nslice != null) { 321 view.begSlice = nslice.getStart(); 322 view.delSlice = nslice.getStep(); 323 view.sShape = nslice.getShape(); 324 } 325 view.shape = slice.getShape(); 326 view.size = ShapeUtils.calcLongSize(view.shape); 327 view.storeMetadata(metadata, Sliceable.class); 328 329 view.sliceMetadata(true, slice); 330 return view; 331 } 332 333 @Override 334 public Dataset getSlice(IMonitor monitor, int[] start, int[] stop, int[] step) throws DatasetException { 335 return getSlice(monitor, new SliceND(shape, start, stop, step)); 336 } 337 338 @Override 339 public Dataset getSlice(IMonitor monitor, SliceND slice) throws DatasetException { 340 if (loader != null && !loader.isFileReadable()) { 341 return null; 342 } 343 344 SliceND nslice = calcTrueSlice(slice); 345 346 Dataset a; 347 if (nslice == null) { 348 a = DatasetFactory.zeros(slice.getShape(), getDType()); 349 } else { 350 try { 351 a = DatasetUtils.convertToDataset(loader.getDataset(monitor, nslice)); 352 } catch (IOException e) { 353 logger.error("Problem getting {}: {}", String.format("slice %s %s %s from %s", Arrays.toString(slice.getStart()), Arrays.toString(slice.getStop()), 354 Arrays.toString(slice.getStep()), loader), e); 355 throw new DatasetException(e); 356 } 357 } 358 a.setName(name + AbstractDataset.BLOCK_OPEN + (nslice == null ? slice : nslice) + AbstractDataset.BLOCK_CLOSE); 359 if (metadata != null && a instanceof LazyDatasetBase) { 360 LazyDatasetBase ba = (LazyDatasetBase) a; 361 ba.metadata = copyMetadata(); 362 if (oMetadata != null) { 363 ba.restoreMetadata(oMetadata); 364 } 365 // metadata axis may be larger than data 366 if (nslice != null && (!nslice.isAll() || nslice.getMaxShape() != nslice.getShape())) { 367 ba.sliceMetadata(true, nslice); 368 } 369 } 370 371 if (nslice != null) { 372 if (map != null) { 373 a = a.getTransposedView(map); 374 } 375 if (padding != null) { 376 a.setShape(slice.getShape()); 377 } 378 } 379 a.addMetadata(MetadataFactory.createMetadata(OriginMetadata.class, this, nslice == null ? slice.convertToSlice() : nslice.convertToSlice(), oShape, null, name)); 380 381 return a; 382 } 383 384 @Override 385 public LazyDataset getTransposedView(final int... axes) { 386 LazyDataset view = clone(); 387 388 int[] naxes = checkPermutatedAxes(shape, axes); 389 if (naxes == null) { 390 return view; 391 } 392 393 view.shape = calcTransposed(naxes, shape); 394 if (view.size != 0 && padding != null) { // work out transpose by reverting effect of padding 395 int or = oShape.length; 396 int nr = shape.length; 397 int j = 0; // naxes index 398 int[] mShape = calcTransposed(map, sShape == null ? oShape : sShape); // pre-padded shape 399 int m = 0; // shape index 400 int e = -1; // index of unit dimension 401 final List<Integer> uaxes = new LinkedList<>(); 402 for (int a : naxes) { 403 uaxes.add(a); 404 } 405 List<Integer> oList = new ArrayList<>(); // dimensions left out by padding (in order) 406 int np = padding.length; 407 for (int i = 0; i < np; i++) { 408 int p = padding[i]; 409 if (p > 0) { // remove added dimensions 410 for (int k = 0; k < p; k++, j++) { 411 uaxes.remove((Integer) j); 412 } 413 } else if (p == 0) { // leave alone 414 if (mShape[m] == 1) { // bump up last unit dimension index 415 e = m; 416 } 417 j++; 418 m++; 419 } else { // add omitted dimensions to list 420 p = -p; 421 for (int k = 0; k < p; k++) { 422 e = find(mShape, 1, e + 1); 423 oList.add(e); 424 } 425 } 426 } 427 428 int[] omitted = new int[oList.size()]; 429 j = 0; 430 for (Integer o : oList) { 431 omitted[j++] = o; 432 } 433 int[] used = new int[or - omitted.length]; // all dimensions not omitted in pre-padded shape 434 j = 0; 435 for (int i = 0; i < or; i++) { 436 if (Arrays.binarySearch(omitted, i) < 0) { 437 used[j++] = i; 438 } 439 } 440 441 int[] vaxes = new int[uaxes.size()]; 442 j = 0; 443 for (int i = 0; i < nr; i++) { // remap dimension numbering 444 int l = uaxes.indexOf(i); 445 if (l >= 0) { 446 vaxes[l] = used[j++]; 447 } 448 } 449 int[] taxes = new int[or]; 450 j = 0; 451 for (int i = 0; i < or; i++) { // reassemble map 452 if (Arrays.binarySearch(omitted, i) >= 0) { 453 taxes[i] = i; 454 } else { 455 taxes[i] = vaxes[j++]; 456 } 457 } 458 459 naxes = taxes; 460 } 461 462 view.map = map == null ? naxes : calcTransposed(naxes, map); 463 if (view.size != 0) { 464 // work out transposed (sliced) shape 465 int[] tShape = calcTransposed(view.map, sShape == null ? oShape : sShape); 466 try { 467 view.padding = ShapeUtils.calcShapePadding(tShape, view.shape); 468 } catch (IllegalArgumentException e) { 469 System.err.println(e.getMessage() + ": " + Arrays.toString(tShape) + " cf " + Arrays.toString(view.shape)); 470 } 471 } 472 view.storeMetadata(metadata, Transposable.class); 473 view.transposeMetadata(axes); 474 return view; 475 } 476 477 private static int find(int[] map, int m, int off) { 478 for (int i = off, imax = map.length; i < imax; i++) { 479 if (map[i] == m) { 480 return i; 481 } 482 } 483 return -1; 484 } 485 486 private static int[] calcTransposed(int[] map, int[] values) { 487 if (values == null) { 488 return null; 489 } 490 int r = values.length; 491 if (map == null || r < 2) { 492 return values; 493 } 494 int[] ovalues = new int[r]; 495 for (int i = 0; i < r; i++) { 496 ovalues[i] = values[map[i]]; 497 } 498 return ovalues; 499 } 500 501 /** 502 * Calculate absolute slice 503 * @param slice 504 * @return true slice or null if zero-sized 505 */ 506 protected final SliceND calcTrueSlice(SliceND slice) { 507 /* 508 * Lazy dataset operations: getTransposedView (T), getSliceView (G), setShape/squeezeEnds (S+/S-): 509 * 510 * . T sets shape, base, and map in new view 511 * . G sets shape, size, begSlice and delSlice in new view 512 * . S sets shape, shapePadding in current view 513 * 514 * Then getSlice needs to interpret all info to find true slice, load data, get transposition (view) 515 * and set shape. Therefore: 516 * . S needs to update shapePadding only 517 * . T needs to update shapePadding too 518 * . G needs to work out true slice to update 519 * 520 * slice -> true slice 521 * adjusts for shape (S^-1) then remap dimensions (T^-1) 522 */ 523 524 if (slice == null) { 525 slice = new SliceND(shape); 526 } 527 528 if (ShapeUtils.calcLongSize(slice.getShape()) == 0) { 529 return null; 530 } 531 532 int[] nshape; 533 int[] nstart; 534 int[] nstep; 535 536 int r = oShape.length; 537 if (padding == null) { 538 nshape = slice.getShape(); 539 nstart = slice.getStart(); 540 nstep = slice.getStep(); 541 } else { 542 final int[] lshape = slice.getShape(); 543 final int[] lstart = slice.getStart(); 544 final int[] lstep = slice.getStep(); 545 546 nstart = new int[r]; 547 nstep = new int[r]; 548 nshape = new int[r]; 549 int i = 0; 550 int j = 0; 551 for (int p : padding) { // remove padding 552 if (p == 0) { 553 nshape[i] = lshape[j]; 554 nstart[i] = lstart[j]; 555 nstep[i] = lstep[j]; 556 i++; 557 j++; 558 } else if (p < 0) { 559 int imax = i - p; 560 while (i < imax) { 561 nshape[i] = 1; 562 nstep[i] = 1; 563 i++; 564 } 565 } else { 566 j += p; 567 } 568 } 569 } 570 571 if (map != null && r > 1) { // transpose dimensions 572 int[] pshape = new int[r]; 573 int[] pstart = new int[r]; 574 int[] pstep = new int[r]; 575 for (int i = 0; i < r; i++) { 576 int m = map[i]; 577 pshape[m] = nshape[i]; 578 pstart[m] = nstart[i]; 579 pstep[m] = nstep[i]; 580 } 581 582 nshape = pshape; 583 nstart = pstart; 584 nstep = pstep; 585 } 586 587 int[] nstop = new int[r]; 588 if (begSlice != null) { // find net slice 589 for (int i = 0; i < r; i++) { 590 int b = begSlice[i]; 591 int d = delSlice[i]; 592 nstart[i] = b + nstart[i] * d; 593 int nd = nstep[i] * d; 594 nstep[i] = nd; 595 nstop[i] = nstart[i] + (nshape[i] - 1) * nd + (nd >= 0 ? 1 : -1); 596 } 597 } else { 598 for (int i = 0; i < r; i++) { 599 int d = nstep[i]; 600 nstop[i] = nstart[i] + (nshape[i] - 1) * d + (d >= 0 ? 1 : -1); 601 } 602 } 603 604 return createSlice(nstart, nstop, nstep); 605 } 606 607 protected SliceND createSlice(int[] nstart, int[] nstop, int[] nstep) { 608 return SliceND.createSlice(oShape, null, nstart, nstop, nstep); 609 } 610 611 /** 612 * Transform data so that it can be used in setSlice of saver 613 * @param data 614 * @param tslice true slice 615 * @return data with dimensions adjusted and remapped 616 */ 617 final IDataset transformInput(IDataset data, SliceND tslice) { 618 if (padding != null) { // remove padding 619 data = data.getSliceView(); 620 int[] nshape = tslice.getShape(); 621 data.setShape(nshape); 622 } 623 624 return map == null ? data : data.getTransposedView(map); 625 } 626 627 /** 628 * Store metadata items that has given annotation 629 * @param origMetadata 630 * @param aclazz 631 */ 632 private void storeMetadata(Map<Class<? extends MetadataType>, List<MetadataType>> origMetadata, Class<? extends Annotation> aclazz) { 633 List<Class<? extends MetadataType>> mclazzes = findAnnotatedMetadata(aclazz); 634 if (mclazzes.size() == 0) { 635 return; 636 } 637 638 if (oMetadata == null) { 639 oMetadata = new HashMap<Class<? extends MetadataType>, List<MetadataType>>(); 640 } 641 for (Class<? extends MetadataType> mc : mclazzes) { 642 if (oMetadata.containsKey(mc)) { 643 continue; // do not overwrite original 644 } 645 646 List<MetadataType> l = origMetadata.get(mc); 647 List<MetadataType> nl = new ArrayList<MetadataType>(l.size()); 648 for (MetadataType m : l) { 649 nl.add(m.clone()); 650 } 651 oMetadata.put(mc, nl); 652 } 653 } 654 655 @SuppressWarnings("unchecked") 656 private List<Class<? extends MetadataType>> findAnnotatedMetadata(Class<? extends Annotation> aclazz) { 657 List<Class<? extends MetadataType>> mclazzes = new ArrayList<Class<? extends MetadataType>>(); 658 if (metadata == null) { 659 return mclazzes; 660 } 661 662 for (Class<? extends MetadataType> c : metadata.keySet()) { 663 boolean hasAnn = false; 664 for (MetadataType m : metadata.get(c)) { 665 if (m == null) { 666 continue; 667 } 668 669 Class<? extends MetadataType> mc = m.getClass(); 670 do { // iterate over super-classes 671 for (Field f : mc.getDeclaredFields()) { 672 if (f.isAnnotationPresent(aclazz)) { 673 hasAnn = true; 674 break; 675 } 676 } 677 Class<?> sclazz = mc.getSuperclass(); 678 if (!MetadataType.class.isAssignableFrom(sclazz)) { 679 break; 680 } 681 mc = (Class<? extends MetadataType>) sclazz; 682 } while (!hasAnn); 683 if (hasAnn) { 684 break; 685 } 686 } 687 if (hasAnn) { 688 mclazzes.add(c); 689 } 690 } 691 return mclazzes; 692 } 693 694 /** 695 * Gets the maximum size of a slice of a dataset in a given dimension 696 * which should normally fit in memory. Note that it might be possible 697 * to get more in memory, this is a conservative estimate and seems to 698 * almost always work at the size returned; providing Xmx is less than 699 * the physical memory. 700 * 701 * To get more in memory increase -Xmx setting or use an expression 702 * which calls a rolling function (like rmean) instead of slicing directly 703 * to memory. 704 * 705 * @param lazySet 706 * @param dimension 707 * @return maximum size of dimension that can be sliced. 708 */ 709 public static int getMaxSliceLength(ILazyDataset lazySet, int dimension) { 710 // size in bytes of each item 711 final double size = DTypeUtils.getItemBytes(DTypeUtils.getDTypeFromClass(lazySet.getElementClass()), lazySet.getElementsPerItem()); 712 713 // Max in bytes takes into account our minimum requirement 714 final double max = Math.max(Runtime.getRuntime().totalMemory(), Runtime.getRuntime().maxMemory()); 715 716 // Firstly if the whole dataset it likely to fit in memory, then we allow it. 717 // Space specified in bytes per item available 718 final double space = max/lazySet.getSize(); 719 720 // If we have room for this whole dataset, then fine 721 int[] shape = lazySet.getShape(); 722 if (space >= size) { 723 return shape[dimension]; 724 } 725 726 // Otherwise estimate what we can fit in, conservatively. 727 // First get size of one slice, see it that fits, if not, still return 1 728 double sizeOneSlice = size; // in bytes 729 for (int dim = 0; dim < shape.length; dim++) { 730 if (dim == dimension) { 731 continue; 732 } 733 sizeOneSlice *= shape[dim]; 734 } 735 double avail = max / sizeOneSlice; 736 if (avail < 1) { 737 return 1; 738 } 739 740 // We fudge this to leave some room 741 return (int) Math.floor(avail/4d); 742 } 743}