Source for gnu.gcj.runtime.PersistentByteMap

   1: /* Copyright (C) 2004, 2005  Free Software Foundation
   2: 
   3:    This file is part of libgcj.
   4: 
   5: This software is copyrighted work licensed under the terms of the
   6: Libgcj License.  Please consult the file "LIBGCJ_LICENSE" for
   7: details.  */
   8: 
   9: 
  10: 
  11: /*  A PersistentByteMap maps a byte array to another byte array.  It
  12: uses a file that does not need to be serialized but may be
  13: memory-mapped and read in-place.  So, even if there are many instances
  14: of gcj applications running, they can share PersistentByteMaps.
  15: 
  16: The idea is to make searches as fast as possible: opening a
  17: PersistentByteMap is cheap and search time doesn't grow with the
  18: number of entries in the table.  On the other hand, enumerating the
  19: map is slow, but that is a relatively uncommon operation.
  20: 
  21: The main use of this class is to provide a way to map the
  22: MessageDigest of a class file to the location of a DSO that contains
  23: the compiled version of that class.  It is up the the installer of an
  24: application to keep the DSO up to date with the jar.  
  25: 
  26: USAGE:
  27:         MessageDigest md = MessageDigest.getInstance("MD5");
  28:         digest = md.digest(bytes);
  29: 
  30:         PersistentByteMap map 
  31:           = new PersistentByteMap
  32:             (fileName, PersistentByteMap.AccessMode.READ_ONLY);
  33: 
  34:         byte[] soName = map.get(digest);
  35:         if (soName)
  36:           {
  37:             String SharedLibraryName = new String(soName);
  38: 
  39: BUGS/FEATURES:
  40:         remove() isn't written yet.
  41: 
  42:         capacity is fixed once the map has been created.
  43: 
  44:         We use linear probing to resolve collisions.  It might be
  45:         better to use a scheme that results in fewer probes to
  46:         determine that an item isn't found.  However, even when the
  47:         table is half full there are only on average 1.5 probes for a
  48:         successful search and 2.5 probes for an unsuccessful one.
  49: 
  50:     We don't do any locking at all: adding to a PersistentByteMap
  51:     at runtime is possible, but it requires filesystem locks
  52:     around get(), put(), and remove().
  53: */
  54: 
  55: package gnu.gcj.runtime;
  56: 
  57: import java.io.*;
  58: import java.nio.*;
  59: import java.nio.channels.*;
  60: import java.util.*;
  61: import java.security.MessageDigest;
  62: import java.math.BigInteger;
  63: 
  64: public class PersistentByteMap
  65: {
  66:   private MappedByteBuffer buf;
  67: 
  68:   static private final int MAGIC = 0;
  69:   static private final int VERSION = 4;
  70:   static private final int CAPACITY = 8;
  71:   static private final int TABLE_BASE = 12;
  72:   static private final int STRING_BASE = 16;
  73:   static private final int STRING_SIZE = 20;
  74:   static private final int FILE_SIZE = 24;
  75:   static private final int ELEMENTS = 28;
  76:   
  77:   static private final int INT_SIZE = 4;
  78: 
  79:   static private final int TABLE_ENTRY_SIZE = 2 * INT_SIZE;
  80: 
  81:   private int capacity;   // number of entries
  82:   private int table_base;   // offset from start of file, in bytes
  83:   private int string_base;  // offset from start of file, in bytes
  84:   private int string_size;  // size of string table, in bytes
  85:   private int file_size;    // size of file, in bytes;
  86:   private int elements;     // number of elements in table
  87: 
  88:   private long length;      // the length of the underlying file
  89: 
  90:   private final File name;  // The name of the underlying file
  91: 
  92:   static private final int UNUSED_ENTRY = -1; 
  93: 
  94:   static public final int KEYS = 0;
  95:   static public final int VALUES = 1;
  96:   static public final int ENTRIES = 2;
  97: 
  98:   private HashMap values;   // A map of strings in the string table.
  99: 
 100:   FileChannel fc;           // The underlying file channel.
 101: 
 102:   static final public class AccessMode
 103:   {
 104:     private final FileChannel.MapMode mapMode;
 105: 
 106:     static
 107:     {
 108:       READ_ONLY = new AccessMode(FileChannel.MapMode.READ_ONLY);
 109:       READ_WRITE = new AccessMode(FileChannel.MapMode.READ_WRITE);
 110:       PRIVATE = new AccessMode(FileChannel.MapMode.PRIVATE);
 111:     }
 112: 
 113:     public static final AccessMode READ_ONLY;
 114:     public static final AccessMode READ_WRITE; 
 115:     public static final AccessMode PRIVATE;
 116: 
 117:     private AccessMode(FileChannel.MapMode mode)
 118:     {
 119:       this.mapMode = mode;
 120:     }
 121:   }
 122: 
 123:   private PersistentByteMap(File name)
 124:   {
 125:     this.name = name;
 126:   }
 127: 
 128:   public PersistentByteMap(String filename, AccessMode mode)
 129:     throws IOException 
 130:   {
 131:     this(new File(filename), mode);
 132:   }
 133: 
 134:   public PersistentByteMap(File f, AccessMode mode)
 135:     throws IOException 
 136:   {
 137:     name = f;
 138: 
 139:     if (mode == AccessMode.READ_ONLY)
 140:       {
 141:         FileInputStream fis = new FileInputStream(f);
 142:         fc = fis.getChannel();
 143:       }
 144:     else
 145:       {
 146:         RandomAccessFile fos = new RandomAccessFile(f, "rw");
 147:         fc = fos.getChannel();
 148:       }
 149: 
 150:     length = fc.size();
 151:     buf = fc.map(mode.mapMode, 0, length);
 152: 
 153:     int magic = getWord (MAGIC);
 154:     if (magic != 0x67636a64) /* "gcjd" */
 155:       throw new IllegalArgumentException(f.getName());
 156: 
 157:     table_base = getWord (TABLE_BASE);
 158:     capacity = getWord (CAPACITY);
 159:     string_base = getWord (STRING_BASE);
 160:     string_size = getWord (STRING_SIZE);
 161:     file_size = getWord (FILE_SIZE);
 162:     elements = getWord (ELEMENTS);
 163: 
 164:     // FIXME:  Insert a bunch of sanity checks here
 165:   }
 166: 
 167:   private void init (PersistentByteMap m, File f, int capacity, int strtabSize)
 168:     throws IOException 
 169:   {
 170:     f.createNewFile();
 171:     RandomAccessFile raf = new RandomAccessFile(f, "rw");
 172: 
 173:     {        
 174:       // The user has explicitly provided a size for the table.
 175:       // We're going to make that size prime.  This isn't
 176:       // strictly necessary but it can't hurt.
 177:       //
 178:       // We expand the size by 3/2 and round the result because the
 179:       // hash table is intolerably slow when more than 2/3 full.
 180:       
 181:       BigInteger size = new BigInteger(Integer.toString(((capacity*3)+1)/2));
 182:       BigInteger two = BigInteger.ONE.add(BigInteger.ONE);
 183:       
 184:       if (size.getLowestSetBit() != 0) // A hard way to say isEven()
 185:     size = size.add(BigInteger.ONE);
 186:     
 187:       while (! size.isProbablePrime(10))
 188:     size = size.add(two);
 189:       
 190:       this.capacity = capacity = size.intValue();
 191:     }
 192: 
 193:     table_base = 64;
 194:     string_base = table_base + capacity * TABLE_ENTRY_SIZE;
 195:     string_size = 0;
 196:     file_size = string_base;
 197:     elements = 0;
 198: 
 199:     int totalFileSize = string_base + strtabSize;
 200: 
 201:     // Create the file; this rounds up the size of the file to a fixed
 202:     // number of 4k pages.
 203:     byte[] _4k = new byte[4096];
 204:     for (long i = 0; i < totalFileSize; i+= 4096)
 205:       raf.write(_4k);
 206:         
 207:     fc = raf.getChannel();
 208:     buf = fc.map(FileChannel.MapMode.READ_WRITE, 0, raf.length());
 209: 
 210:     for (int i = 0; i < capacity; i++)
 211:       putKeyPos(UNUSED_ENTRY, i);
 212:         
 213:     putWord(0x67636a64, MAGIC);
 214:     putWord(0x01, VERSION);
 215:     putWord(capacity, CAPACITY);
 216:     putWord(table_base, TABLE_BASE);
 217:     putWord(string_base, STRING_BASE);
 218:     putWord(file_size, FILE_SIZE);
 219:     putWord(elements, ELEMENTS);
 220:     buf.force();
 221: 
 222:     length = fc.size();
 223:     string_size = 0;
 224:   }     
 225: 
 226:   static public PersistentByteMap 
 227:   emptyPersistentByteMap(File name, int capacity, int strtabSize)
 228:     throws IOException 
 229:   {
 230:     PersistentByteMap m = new PersistentByteMap(name);
 231:     m.init(m, name, capacity, strtabSize);
 232:     return m;
 233:   }     
 234: 
 235:   private int getWord (int index)
 236:   {
 237:     buf.position(index);
 238:     byte[] wordBuf = new byte[4];
 239:     buf.get(wordBuf);
 240: 
 241:     int result = (int)wordBuf[0]&0xff;
 242:     result += ((int)wordBuf[1]&0xff) << 8;
 243:     result += ((int)wordBuf[2]&0xff) << 16;
 244:     result += ((int)wordBuf[3]&0xff) << 24;
 245:     return result;
 246:   }
 247: 
 248:   private void putWord (int word, int index)
 249:   {
 250:     buf.position(index);
 251:     byte[] wordBuf = new byte[4];
 252:     wordBuf[0] = (byte)(word);
 253:     wordBuf[1] = (byte)(word >>> 8);
 254:     wordBuf[2] = (byte)(word >>> 16);
 255:     wordBuf[3] = (byte)(word >>> 24);
 256:     buf.put(wordBuf);
 257:   }
 258: 
 259:   public Set entrySet()
 260:   {
 261:     return null;
 262:   }
 263: 
 264:   private int getBucket(int n)
 265:   {
 266:     return table_base + (2*n * INT_SIZE);
 267:   }
 268: 
 269:   private int getKeyPos(int n)
 270:   {
 271:     return getWord(getBucket(n));
 272:   }
 273:   
 274:   private int getValuePos(int n)
 275:   {
 276:     return getWord(getBucket(n) + INT_SIZE);
 277:   }
 278: 
 279:   private void putKeyPos(int index, int n)
 280:   {
 281:     putWord(index, getBucket(n));
 282:   }
 283: 
 284:   private void putValuePos(int index, int n)
 285:   {
 286:     putWord(index, getBucket(n) + INT_SIZE);
 287:   }
 288: 
 289:   private byte[] getBytes(int n)
 290:   {
 291:     int len = getWord (string_base + n);
 292:     int base = string_base + n + INT_SIZE;
 293:     byte[] key = new byte[len];
 294:     buf.position(base);
 295:     buf.get(key, 0, len);
 296:     return key;
 297:   }
 298: 
 299:   private int hash (byte[] b)
 300:   {    
 301:     // We assume that the message digest is evenly distributed, so we
 302:     // only need to use a few bytes of it as the hash function.
 303:     long hashIndex 
 304:       = ((b[0]&0xffL)
 305:          + ((b[1]&0xffL)<<8) 
 306:          + ((b[2]&0xffL)<<16) 
 307:          + ((b[3]&0xffL)<<24));
 308:     long result = hashIndex % (long)capacity;
 309:     return (int)result;
 310:   }
 311:         
 312:   public byte[] get(byte[] digest)
 313:   {
 314:     int hashIndex = hash(digest);
 315: 
 316:     do
 317:       {
 318:         int k = getKeyPos(hashIndex);
 319:         if (k == UNUSED_ENTRY)
 320:           return null;
 321: 
 322:         if (Arrays.equals ((byte[])digest, getBytes(k)))
 323:           return getBytes(getValuePos(hashIndex));
 324:                 
 325:         // Use linear probing to resolve hash collisions.  This may
 326:         // not be theoretically as good as open addressing, but it has
 327:         // good cache behviour.
 328:         hashIndex++;
 329:         hashIndex %= capacity;
 330:       }
 331:     while (true);
 332:   }
 333: 
 334:   public void put(byte[] digest, byte[] value)
 335:     throws IllegalAccessException
 336:   {
 337:     int hashIndex = hash(digest);
 338: 
 339:     if (elements >= capacity())
 340:       throw new IllegalAccessException("Table Full: " + elements);
 341: 
 342:     do
 343:       {
 344:         int k = getKeyPos(hashIndex);
 345:         if (k == UNUSED_ENTRY)
 346:           {
 347:             int newKey = addBytes(digest);
 348:             putKeyPos(newKey, hashIndex);
 349:             int newValue = addBytes(value);
 350:             putValuePos(newValue, hashIndex);
 351:             elements++;
 352:             putWord(elements, ELEMENTS);            
 353:             return;
 354:           }
 355:         else if (Arrays.equals (digest, getBytes(k)))
 356:           {
 357:             int newValue = addBytes((byte[])value);
 358:             putValuePos(newValue, hashIndex);
 359:             return;
 360:           }
 361:                 
 362:         hashIndex++;
 363:         hashIndex %= capacity;
 364:       }
 365:     while (true);
 366:   }
 367: 
 368:   private int addBytes (byte[] data)
 369:     throws IllegalAccessException
 370:   {
 371:     if (data.length > 16)
 372:       {
 373:     // Keep track of long strings in the hope that we will be able
 374:     // to re-use them.
 375:     if (values == null)
 376:       {
 377:         values = new HashMap();
 378:     
 379:         for (int i = 0; i < capacity; i++)
 380:           if (getKeyPos(i) != UNUSED_ENTRY)
 381:         {
 382:           int pos = getValuePos(i);
 383:           ByteWrapper bytes = new ByteWrapper(getBytes(pos));
 384:           values.put(bytes, new Integer(pos));
 385:         }
 386:       }
 387: 
 388:     {
 389:       Object result = values.get(new ByteWrapper(data));
 390:       if (result != null)
 391:         {
 392:           // We already have this value in the string table
 393:           return ((Integer)result).intValue();
 394:         }
 395:     }
 396:       }
 397: 
 398:     if (data.length + INT_SIZE >= this.length)
 399:       throw new IllegalAccessException("String table Full");
 400: 
 401:     int extent = string_base+string_size;
 402:     int top = extent;
 403:     putWord(data.length, extent);
 404:     extent += INT_SIZE;
 405:     buf.position(extent);
 406:     buf.put(data, 0, data.length);
 407:     extent += data.length;
 408:     extent += INT_SIZE-1;
 409:     extent &= ~(INT_SIZE-1); // align
 410:     string_size = extent - string_base;
 411:     file_size = extent;
 412:     putWord (string_size, STRING_SIZE);
 413:     putWord (file_size, FILE_SIZE);
 414: 
 415:     if (data.length > 16)
 416:       values.put(new ByteWrapper(data), new Integer(top - string_base));
 417:         
 418:     return top - string_base;
 419:   }
 420: 
 421:   public Iterator iterator(int type)
 422:   {
 423:     return new HashIterator(type);
 424:   }
 425: 
 426:   public int size()
 427:   {
 428:     return elements;
 429:   }
 430: 
 431:   public int stringTableSize()
 432:   {
 433:     return string_size;
 434:   }
 435: 
 436:   public int capacity()
 437:   {
 438:     // With the the table 2/3 full there will be on average 2 probes
 439:     // for a successful search and 5 probes for an unsuccessful one.
 440:     return capacity * 2/3;
 441:   }
 442: 
 443:   public void force()
 444:   {
 445:     buf.force();
 446:   }
 447: 
 448:   public File getFile()
 449:   {
 450:     return name;
 451:   }
 452: 
 453:   // Close the map.  Once this has been done, the map can no longer be
 454:   // used.
 455:   public void close() throws IOException
 456:   {
 457:     force();
 458:     fc.close();
 459:   }
 460: 
 461:   public void 
 462:   putAll(PersistentByteMap t)
 463:     throws IllegalAccessException
 464:   {
 465:     // We can use a fast copy if the size of a map has not changed.
 466:     if (this.elements == 0 && t.capacity == this.capacity
 467:     && t.length == this.length)
 468:       {
 469:     this.buf.position(0);
 470:     t.buf.position(0);
 471:     this.buf.put(t.buf);
 472:     this.table_base = t.table_base;
 473:     this.string_base = t.string_base;
 474:     this.string_size = t.string_size;
 475:     this.file_size = t.file_size;
 476:     this.elements = t.elements;
 477:     if (t.values != null)
 478:       this.values = (HashMap)t.values.clone();
 479:     return;
 480:       }
 481: 
 482:     // Otherwise do it the hard way.
 483:     Iterator iterator = t.iterator(PersistentByteMap.ENTRIES);
 484:     while (iterator.hasNext())
 485:       {
 486:     PersistentByteMap.MapEntry entry 
 487:       = (PersistentByteMap.MapEntry)iterator.next();
 488:     this.put((byte[])entry.getKey(), (byte[])entry.getValue());
 489:       }
 490:   }
 491:     
 492: 
 493:   private final class HashIterator implements Iterator
 494:   {
 495:     /** Current index in the physical hash table. */
 496: 
 497:     private int idx;
 498:     private int count;
 499:     private final int type;
 500: 
 501:     /**
 502:      * Construct a new HashIterator with the supplied type.
 503:      * @param type {@link #KEYS}, {@link #VALUES}, or {@link #ENTRIES}
 504:      */
 505:     HashIterator(int type)
 506:     {
 507:       this.type = type;
 508:       count = elements;
 509:       idx = 0;
 510:     }
 511: 
 512:     /**
 513:      * Returns true if the Iterator has more elements.
 514:      * @return true if there are more elements
 515:      * @throws ConcurrentModificationException if the HashMap was modified
 516:      */
 517:     public boolean hasNext()
 518:     {
 519:       return count > 0;
 520:     }
 521: 
 522:     /**
 523:      * Returns the next element in the Iterator's sequential view.
 524:      * @return the next element
 525:      * @throws ConcurrentModificationException if the HashMap was modified
 526:      * @throws NoSuchElementException if there is none
 527:      */
 528:     public Object next()
 529:     {
 530:       count--;
 531:       for (int i = idx; i < capacity; i++)
 532:         if (getKeyPos(i) != UNUSED_ENTRY)
 533:           {
 534:             idx = i+1;
 535:             if (type == VALUES)
 536:               return getBytes(getValuePos(i));
 537:             if (type == KEYS)
 538:               return getBytes(getKeyPos(i));
 539:             return new MapEntry(i,
 540:                                 getBytes(getKeyPos(i)),
 541:                                 getBytes(getValuePos(i)));
 542:           }
 543:       return null;
 544:     }    
 545: 
 546:     /**
 547:      * Remove from the underlying collection the last element returned
 548:      * by next (optional operation). This method can be called only
 549:      * once after each call to <code>next()</code>. It does not affect
 550:      * what will be returned by subsequent calls to next.
 551:      *
 552:      * @throws IllegalStateException if next has not yet been called
 553:      *         or remove has already been called since the last call
 554:      *         to next.
 555:      * @throws UnsupportedOperationException if this Iterator does not
 556:      *         support the remove operation.
 557:      */
 558:      public void remove()
 559:     {
 560:       throw new UnsupportedOperationException();
 561:     }
 562:   }
 563: 
 564:   static public final class MapEntry
 565:   {
 566:     private final Object key;
 567:     private final Object value;
 568:     private final int bucket;
 569: 
 570:     public MapEntry(int bucket, Object newKey, Object newValue)
 571:     {
 572:       this.key = newKey;
 573:       this.value = newValue;
 574:       this.bucket = bucket;
 575:     }
 576: 
 577:     public final Object getKey()
 578:     {
 579:       return key;
 580:     }
 581: 
 582:     public final Object getValue()
 583:     {
 584:       return value;
 585:     }
 586: 
 587:     public final int getBucket()
 588:     {
 589:       return bucket;
 590:     }
 591:   }
 592: 
 593:   // A wrapper class for a byte array that allows collections to be
 594:   // made.
 595:   private final class ByteWrapper
 596:   {
 597:     final byte[] bytes;
 598:     final int hash;
 599: 
 600:     public ByteWrapper (byte[] bytes)
 601:     {
 602:       int sum = 0;
 603:       this.bytes = bytes;
 604:       for (int i = 0; i < bytes.length; i++)
 605:     sum += bytes[i];
 606:       hash = sum;
 607:     }
 608: 
 609:     public int hashCode()
 610:     {
 611:       return hash;
 612:     }
 613:   
 614:     public boolean equals(Object obj)
 615:     {
 616:       return Arrays.equals(bytes, ((ByteWrapper)obj).bytes);
 617:     }
 618:   }
 619: }