HashMap源码分析_前嗅大数据

HashMap源码分析

时间: 2020-08-12来源：OSCHINA

前景提要

HashMap源码 JDK版本: 1.8 类定义代码 public class HashMap<K,V> extends AbstractMap<K,V> implements Map<K,V>, Cloneable, Serializable 继承 AbstractMap , AbstractMap 实现 Map 实现 Map , Cloneable , Serializable Map Cloneable 标记接口可以克隆：深克隆和浅克隆 Serializable 标记接口序列化常量 //默认初始容量16 static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; //最大容量 static final int MAXIMUM_CAPACITY = 1 << 30; //默认负载因子 static final float DEFAULT_LOAD_FACTOR = 0.75f; //链表长度超过8时，链表转换为红黑树 static final int TREEIFY_THRESHOLD = 8; //容量小于等于6时，红黑树转为链表 static final int UNTREEIFY_THRESHOLD = 6; //红黑树的最大节点数 static final int MIN_TREEIFY_CAPACITY = 64; 属性 //hash表 transient Node<K,V>[] table; //缓存 transient Set<Map.Entry<K,V>> entrySet; //键值对数量 transient int size; //被修改次数，用于快速失败 transient int modCount; //扩容容量临界值：capacity * load factor，达到此值需要扩容 int threshold; //负载因子 final float loadFactor; 构造器指定初始化容量及负载因子 public HashMap(int initialCapacity, float loadFactor) { //校验容量 if (initialCapacity < 0) throw new IllegalArgumentException("Illegal initial capacity: " + initialCapacity); //校验容量 if (initialCapacity > MAXIMUM_CAPACITY) initialCapacity = MAXIMUM_CAPACITY; //校验负载因子 if (loadFactor <= 0 || Float.isNaN(loadFactor)) throw new IllegalArgumentException("Illegal load factor: " + loadFactor); this.loadFactor = loadFactor; //计算扩容临界值 this.threshold = tableSizeFor(initialCapacity); } public static final float NaN = 0.0f / 0.0f; public static boolean isNaN(float v) { //这个函数比较有趣，判断是否非法的float。 //float中定义了一个NaN常量，利用此常量与任何值（包括NaN）都不相等的特性，判断是否是合法数字 return (v != v); } //该算法计算容量大小 static final int tableSizeFor(int cap) { //减1的目的是，针对正好是2的幂的整数，如果经过下面右移+位或算法后， //该数必然大于当前的整数，则导致计算出的容量为当前值的2倍， //理论上容量值应该是当前值。 //所以，无论是2的幂还是小于2的幂的数值，减1后保证容量值等于离当前最近的2的幂 int n = cap - 1; //将N进行无符号右移+位或操作，目的是将最高位的1后面全部变为1 //经过此操作后，末位必然是1，即该数是奇数 //假设n=01xx,xxxx //第一次：n=011x,xxxx n |= n >>> 1; //第二次：n=0111，1xxx n |= n >>> 2; //第三次：n=0111，1111 n |= n >>> 4; //第四次：n=0111,1111 n |= n >>> 8; //第五次：n=0111,1111 n |= n >>> 16; //经过上面的算法，算出来的值必然是奇数，同时加1必然为2的整数幂 return (n < 0) ? 1 : (n >= MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : n + 1; } 指定容量 public HashMap(int initialCapacity) { this(initialCapacity, DEFAULT_LOAD_FACTOR); } 默认 public HashMap() { //默认负载因子 this.loadFactor = DEFAULT_LOAD_FACTOR; } 数据结构链表 static class Node<K,V> implements Map.Entry<K,V> { //缓存hash，避免重复计算 final int hash; final K key; V value; //next节点 Node<K,V> next; Node(int hash, K key, V value, Node<K,V> next) { this.hash = hash; this.key = key; this.value = value; this.next = next; } public final K getKey() { return key; } public final V getValue() { return value; } //覆写toString public final String toString() { return key + "=" + value; } //覆写hashCode方法 public final int hashCode() { //将key和value的hash异或 return Objects.hashCode(key) ^ Objects.hashCode(value); } public final V setValue(V newValue) { V oldValue = value; value = newValue; return oldValue; } //覆写equals方法，理论上必然覆写hashcode方法 public final boolean equals(Object o) { if (o == this) return true; if (o instanceof Map.Entry) { Map.Entry<?,?> e = (Map.Entry<?,?>)o; if (Objects.equals(key, e.getKey()) && Objects.equals(value, e.getValue())) return true; } return false; } } 红黑树
待研究... 方法 put() public V put(K key, V value) { return putVal(hash(key), key, value, false, true); } static final int hash(Object key) { int h; //将key的hashcode右移做异或操作 //hashcode右移16位，正好是32位的一半，然后与原hashcode异或， //这样将高16位的特征部分保留到了低16位，所以该函数称为扰动函数 //原因：计算桶位置的函数为(len-1)&hash，即使hash再松散，这个函数仅取的低位，势必造成更多的冲突 //key==null时，返回0，计算index时，也为0，说明key==null的节点放到第0个桶的位置 return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16); } final V putVal(int hash, K key, V value, boolean onlyIfAbsent, boolean evict) { Node<K,V>[] tab; Node<K,V> p; int n, i; //原table为空时，初始化 if ((tab = table) == null || (n = tab.length) == 0) n = (tab = resize()).length; //计算下标index，头结点不存在，新建节点 //注意计算index的公式：(n-1)&hash，取hash的低位 //n为2的幂，减1后低位均为1，和hash位与后，其实是取模运算，这样效率更高 //key==null，index=0，放到第0个桶 if ((p = tab[i = (n - 1) & hash]) == null) tab[i] = newNode(hash, key, value, null); else { Node<K,V> e; K k; //判断头节点key是否与当前key一样 //1、先判断hash，hash不同，则必然不等 //2、判断key是否相等，对于类似Integer的-127~127的范围内比较相等， //以及String a1="hello" 和String a2="hello"等等比较相等，可快速结束比较 //3、通过使用==不相等对象，则使用equals判断 if (p.hash == hash && ((k = p.key) == key || (key != null && key.equals(k)))) e = p; //该链是红黑树 else if (p instanceof TreeNode) e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value); else { //该链是链表 //查找链表尾节点 for (int binCount = 0; ; ++binCount) { if ((e = p.next) == null) { //链表尾结点后新增节点，此时e=null p.next = newNode(hash, key, value, null); if (binCount >= TREEIFY_THRESHOLD - 1) //如果链表长度达到链表转为红黑树的临界值，则转为红黑树 treeifyBin(tab, hash); break; } //判断key是否在链表中已存在,若存在，则结束，此时e=p.next if (e.hash == hash && ((k = e.key) == key || (key != null && key.equals(k)))) break; p = e; } } if (e != null) { //key在map中存在 V oldValue = e.value; if (!onlyIfAbsent || oldValue == null) //新值覆盖旧值 e.value = value; //空方法，给LinkedHashMap覆写使用 afterNodeAccess(e); //返回旧值 return oldValue; } } ++modCount; if (++size > threshold) //size+1,同时判断是否达到扩容临界值（threshold=capacity * load factor），达到则扩容 resize(); //空方法，给LinkedHashMap覆写使用 afterNodeInsertion(evict); //新增节点成功，返回null return null; } 总结对key计算hash，h = key.hashCode()) ^ (h >>> 16) 原哈希桶为空时，初始化，resize() 计算下标index，(n - 1) & hash index位置为空，则新建节点不为空判断index位置节点是否与传入的key相等，相等则记录当前节点e
p.hash == hash && ((k = p.key) == key || (key != null && key.equals(k))) 判断是否是红黑树，是则按照红黑树逻辑走否则是链表遍历链表，判断传入的key是否在链表中存在，存在则记录当前节点e 不存在，新增节点到链表尾部。同时判断链表长度是否达到链表转红黑树条件，达到则转为红黑树经上处理，若e不为空，则该key在链表中存在，将新值覆盖旧值，返回旧值哈希桶size+1，判断是否达到扩容临界值，达到则扩容resize() 最后返回null，新增节点成功 resize() final Node<K,V>[] resize() { Node<K,V>[] oldTab = table; int oldCap = (oldTab == null) ? 0 : oldTab.length; int oldThr = threshold; int newCap, newThr = 0; if (oldCap > 0) { //扩容 if (oldCap >= MAXIMUM_CAPACITY) { threshold = Integer.MAX_VALUE; return oldTab; } //新桶数=旧桶数*2 //当旧桶数<16时，需重新计算threshold扩容临界值；如旧桶数=2，threshold=1等，属于特殊例子，实际不够3/4整乘 //扩容临界值计算公式：threshold=loadFactor*桶数 //当旧桶数>=16时，由于桶数扩为原来的2倍，则新的threshold=loadFactor*旧桶数*2=旧threshold*2 else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY && oldCap >= DEFAULT_INITIAL_CAPACITY) newThr = oldThr << 1; // double threshold } else if (oldThr > 0) //构建HashMap时，初始化时指定了容量大小 newCap = oldThr; else { //构建HashMap时，初始化时未指定任何参数 newCap = DEFAULT_INITIAL_CAPACITY; newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY); } if (newThr == 0) { //构建HashMap时，指定了容量大小 //或者扩容时，旧桶数<16的，重新计算threshold float ft = (float)newCap * loadFactor; newThr = (newCap < MAXIMUM_CAPACITY && ft < (float)MAXIMUM_CAPACITY ? (int)ft : Integer.MAX_VALUE); } //将新的扩容临界值赋值给threshold threshold = newThr; @SuppressWarnings({"rawtypes","unchecked"}) //用新的桶数新建节点数组 Node<K,V>[] newTab = (Node<K,V>[])new Node[newCap]; table = newTab; if (oldTab != null) { //扩容时，重新计算index，移动节点到新的位置 for (int j = 0; j < oldCap; ++j) { Node<K,V> e; if ((e = oldTab[j]) != null) { //当前节点不为空 //将当前节点置null，方便gc回收 oldTab[j] = null; if (e.next == null) //当前节点的next为空，则只有一个节点 //重新计算index，取模运算 newTab[e.hash & (newCap - 1)] = e; else if (e instanceof TreeNode) //红黑树 ((TreeNode<K,V>)e).split(this, newTab, j, oldCap); else { //移动链表节点，采用高低位指针 //扩容翻倍，原链表上的每个节点，低位，存放到原来的位置，高位，存放到下标：低位+原桶数 //低位链表的头节点，尾结点 Node<K,V> loHead = null, loTail = null; //高位链表的头节点，尾结点 Node<K,V> hiHead = null, hiTail = null; //临时存在next节点 Node<K,V> next; do { next = e.next; //计算hash&旧桶数，如果=0，表示小于原桶数，属于低位，放原位置即可。 //如果>0，表示大于原桶数，可以放高位 //例如：len=16，hash=13，则10000&1101=0,属于低位 //len=8，hash=13，则1000&1101=1000=8>0,属于高位 //高位的意思是该节点还可以分散到扩容后的数组更高的位置 if ((e.hash & oldCap) == 0) { //低位链表尾部插入新节点 if (loTail == null) //低位尾结点为空，则将头节点初始化为当前节点 loHead = e; else //不为空，将尾节点的next节点指向当前节点 loTail.next = e; //同时将当前节点置为尾结点 loTail = e; } else { //高位链表尾部插入新节点 if (hiTail == null) //高位尾结点为空，将高位头节点指向当前节点 hiHead = e; else //不为空，将尾结点的next节点指向当前节点 hiTail.next = e; //同时将当前节点置为尾结点 hiTail = e; } } while ((e = next) != null); if (loTail != null) { //低位链表存在 //原链表处理完后，将低位尾部指针next置为null loTail.next = null; //新桶原位置存储低位链表 newTab[j] = loHead; } if (hiTail != null) { //高位链表存在 //原链表处理完后，将高位尾部指针next置为null hiTail.next = null; //高位链表放到原链表位置+旧桶数量 newTab[j + oldCap] = hiHead; } } } } } return newTab; } 总结桶容量>0，则扩容容量边界校验旧桶容量翻倍，边界校验，条件允许则扩容临界值翻倍桶容量=0，扩容临界值>0，初始化HashMap时指定了容量大小新桶容量=扩容临界值，初始化时的扩容临界值即桶容量大小否则，初始化HashMap时，未指定任何参数，使用默认参数初始化扩容临界值=0 计算扩容临界值, newCap * loadFactor 使用新容量生成新哈希桶旧哈希桶不为空，需将原节点移动到新桶遍历旧桶，桶当前位置不为空桶位置仅一个节点，重新计算index 红黑树否则链表判断高低位，生成低位链表、高位链表， e.hash & oldCap 低位链表不动，高位链表新位置为原位置+旧桶容量分析原因：定义a= e.hash & oldCap ,b=旧容量数组大小，c=新容量大小=2*b b为2的幂，所以其二进制仅存在一个高位1，其余为0. a==0,则b的高位1对应的hash同样的位置肯定为0 a>0,则b的高位1对应的hash同样的位置肯定不为1 如hash=1011,b=0100,则hash&b=0 如hash=1101,b=0100,则hash&b=0100=b 由于c是b的2倍，即b的高位1左移1位，其余均为0 计算index时， (c - 1) & hash ，c-1则在c的高位1转为0，其右面位置均为1 如hash=1011,c=1000,则index=1011&0111=0011=3 如hash=1101,c=1000,则index=1101&0111=0101=5 如上， c-1 实际是包含了b的高位1，如果a==0，则index肯定不包含b高位置的1，即位置未变化如果a>0，则index肯定包含b高位置的1，即在原有index的基础上，加上了一个b 最终的结果就是低位链表位置不变化。高位链表位置是b+原index 综上，运用的很巧妙，避免了再次计算index 返回新桶 get() public V get(Object key) { Node<K,V> e; //先计算hash，再获取节点 return (e = getNode(hash(key), key)) == null ? null : e.value; } final Node<K,V> getNode(int hash, Object key) { Node<K,V>[] tab; Node<K,V> first, e; int n; K k; //判断哈希桶是否为空，计算index，判断index位置是否为空 if ((tab = table) != null && (n = tab.length) > 0 && (first = tab[(n - 1) & hash]) != null) { //判断头结点是否相等 if (first.hash == hash && ((k = first.key) == key || (key != null && key.equals(k)))) return first; //判断头结点的next是否为空 if ((e = first.next) != null) { if (first instanceof TreeNode) //红黑树 return ((TreeNode<K,V>)first).getTreeNode(hash, key); do { //遍历节点，是否存在该key if (e.hash == hash && ((k = e.key) == key || (key != null && key.equals(k)))) return e; } while ((e = e.next) != null); } } return null; } 总结默认容量16，默认负载因子0.75 采用数组+链表+红黑树实现链表长度超过8且数组长度>=64时，转为红黑树；小于6时，红黑树转为链表计算数组容量 tableSizeFor() ,2的幂计算hash (h = key.hashCode()) ^ (h >>> 16) ，低位保留高位特征计算index (n - 1) & hash ，实际是取模运算，位运算优于取模运算扩容时翻倍 threshold扩容临界值 newCap * loadFactor 移动链表时，采用高低位指针,避免再次计算hash和index。高低位判断 (e.hash & oldCap)==0 低位。低位链表位置不变，高位链表原位置+旧数组容量避免了jdk1.7 HashMap的环链形成环链形成原因 resize时，线程1执行时，获得节点a的next是b。此时，时间片切换，线程2将a和b倒置放入到新的数组，b的next变成了a 此时，线程1获得时间片，继续执行，a的next是b，b的next是a，环链形成根本原因是resize时移动链表节点采用的是头插法 jdk1.8 采用尾插法的方式移动链表节点，避免了环链的形成