在nvmain的源代码中,可以看到EnergyModel有两种,energy和current。
这可以在config文件中进行配置,比如PCM_ISSCC_2012_4GB.config中是这样配置的:EnergyModel energy
同样地,STTRAM、RRAM等NVM存储器的config中EnergyModel也都配置为energy;而DRAM等易失性存储器的config中则配置为current,即:EnergyModel current
源码中针对NVM存储器的EnergyModel,采用如下这样一种很简单的累加方式来计算能耗,其中Erd是单个mat的读能耗,在PCM_ISSCC_2012_4GB.config中设定值为0.081200,同时设定Ewr即写能耗(SET或者RESET)为1.684811:
1 2 3 4 5 else { subArrayEnergy += p->Erd; activeEnergy += p->Erd; }
Write( NVMainRequest *request ):在分析这个函数之前,首先搞清楚一些基本知识。
/*
 *  Operating state of a subarray.
 *
 *  Enumerator order (and therefore the implicit values 0..4) is kept as-is.
 */
enum SubArrayState
{
    SUBARRAY_UNKNOWN,       /* State has not been determined. */
    SUBARRAY_OPEN,          /* A row is open, i.e. its contents sit in the sense
                               amplifiers; writes are only legal in this state. */
    SUBARRAY_CLOSED,        /* No row is open. */
    SUBARRAY_PRECHARGING,   /* A precharge (closing the open row) is in progress. */
    SUBARRAY_REFRESHING     /* A refresh is in progress. */
};
SubArrayState有UNKNOWN、OPEN、CLOSED、PRECHARGING、REFRESHING五种状态:
Precharge:对于处于打开状态(这里的“打开”是指把page的内容读入Sense Amplifier)的page,我们可以进行读写操作。如果不再需要对该page进行读写,可以关闭该page,即把Sense Amplifier中的内容写回bank中该page对应的存储单元,这样DRAM core才能为下一次数据访问做好准备,以便对其它page进行读写操作。这个关闭操作通过发出一个Precharge命令来实现;Precharge命令既可以关闭某一个bank,也可以关闭rank中所有打开的bank。
Refreshing:DRAM(Dynamic Random Access Memory,即动态随机存取存储器)之所以称为DRAM,就是因为它要不断进行刷新(Refresh)才能保留住数据,因此它是DRAM最重要的操作。Refresh操作与Precharge中重写的操作一样,都是用S-AMP先读再写。但为什么有Precharge操作还要进行Refresh呢?因为Precharge是对一个或所有Bank中的工作行操作,并且是不定期的,而刷新则是有固定的周期,依次对所有行进行操作,以保留那些久久没经历重写的存储体中的数据。
/*
 *  Policy that decides where written data lands.
 *
 *  Enumerator order (and therefore the implicit values 0..2) is kept as-is.
 */
enum WriteMode
{
    WRITE_BACK,      /* Only the row buffer is updated. */
    WRITE_THROUGH,   /* Both the row buffer and the cells are updated. */
    DELAYED_WRITE    /* Data is held in a write buffer. */
};
WriteMode有三种,WRITE_BACK、WRITE_THROUGH以及DELAYED_WRITE:
WRITE_BACK:只更新行缓冲区;
WRITE_THROUGH:更新行缓冲区和cell;
DELAYED_WRITE:数据被存储在写缓冲区;
Write函数分析 这个函数很重要,大致写一下自己的理解。
1 2 3 4 5 6 7 8 9 10 void NVMAddress::GetTranslatedAddress( uint64_t *addrRow, uint64_t *addrCol, uint64_t *addrBank, uint64_t *addrRank, uint64_t *addrChannel, uint64_t *addrSA ){ if ( addrRow ) *addrRow = row; if ( addrCol ) *addrCol = col; if ( addrBank ) *addrBank = bank; if ( addrRank ) *addrRank = rank; if ( addrChannel ) *addrChannel = channel; if ( addrSA ) *addrSA = subarray; }
首先进行sanity check(合理性检查)。这一部分检查必不可少:不能因为信任IsIssuable()就省略这些检查。
若nextWrite大于事件队列的当前时钟周期,则Subarray违反写时序限制;若SubArrayState不等于SUBARRAY_OPEN,则试图对非active状态的subarray进行写入而报错;若writeRow不等于openRow,则试图对没有open的行进行写入而报错。
若writeMode为WRITE_THROUGH,则需要更新行缓冲区和cell。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 if ( writeMode == WRITE_THROUGH ) { encLat = (dataEncoder ? dataEncoder->Write( request ) : 0 ); endrLat = UpdateEndurance( request ); if ( !p->WriteAllBits ) { uint8_t *bitCountData = new uint8_t [request->data.GetSize()]; for ( uint64_t bitCountByte = 0 ; bitCountByte < request->data.GetSize(); bitCountByte++ ) { bitCountData[bitCountByte] = request->data.GetByte( bitCountByte ) ^ request->oldData.GetByte( bitCountByte ); } ncounter_t bitCountWords = request->data.GetSize()/4 ; ncounter_t numChangedBits = CountBitsMLC1( 1 , (uint32_t *)bitCountData, bitCountWords ); assert( request->data.GetSize()*8 >= numChangedBits ); numUnchangedBits = request->data.GetSize()*8 - numChangedBits; } }
1 2 3 4 5 6 7 8 9 10 11 ncounter_t NO_OPT SubArray::Count32MLC1( uint32_t data ){ uint32_t count = data; count = count - ((count >> 1 ) & 0x55555555 ); count = (count & 0x33333333 ) + ((count >> 2 ) & 0x33333333 ); count = (((count + (count >> 4 )) & 0x0f0f0f0f ) * 0x01010101 ) >> 24 ; return static_cast <ncounter_t >(count); }
1 2 3 4 5 6 7 8 9 10 11 12 ncounter_t NO_OPT SubArray::CountBitsMLC1( uint8_t value, uint32_t *data, ncounter_t words ) { ncounter_t count = 0 ; for ( ncounter_t i = 0 ; i < words; i++ ) { count += Count32MLC1( data[i] ); } count = (value == 1 ) ? count : (words*32 - count); return count; }
EnergyModel能耗优化计算方案:为基于NVM的主存储器设计一种延迟和能耗均得到优化的写方案。下面的代码在energy模型下,把数据按每3 bit一个cell(TLC)划分,只统计新旧值不同的cell,并按8种写入值各自的写能耗进行累加:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 if ( p->EnergyModel == "current" ) { subArrayEnergy += ( ( p->EIDD4W - p->EIDD3N ) * (double )(p->tBURST) ) / (double )(p->BANKS); burstEnergy += ( ( p->EIDD4W - p->EIDD3N ) * (double )(p->tBURST) ) / (double )(p->BANKS); } else { uint32_t *rawData; uint32_t *oldData; ncounter_t memoryWordSize = 64 * 8 ; ncounter_t size = 0 ; if (request->data.IsCompressed()) { rawData = reinterpret_cast <uint32_t *>(request->data.comData); memoryWordSize = request->data.GetComSize()*8 ; } else { rawData = reinterpret_cast <uint32_t *>(request->data.rawData); } if (request->oldData.IsCompressed()) { oldData = reinterpret_cast <uint32_t *>(request->oldData.comData); } else { oldData = reinterpret_cast <uint32_t *>(request->oldData.rawData); } size = memoryWordSize / 32 ; double energy = 0 ; unsigned int i = 0 ; ncounter_t i_pos = 0 ; uint32_t word; uint32_t oldWord; uint32_t mask = 0x00000007 ; uint32_t byte; uint32_t oldByte; ncounter_t writeCount[8 ]; ncounter_t EwrTLC[8 ]; EwrTLC[0 ] = p->Ewr000; EwrTLC[1 ] = p->Ewr001; EwrTLC[2 ] = p->Ewr010; EwrTLC[3 ] = p->Ewr011; EwrTLC[4 ] = p->Ewr100; EwrTLC[5 ] = p->Ewr101; EwrTLC[6 ] = p->Ewr110; EwrTLC[7 ] = p->Ewr111; for (i_pos = 0 ; i_pos < 8 ; i_pos++) writeCount[i_pos] = 0 ; for (i = 0 ; i < size; i++) { word = rawData[i]; oldWord = oldData[i]; for (i_pos = 0 ; i_pos < 11 ; i_pos++) { byte = word & mask; oldByte = oldWord & mask; if (byte != oldByte) writeCount[byte]++; word = word >> 3 ; oldWord = oldWord >> 3 ; } } size = memoryWordSize % 32 ; if (size != 0 ) { word = rawData[i]; oldWord = oldData[i]; ncounter_t nums_r = size / 3 ; if (size % 3 != 0 ) nums_r++; for (i_pos = 0 ; i_pos < (11 -nums_r); i_pos++) { word = word >> 3 ; oldWord = oldWord 
>> 3 ; } for (; i_pos < 11 ; i_pos++) { byte = word & mask; oldByte = oldWord & mask; if (byte != oldByte) writeCount[byte]++; word = word >> 3 ; oldWord = oldWord >> 3 ; } } for (i_pos = 0 ; i_pos < 8 ; i_pos++) { energy += writeCount[i_pos] * EwrTLC[i_pos]; } subArrayEnergy += energy; burstEnergy += p->Ewr; }