FlipNWrite源码解析与修改 | 乌龙波霸七分甜

在阅读nvmain源码的过程中，发现nvmain里面自带有FlipNWrite的实现方法，只是默认的编码方式并不是FlipNWrite而是default。这是我试着自己修改nvmain代码的第一步，在config配置文件中设置编码方式，这样有一个好处，不用去修改源码，不需要重新编译，方式为根据config文件语法直接设置：DataEncoder FlipNWrite。运行测试命令：

./build/ARM/gem5.opt configs/example/se.py -c tests/test-progs/hello/bin/arm/linux/hello --cpu-type=detailed --caches --l2cache --mem-type=NVMainMemory --nvmain-config=./nvmain/Config/PCM_ISSCC_2012_4GB.config

不出所料，测试结果中多了三行输出，但是似乎是错误的，全为0？怎么可能。

images

什么原因导致的呢？

因为执行的se测试，负载为hello程序，根本没有写操作，从输出结果中也可以看出：

images

写次数为0，那位翻转次数当然也是0咯。那怎么办呢？

（1）改写负载程序。

不使用自带的hello二进制程序负载进行测试，修改测试负载为有写操作的数组赋值操作，在/home/malizhen/Workspace文件夹下编写自己的hello.c如下：

#include<stdio.h>
/*
 * Write-heavy workload: fills a 4096 x 8 table of longs with an
 * increasing counter, one store per element, in row-major order.
 */
int main()
{
    long temp[4096][8];
    long count = 0;
    int i;
    int j;

    for (i = 0; i < 4096; i++)
        for (j = 0; j < 8; j++)
            temp[i][j] = count++;
}

在当前目录执行静态编译
gcc -o hello hello.c -static

生成hello*二进制文件，然后用该文件作为负载测试文件，不出意外会有写操作了。在这里要注意，二进制负载文件不使用自带的（自带的hello二进制文件都放在对应架构的文件夹目录下）而是使用自己编译生成的（可以在任意目录下），这时只能在X86架构下进行测试，发现ALPHA和ARM架构下都会出错（如果将自己编译生成的二进制文件放进ALPHA和ARM架构文件夹目录下，应该也不会出错吧），这是因为电脑是X86，如果想在别的架构下运行任意二进制程序必须配置交叉编译环境。

./build/X86/gem5.opt configs/example/se.py -c ../../Workspace/hello --cpu-type=detailed --caches --l2cache --mem-type=NVMainMemory --nvmain-config=./nvmain/Config/PCM_ISSCC_2012_4GB.config

不幸的是，测试输出结果中写次数仍然为0，这就有点奇怪了。为什么呢？

（2）想过增加指令条数，但是增加至超级大了也不管用。

./build/X86/gem5.opt configs/example/se.py -c ../../Workspace/hello --cpu-type=detailed --caches --l2cache 64 --mem-type=NVMainMemory --nvmain-config=./nvmain/Config/PCM_ISSCC_2012_4GB.config -I 10000000

（3）查看cache大小发现默认为64 kB，太大了，设置更小的cache大小之后就搞定了，为啥呢？推测是因为cache足够大时，程序写入的数据一直停留在cache中，直到运行结束也没有被替换并写回主存，所以NVMain统计到的写次数为0。

./build/X86/gem5.opt configs/example/se.py --cpu-clock=3GHz -n 4 -c ../../Workspace/hello --cpu-type=detailed --caches --l1d_size=32kB --l1d_assoc=8 --l1i_size=32kB --l1i_assoc=8 --l2cache --l2_size=256kB --l2_assoc=8 --l3cache --l3_size=4MB --l3_assoc=16 --mem-type=NVMainMemory --nvmain-config=./nvmain/Config/PCM_ISSCC_2012_4GB.config

images

当时排查问题的时候，想要在函数里面直接cout，输出一些函数的中间值，进行查看分析。但是一旦修改了代码，就得重新编译，再进行测试，从测试的命令行输出结果能够查看中间值，这种方法因为要反复编译，不建议。那有没有更好更简洁的方式呢？可以参考网页gem5 Debug官方文档使用gdb进行调试。

另外可以在gem5/m5out/stats.txt文件中查看更加详细的输出结果，比如命令行窗口结果中的：

i0.defaultMemory.totalReadRequests 4659
i0.defaultMemory.totalWriteRequests 224

对应stats.txt文件中的:

images

源码解析

#include "DataEncoders/FlipNWrite/FlipNWrite.h"

#include <iostream>
#include <vector>

using namespace NVM;

/*
 *  Constructor: starts with both bit-write counters at zero and with no
 *  address recorded as flipped.
 */
FlipNWrite::FlipNWrite( )
{
    /* Zero the statistics counters. */
    bitCompareSwapWrites = 0;
    bitsFlipped = 0;

    /* No partition has been stored inverted yet. */
    flippedAddresses.clear( );

    /*
     *  Trace line printed each time an encoder is constructed.
     *  NOTE(review): looks like a leftover debugging aid -- confirm
     *  whether it should stay.
     */
    std::cout << "hahhahhahhaha" << std::endl;
}


/*
 *  Destructor. Deliberately empty: the *config pointer is not owned by
 *  this object, so it must not be deleted here.
 */
FlipNWrite::~FlipNWrite( )
{
}


/*
 *  Builds the parameter set from the configuration and reads the
 *  Flip-N-Write partition size.
 *
 *  config         - configuration to read; "FlipNWriteGranularity" gives
 *                   the partition size in bits.
 *  createChildren - unused.
 *
 *  fpSize is later used as a divisor when a write is split into
 *  partitions, so it must be positive. A missing or non-positive value
 *  falls back to 32 bits.
 */
void FlipNWrite::SetConfig( Config *config, bool /*createChildren*/ )
{
    Params *params = new Params( );
    params->SetParams( config );
    SetParams( params );

    /*
     *  Flip-N-Write granularity in bits. It is configurable; per the
     *  original author's note the paper's default is one 32-bit word.
     */
    fpSize = config->GetValue( "FlipNWriteGranularity" );

    /*
     *  -1 is the value the original code treated as "parameter not
     *  specified"; it is tested explicitly so that case keeps working
     *  whatever the signedness of fpSize.
     */
    const bool unspecified = ( fpSize == -1 );

    if( unspecified || fpSize <= 0 )
    {
        /* Only complain when the user actually supplied a bad value. */
        if( !unspecified )
        {
            std::cerr << "FlipNWrite: invalid FlipNWriteGranularity; "
                      << "using the default of 32 bits" << std::endl;
        }

        fpSize = 32;
    }
}

/*
 *  Registers the encoder's three statistics: the two bit counters via
 *  AddStat, and flipNWriteReduction via AddUnitStat with "%" as its unit.
 */
void FlipNWrite::RegisterStats( )
{
    AddStat( bitsFlipped );
    AddStat( bitCompareSwapWrites );
    AddUnitStat( flipNWriteReduction, "%" );
}

/*
 *  Inverts the bits of data in the half-open range [startBit, endBit).
 *
 *  Bit k lives in byte k / 8 at position k % 8, counted from the least
 *  significant bit -- the same numbering Write() uses when it compares
 *  old and new data bit by bit.
 *
 *  data     - block modified in place.
 *  startBit - index of the first bit to invert.
 *  endBit   - index one past the last bit to invert.
 *
 *  Every bit inside the range is toggled at its own position; bits
 *  outside the range (possible in the first and last byte when the range
 *  is not byte-aligned) are left untouched. An empty range is a no-op.
 */
void FlipNWrite::InvertData( NVMDataBlock& data, uint64_t startBit, uint64_t endBit )
{
    /* Nothing to invert; also keeps endBit - 1 below from wrapping. */
    if( endBit <= startBit )
        return;

    /* Integer division rounds down, so these are the first and last bytes touched. */
    int startByte = (int)(startBit / 8);
    int endByte = (int)((endBit - 1) / 8);

    for( int i = startByte; i <= endByte; i++ )
    {
        /* Select the bits of this byte that fall inside the range. */
        uint8_t mask = 0;

        for( int j = 0; j < 8; j++ )
        {
            uint64_t currentBit = static_cast<uint64_t>(i) * 8 + j;

            if( currentBit >= startBit && currentBit < endBit )
            {
                mask = static_cast<uint8_t>(mask | (1 << j));
            }
        }

        /* XOR toggles exactly the selected bits and preserves the rest. */
        data.SetByte( i, static_cast<uint8_t>(data.GetByte( i ) ^ mask) );
    }
}

/*
 *  Read hook of the encoder. The request is not examined and the
 *  returned cycle count is always 0.
 */
ncycle_t FlipNWrite::Read( NVMainRequest* /*request*/ )
{
    // TODO: Add some energy here
    const ncycle_t noCycles = 0;

    return noCycles;
}

/*
 *  Applies Flip-N-Write to a write request and updates the bit-write
 *  statistics.
 *
 *  The burst is split into partitions of fpSize bits. For every
 *  partition the number of bit positions that differ between the data
 *  currently held in memory and the new data is counted. When more than
 *  half of a partition's bits differ, the new data of that partition is
 *  inverted and the partition is recorded in flippedAddresses; otherwise
 *  any existing record for it is removed.
 *
 *  Side effects:
 *    - request->oldData is inverted in place for partitions already
 *      recorded as flipped;
 *    - request->data is inverted in place for partitions flipped by this
 *      write;
 *    - bitCompareSwapWrites grows by the number of differing bits and
 *      bitsFlipped by the number of bits written after encoding.
 *
 *  Always returns 0.
 */
ncycle_t FlipNWrite::Write( NVMainRequest *request ) 
{
    NVMDataBlock& newData = request->data;
    NVMDataBlock& oldData = request->oldData;

    uint64_t row;
    uint64_t col;
    ncycle_t rv = 0;

    request->address.GetTranslatedAddress( &row, &col, NULL, NULL, NULL, NULL );

    /*
     *  Size of one burst in bytes: BusWidth * tBURST * RATE divided by 8
     *  (assumes BusWidth is given in bits -- TODO confirm).
     */
    uint64_t wordSize = p->BusWidth;
    wordSize *= p->tBURST * p->RATE;
    wordSize /= 8;

    /* Number of fpSize-bit partitions in a whole row and in one burst. */
    const uint64_t rowSize = p->COLS * wordSize;
    const uint64_t rowPartitions = ( rowSize * 8 ) / fpSize;
    const uint64_t flipPartitions = ( wordSize * 8 ) / fpSize;

    /*
     *  One modified-bit counter per partition, all starting at zero.
     *  A vector releases its storage automatically on every exit path.
     */
    std::vector<int> modifyCount( flipPartitions, 0 );

    /* Get what is currently in the memory (i.e., if it was previously flipped, get the flipped data. */
    for( uint64_t i = 0; i < flipPartitions; i++ )
    {
        uint64_t curAddr = row * rowPartitions + col * flipPartitions + i;

        if( flippedAddresses.count( curAddr ) )
        {
            InvertData( oldData, i*fpSize, (i+1)*fpSize );
        }
    }

    /* Count, per partition, the bits that differ between old and new data. */
    for( uint64_t i = 0; i < wordSize; ++i )
    {
        const uint8_t oldByte = oldData.GetByte( i );
        const uint8_t newByte = newData.GetByte( i );

        /* Identical bytes contribute no modified bits. */
        if( oldByte == newByte )
            continue;

        /* At least one bit differs; check each bit individually. */
        for( int j = 0; j < 8; j++ )
        {
            const uint8_t oldBit = ( oldByte >> j ) & 0x1;
            const uint8_t newBit = ( newByte >> j ) & 0x1;

            if( oldBit == newBit )
                continue;

            /*
             *  Bits past the last whole partition (burst width not a
             *  multiple of fpSize) have no counter; skip them instead of
             *  indexing out of range.
             */
            const uint64_t partition = ( i * 8 + j ) / fpSize;

            if( partition < flipPartitions )
            {
                modifyCount[partition]++;
            }
        }
    }

    /*
     *  Flip any partitions as needed and mark them as inverted or not.
     */
    for( uint64_t i = 0; i < flipPartitions; i++ )
    {
        bitCompareSwapWrites += modifyCount[i];

        uint64_t curAddr = row * rowPartitions + col * flipPartitions + i;

        /* Invert if more than half of the bits are modified. */
        if( modifyCount[i] > (fpSize / 2) )
        {
            InvertData( newData, i*fpSize, (i+1)*fpSize );

            /* After inversion only the previously unchanged bits differ. */
            bitsFlipped += (fpSize - modifyCount[i]);

            /*
             *  Mark this address as flipped. If the data was already inverted, it
             *  should remain as inverted for the new data.
             */
            if( !flippedAddresses.count( curAddr ) )
            {
                flippedAddresses.insert( curAddr );
            }
        }
        else
        {
            /*
             *  This data is not inverted and should not be marked as such.
             */
            if( flippedAddresses.count( curAddr ) )
            {
                flippedAddresses.erase( curAddr );
            }

            bitsFlipped += modifyCount[i];
        }
    }

    return rv;
}

/*
 *  Derives flipNWriteReduction: bitsFlipped as a percentage of
 *  bitCompareSwapWrites. With no counted writes the value is 100.0.
 */
void FlipNWrite::CalculateStats( )
{
    /* Avoid dividing by zero when nothing has been counted. */
    if( bitCompareSwapWrites == 0 )
    {
        flipNWriteReduction = 100.0;
        return;
    }

    const double encodedBits = (double)bitsFlipped;
    const double baselineBits = (double)bitCompareSwapWrites;

    flipNWriteReduction = ((encodedBits / baselineBits)*100.0);
}