/*
  James Aldis
  TI France
  OCP TL1 Channel Timing Distribution
  29 June 2005

  C++ Header File for:
    multi-threaded thread-busy-exact test request-splitter/response-merger
*/


// multiple inclusion protection
#ifndef TL1_TIMING_SPLITTER_H
#define TL1_TIMING_SPLITTER_H


#include "tl1_timing_common.h"


class tl1_timing_splitter : 
  public sc_module,
  public OCP_TL1_Slave_TimingIF,
  public OCP_TL1_Master_TimingIF
{
  SC_HAS_PROCESS(tl1_timing_splitter);
  public:
    tl1_timing_splitter(sc_module_name name, int nr_threads) :
      sc_module(name), ocps("ocps"), ocpmA("ocpmA"), ocpmB("ocpmB"),
      threads(nr_threads) {

      SC_METHOD(clock_rising);
      sensitive(clk.pos());
      dont_initialize();

      SC_METHOD(response_arb);
      sensitive(arbiter_event);
      dont_initialize();

      SC_METHOD(request_split);
      sensitive(splitter_event);
      dont_initialize();

      req_reg = new OCP_REQ[threads];
      reqs_out = new int[2*threads];
      reqs_out_A = reqs_out;
      reqs_out_B = &(reqs_out[threads]);
      for(int i=0; i<2*threads; i++) {
        reqs_out[i] = 0;
      }

      resp_reg = new OCP_RESP[2*threads];
      respA_reg = resp_reg;
      respB_reg = &(resp_reg[threads]);

      time_quantum = sc_get_time_resolution();
      sthreadbusy_sample_time = time_quantum;  // initial guess
      request_sample_time = time_quantum;  // initial guess
      mthreadbusy_sample_time = time_quantum;  // initial guess
      response_sample_time = time_quantum;  // initial guess
    };

    ~tl1_timing_splitter() {
      cout << "Deleting splitter:   " << name() << endl;

      for(int i=0; i<threads; i++) {
        cout << (req_reg[i].MCmd != OCP_MCMD_IDLE ? "R " : "- ");
      }
      cout << endl;
      for(int i=0; i<threads; i++) {
        cout << (respA_reg[i].SResp != OCP_SRESP_NULL ? "A " : "- ");
      }
      cout << endl;
      for(int i=0; i<threads; i++) {
        cout << (respB_reg[i].SResp != OCP_SRESP_NULL ? "B " : "- ");
      }
      cout << endl;
      for(int i=0; i<threads; i++) {
        cout << reqs_out_A[i] << " ";
      }
      cout << endl;
      for(int i=0; i<threads; i++) {
        cout << reqs_out_B[i] << " ";
      }
      cout << endl;
      cout << "  SThreadBusy sample time: " << sthreadbusy_sample_time << endl;
      cout << "  MThreadBusy sample time: " << mthreadbusy_sample_time << endl;
      cout << "  Request sample time:     " << request_sample_time << endl;
      cout << "  Response sample time:     " << response_sample_time << endl;
    }

    // ports
    sc_in<bool> clk;
    OCP_SLAVE_PORT ocps;
    OCP_MASTER_PORT ocpmA, ocpmB;

    // processes
    void clock_rising() {

      // generate MThreadBusy for both master ports, based on
      // register occupation
      int mtbA_out = 0;
      int mtbB_out = 0;
      int mask = 1;
      for(int i=0; i<threads; i++) {
        if(respA_reg[i].SResp != OCP_SRESP_NULL) {
          mtbA_out |= mask;
        }
        if(respB_reg[i].SResp != OCP_SRESP_NULL) {
          mtbB_out |= mask;
        }
        mask <<= 1;
      }
      ocpmA->putMThreadBusy(mtbA_out);
      ocpmB->putMThreadBusy(mtbB_out);

      // generate SThreadBusy for slave port, based on request register
      // occupation (we prevent any thread from having more than one
      // target open at a time, to avoid response reordering)
      int stb_out = 0;
      mask =1;
      for(int i=0; i<threads; i++) {
        if(req_reg[i].MCmd != OCP_MCMD_IDLE) {
          stb_out |= mask;
        }
        mask <<= 1;
      }
      ocps->putSThreadBusy(stb_out);

      // notify events for the other methods
      sc_time split_time = sc_max(request_sample_time, sthreadbusy_sample_time);
      // note that arbitration of responses must occur later than splitting of
      // reqests, in order that new responses do not distort the number of
      // requests outstanding and therefore change the arbitration result - also
      // potentially creating a combinatorial loop
      sc_time arb_time = sc_max(split_time + sc_time(1.0,SC_PS),
                   sc_max(response_sample_time, mthreadbusy_sample_time));
      arbiter_event.notify(arb_time);
      splitter_event.notify(split_time);
    }

    void response_arb() {
      // priority to port A, lowest numbered thread first
      // registers can be used transitorially, so there is not
      // a minimum 1 cycle latency
      OCP_RESP tmp;
      if(ocpmA->getOCPResponse(tmp)) {
        // adjust SData to compensate for what we did to the address
        // in the request
        tmp.SData >>= 1;
        respA_reg[tmp.SThreadID] = tmp;
      }
      if(ocpmB->getOCPResponse(tmp)) {
        // adjust SData to compensate for what we did to the address
        // in the request
        tmp.SData >>= 1;
        respB_reg[tmp.SThreadID] = tmp;
      }

      // sample MThreadBusy and replicate it to make the loop easy
      int mtb = ocps->getMThreadBusy();
      mtb =  (mtb & ((1 << threads) - 1)) | (mtb << threads);
      int mask = 1;
      for(int i=0; i<(2*threads); i++) {
        if((resp_reg[i].SResp != OCP_SRESP_NULL) && !(mask & mtb)) {
          // grant this one
          ocps->startOCPResponse(resp_reg[i]);
          resp_reg[i].SResp = OCP_SRESP_NULL;
          reqs_out[i]--;
          break;
        }
        mask <<= 1;
      }
    }

    void request_split() {
      // add any new request to the register for that thread (can not
      // be occupied because of busy-exact).  this is not a systematic
      // cycle of latency because it can be emptied straight away
      OCP_REQ tmp;
      if(ocps->getOCPRequest(tmp)) {
        req_reg[tmp.MThreadID] = tmp;
      }

      // sample sthreadbusy
      int stbA = ocpmA->getSThreadBusy();
      int stbB = ocpmB->getSThreadBusy();

      // choose a request from one thread (lowest first, ignoring those
      // for which sthreadbusy is high or where the thread has requests
      // outstanding on the other port) and send it to the appropriate
      // slave
      for(int i=0; i<threads; i++) {
        if(req_reg[i].MCmd != OCP_MCMD_IDLE) {
          // routing on bit[30]
          bool dest_is_A = (req_reg[i].MAddr & 0x40000000);
          int *otherport_out = (dest_is_A ? reqs_out_B : reqs_out_A);
          int stb = (dest_is_A ? stbA : stbB);
          if((otherport_out[i] == 0) && !(stb & 1)) {
            // can send request.
            // rescale address to get balanced system-address map
            req_reg[i].MAddr <<= 1;
            OCP_MASTER_PORT *dest = (dest_is_A ? &ocpmA : &ocpmB);
            (*dest)->startOCPRequest(req_reg[i]);
            req_reg[i].MCmd = OCP_MCMD_IDLE;
            int *thisport_out = (dest_is_A ? reqs_out_A : reqs_out_B);
            thisport_out[i]++;
            break;
          }
        }
        stbA >>= 1;
        stbB >>= 1;
      }
    }

    void end_of_elaboration() {
      cout << "<<<< E-O-E >>>> " << name() << endl;

      // inform OCP channels that this module is non-default-timing
      ocps->setOCPTL1SlaveTiming(calc_s_timing());
      ocpmA->setOCPTL1MasterTiming(calc_m_timing());
      ocpmB->setOCPTL1MasterTiming(calc_m_timing());

      // inform OCP channels that this module is timing-sensitive
      ocpmA->registerTimingSensitiveOCPTL1Master(this);
      ocpmB->registerTimingSensitiveOCPTL1Master(this);
      ocps->registerTimingSensitiveOCPTL1Slave(this);
    }

    // when informed of master port timing, merger must re-inform the OCP
    // channels if anything changed
    void setOCPTL1SlaveTiming(OCP_TL1_Slave_TimingCl slave_timing) {
      cout << "  << S-S-T >>   " << name() << endl;

      // increase in response group input time must be recorded
      // and may have caused an increase in response group output time
      // on slave port, which must be reported
      if(slave_timing.ResponseGrpStartTime + time_quantum
                > response_sample_time) {
        // calculate current response output time
        sc_time old_resp_t_out = calc_s_timing().ResponseGrpStartTime;
        // store new response sample time
        response_sample_time = slave_timing.ResponseGrpStartTime + time_quantum;
        // calculate new response output time
        sc_time new_resp_t_out = calc_s_timing().ResponseGrpStartTime;
        if(new_resp_t_out > old_resp_t_out) {
          ocps->setOCPTL1SlaveTiming(calc_s_timing());
        }
      }

      // increase in sthreadbusy input time must be recorded
      // may have caused an increase in request group output time
      // on master ports, which must be reported
      if(slave_timing.SThreadBusyStartTime + time_quantum
                > sthreadbusy_sample_time) {
        // calculate current request output time
        sc_time old_req_t_out = calc_m_timing().RequestGrpStartTime;
        // store new threadbusy sample time
        sthreadbusy_sample_time = slave_timing.SThreadBusyStartTime + time_quantum;
        // calculate new request output time
        sc_time new_req_t_out = calc_m_timing().RequestGrpStartTime;
        if(new_req_t_out > old_req_t_out) {
          ocpmA->setOCPTL1MasterTiming(calc_m_timing());
          ocpmB->setOCPTL1MasterTiming(calc_m_timing());
        }
      }
    }

    // when informed of slave port timing, merger must re-inform the OCP
    // channels if anything changed
    void setOCPTL1MasterTiming(OCP_TL1_Master_TimingCl master_timing) {
      cout << "  << S-M-T >>   " << name() << endl;

      // increase in request group input time must be recorded
      // may have caused an increase in request group output time on
      // master ports, which must be reported
      if(master_timing.RequestGrpStartTime + time_quantum
                > request_sample_time) {
        // calculate current request output time
        sc_time old_req_t_out = calc_m_timing().RequestGrpStartTime;
        // store new request sample time
        request_sample_time = master_timing.RequestGrpStartTime + time_quantum;
        // calculate new request output time
        sc_time new_req_t_out = calc_m_timing().RequestGrpStartTime;
        if(new_req_t_out > old_req_t_out) {
          ocpmA->setOCPTL1MasterTiming(calc_m_timing());
          ocpmB->setOCPTL1MasterTiming(calc_m_timing());
        }
      }

      // increase in mthreadbusy input time must be recorded
      // may have caused an increase in response group output time
      // on slave port, which must be reported
      if(master_timing.MThreadBusyStartTime + time_quantum
                > mthreadbusy_sample_time) {
        // calculate current response output time
        sc_time old_resp_t_out = calc_s_timing().ResponseGrpStartTime;
        // store new threadbusy sample time
        mthreadbusy_sample_time = master_timing.MThreadBusyStartTime + time_quantum;
        // calculate new response output time
        sc_time new_resp_t_out = calc_s_timing().ResponseGrpStartTime;
        if(new_resp_t_out > old_resp_t_out) {
          ocps->setOCPTL1SlaveTiming(calc_s_timing());
        }
      }
    }

  private:
    sc_time time_quantum;
    sc_time sthreadbusy_sample_time;
    sc_time request_sample_time;
    sc_time mthreadbusy_sample_time;
    sc_time response_sample_time;

    // helper functions: only request and response groups are non-default
    // timing.
    // note that the timing on the two OCP master ports is the same in this
    // module
    OCP_TL1_Master_TimingCl calc_m_timing() {
      OCP_TL1_Master_TimingCl to;
      to.RequestGrpStartTime =
                sc_max(sthreadbusy_sample_time, request_sample_time);
      return(to);
    }
    OCP_TL1_Slave_TimingCl  calc_s_timing() {
      OCP_TL1_Slave_TimingCl to;
      to.ResponseGrpStartTime =
                sc_max(mthreadbusy_sample_time, response_sample_time);
      return(to);
    }

    int threads;
    sc_event arbiter_event, splitter_event;
    OCP_REQ *req_reg;
    int *reqs_out, *reqs_out_A, *reqs_out_B;
    OCP_RESP *resp_reg, *respA_reg, *respB_reg;
};


// end of multiple inclusion protection
#endif

