////////////////////////////////////////////////////////////////////////
//
//  pico-flash: flash memory writer for Pico-2 (Pico-2 program)
//      version 2.0 (August 31, 2009)
//      Hideki Kozima (xkozima@nict.go.jp)
//
//  memory allocation (flash.x)
//      ram    : o = 0xffffd800, l = 0x00002700
//      stack  : o = 0xffffff00, l = 0x00000100
//
//  function:
//      Read 128-byte blocks from the serial port (SCI3), write the
//      blocks to flash memory (0x00000000-0x0003ffff).
//
//  direction:
//      1. power-on your Pico-2 in boot mode (with SW6-7 on)
//      2. host> pc-boot pico-flash.sr
//      3. host> pc-flash your-program.sr
// 

#include "sh7046f.h"

//
//  bitwise operation
//      bit_op(var, mask, data):
//          clears the bits of var that are set in mask, ORs data into
//          the result and stores it back into var.  The value of the
//          whole expression is the value assigned to var.
//      note: var appears twice in the expansion (read once, written
//            once), so it must be free of side effects.  data is ORed
//            in without masking, so it should only carry bits that
//            are inside mask.

#define  bit_op(var,mask,data)  (var=((var)&(~(mask)))|(data))

//
//  __main: dummy function
//      Takes no arguments, returns nothing and has an empty body.
//      NOTE(review): presumably exists only to satisfy a reference to
//      __main generated by the toolchain -- confirm against the build.

void  __main (void) {}

////////////////////////////////////////////////////////////////////////
//
//  boot: pico2 first initialization
//        (just in order to avoid hardware collision)

//  boot: writes fixed values to the port A/B/E function, direction and
//  data registers, then calls sectionInit() to set up .data and .bss.
//  Takes no arguments and returns nothing.
void  boot (void)
{
    //  local prototypes; main is declared here but not called by boot()
    void  sectionInit(void);
    int   main(void);

    //  PortA initialization
    //      PA7-0: in (tentative)
    //      PA8: RxD, PA9: TxD,
    //      PA10: out=0 (TE)
    //      PA15-11: in (tentative)
    //  (pin functions first, then direction, then the output value)
    PFC.PACRL1.WORD = 0x0005;
    PFC.PACRL2.WORD = 0x0000;
    PFC.PACRL3.WORD = 0x0300;
    PFC.PAIORL.WORD = 0x0400;
    PA.DR.WORD = 0x0000;                        //  TE off

    //  PortB initialization
    //      PB5-2: in (tentative)
    PFC.PBCR1.WORD = 0x0000;
    PFC.PBCR2.WORD = 0x0000;
    PFC.PBIOR.WORD = 0x0000;			//  PB2-5 in

    //  PortE initialization
    //      PE21-17: in  (SWs)
    //      PE16-12: out (LEDs)
    //      PE11-0 : out (with zero)
    PFC.PECRH.WORD  = 0x0000;
    PFC.PECRL1.WORD = 0x0000;
    PFC.PECRL2.WORD = 0x0000;
    PFC.PEIORH.WORD = 0x0001;
    PFC.PEIORL.WORD = 0xffff;
    PE.DRH.WORD = 0x0000;                       //  PE16 = 0    (LED on)
    PE.DRL.WORD = 0x0000;                       //  PE15-0 = 0  (LEDs on)

    //  initialize memory
    //  (statics must not be relied on before this call: .data is not
    //   yet copied and .bss is not yet cleared)
    sectionInit();
}

//
//  memory initialization (cf. rom.x)
//      .text [_stext, _etext] for code
//      .data [_sdata, _edata] for variables with init
//      .bss  [_sbss,  _ebss ] for variables without init
//  note: .data has to be copied from the end of .text area.

extern char  mdata, sdata, edata, sbss, ebss;

//  sectionInit: set up the read/write sections in RAM
//      - copies the initial-value image that starts at mdata into the
//        .data area [sdata, edata), one byte at a time
//      - fills the .bss area [sbss, ebss) with zero bytes
//  Only the addresses of the five external symbols are used.
void  sectionInit (void)
{
    char  *from = &mdata;
    char  *to;

    //  .data: bring the read-only initial values into the writable area
    for (to = &sdata; to < &edata; to++, from++) {
        *to = *from;
    }

    //  .bss: variables without initializer start out as zero
    to = &sbss;
    while (to < &ebss) {
        *to++ = 0;
    }
}

////////////////////////////////////////////////////////////////////////
//
//  LED: indicators (LED1-5 = PE16-12)
//      void   LED_init (void);
//          initialize LED (PE16-12: out).
//      void   LED_set (ushort  dataW5);
//          set 5bits data to LED port.

//  LED_init: make PE16-12 general-purpose output pins.
//  Each register is updated with a single read-modify-write; the bits
//  outside the listed masks keep their current value.
void  LED_init (void)
{
    //  pin function of PE16 and PE15-12: plain I/O (select bits cleared)
    PFC.PECRH.WORD  &= ~0x0003;
    PFC.PECRL1.WORD &= ~0xff00;

    //  pin direction of PE16 and PE15-12: output (direction bits set)
    PFC.PEIORH.WORD |= 0x0001;
    PFC.PEIORL.WORD |= 0xf000;
}

//  LED_set: show the low 5 bits of dataW5 on PE16-12.
//      dataW5 : pattern to show; only bits 4-0 are used
//  The port receives the complement of the pattern: bit 4 goes to PE16
//  (DRH bit 0) and bits 3-0 go to PE15-12 (DRL bits 15-12).
void  LED_set (ushort dataW5)
{
    //  complement of the requested pattern, limited to 5 bits
    ushort  inverted = ~dataW5 & 0x001f;

    //  split into the part for DRH and the part for DRL
    //  (the shift result is cut down to 16 bits by the ushort type)
    ushort  forDRH = inverted >> 4;
    ushort  forDRL = inverted << 12;

    //  update only the LED bits of each data register
    bit_op(PE.DRH.WORD, 0x0001, forDRH);
    bit_op(PE.DRL.WORD, 0xf000, forDRL);
}

////////////////////////////////////////////////////////////////////////
//
//  SCI: serial port (RS485) controller, blocking version
//      void  SCI_init ();
//          initialize SCI3 (PA8:RxD, PA9:TxD) as 38400bps 1-8N-1.
//      void  SCI_transmit (uchar data);
//          transmit a byte data.
//      void  SCI_receive (uchar *data);
//          receive a byte data and store it to *data.

//  SCI_init: set up SCI3 and its pins for polled serial I/O.
//  Sequence: release SCI3 from stand-by, switch the transmitter and
//  receiver off, program clock source / frame format / bit rate, wait,
//  assign the pins, switch transmitter and receiver on, and finally
//  raise PA10.  Takes no arguments and returns nothing.
void  SCI_init ()
{
    int  i;

    //  wake up SCI3 (from stand-by mode)
    bit_op(MSTCR.MSTCR1.WORD, 0x0008, 0x0000);

    //  disable SCI3 while setup
    bit_op(SCI3.SCR.BYTE,  0xfc, 0x00);         //  TE/RE/... are all off

    //  select clock source
    bit_op(SCI3.SCR.BYTE,  0x03, 0x00);         //  use internal clock

    //  setup data format (1-8N-1)
    bit_op(SCI3.SMR.BYTE,  0xff, 0x00);         //  async, 1-8N-1, P/1
    bit_op(SCI3.SDCR.BYTE, 0x08, 0x00);         //  LSB-first (as usual)

    //  set baud 38400 (n:N=0:19)
    //      P=24576000, B=P/(32*(N+1)) -> B=38400, N=19
    SCI3.BRR = 19;

    //  wait for the 1bit period
    //      49152000/38400 = 1280T
    //          fastest code:  Lable  DT  Rn      (1)
    //                                BT  Lable   (3)
    //      1280/4 = 320
    //  NOTE(review): i is not volatile and the loop body is empty, so
    //  the length of this wait depends on the compiler keeping the
    //  loop -- confirm with the optimization level actually used.
    for (i = 0; i < 320; i++);

    //  pin re-assignment
    //      PA08: RxD
    //      PA09: TxD
    //      PA10: TE (transmitter enable for the RS485 driver)
    bit_op(PFC.PACRL1.WORD, 0x003f, 0x0005);
    bit_op(PFC.PACRL3.WORD, 0x0700, 0x0300);
    bit_op(PFC.PAIORL.WORD, 0x0400, 0x0400);    //  PA10 out for TE
    bit_op(PA.DR.WORD,      0x0400, 0x0000);    //  driver disable

    //  enable transmitter and receiver
    bit_op(SCI3.SCR.BYTE, 0x30, 0x30);          //  TE/RE on

    //  enable the RS485 driver (PA10<-1)
    //  (PA10 is not lowered again anywhere in this function)
    bit_op(PA.DR.WORD, 0x0400, 0x0400);         //  driver enable
}

//  SCI_transmit: send one byte over SCI3 (blocking).
//      data : the byte to send
//  Spins until bit 7 of SSR (TDRE) reads 1, stores the byte to TDR and
//  then writes SSR back with TDRE cleared.  There is no timeout.
void  SCI_transmit (uchar data)
{
    const uchar  tdreMask = 0x80;

    //  busy-wait until the transmit data register is reported empty
    while (!(SCI3.SSR.BYTE & tdreMask)) {
        //  nothing to do but poll
    }

    //  hand over the byte, then clear TDRE
    SCI3.TDR = data;
    SCI3.SSR.BYTE &= ~tdreMask;
}

//  SCI_receive: read one byte from SCI3 (blocking).
//      data : where the received byte is stored
//  Spins until bit 6 of SSR (RDRF) reads 1, copies RDR to *data and
//  then writes SSR back with RDRF cleared.  There is no timeout, and
//  the other SSR bits are not inspected.
void  SCI_receive (uchar *data)
{
    const uchar  rdrfMask = 0x40;

    //  busy-wait until the receive data register is reported full
    while (!(SCI3.SSR.BYTE & rdrfMask)) {
        //  nothing to do but poll
    }

    //  fetch the byte, then clear RDRF
    *data = SCI3.RDR;
    SCI3.SSR.BYTE &= ~rdrfMask;
}

////////////////////////////////////////////////////////////////////////
//
//  usleep: micro-second wait
//      void  usleep (uint  n);
//          blocks the flow of control for about n microseconds
//      void  usleep10 (uint  n);
//          blocks the flow of control for about n*10 microseconds

//  usleep: busy-wait of roughly n microseconds.
//      n : requested delay; the loop below runs 4*n times
//  The delay is produced purely by executing an empty counting loop,
//  so its real length depends on the generated code.
//  NOTE(review): i is not volatile -- an optimizing compiler may drop
//  the empty loop; confirm with the build flags in use.
void  usleep (uint  n)
{
    uint  times, i;

    //  this is rough approximation
    //  (4 loop iterations are taken as one microsecond)
    times = 4 * n;
    for (i = 0; i < times; i++);
}

//  usleep10: busy-wait of roughly n*10 microseconds.
//      n : requested delay in units of 10 microseconds; the loop below
//          runs 38*n times
//  The delay is produced purely by executing an empty counting loop,
//  so its real length depends on the generated code.
//  NOTE(review): i is not volatile -- an optimizing compiler may drop
//  the empty loop; confirm with the build flags in use.
void  usleep10 (uint  n)
{
    uint  times, i;

    //  the factor 38 (iterations per 10 microseconds) was found
    //  empirically.
    times = 38 * n;
    for (i = 0; i < times; i++);
}

////////////////////////////////////////////////////////////////////////
//
//  FLA: writing flash memory
//      int  FLA_write (uint fla_addr, uchar *data);
//          Writes the 128-byte data block to the flash memory at the
//          address.  (If the data is shorter than 128 bytes, pad it with
//          0xff to make it exactly 128 bytes long.)  The address has to
//          have 0x00 or 0x80 at the lowest byte.  On success, returns
//          0; on error, returns -1.

//  upper limit for the number of program/verify rounds spent on one
//  128-byte block by FLA_write
#define  FLA_N  1000                    //  maximum rewriting iteration

//  the flash array seen either as bytes or as long words
//  (262144 bytes; LONGS covers the same span when uint is 4 bytes wide)
union un_flamem {
    uchar  BYTES[262144];               //  byte access (data transfer)
    uint   LONGS[65536];                //  long-word access (verify read)
};

//  the flash array itself: the union placed at address 0x00000000 and
//  accessed through a volatile pointer
#define FLA_memory (*(volatile union un_flamem *)0x00000000)

//  FLA_pulse: apply one programming pulse through FLMCR1.
//      usec10 : pulse width, handed to usleep10() (units of about
//               10 microseconds)
//  Sets PSU, waits, sets P for the requested time, then clears P and
//  PSU again with a wait after each step.  The order of the steps and
//  the waits between them are part of the sequence; this function does
//  not touch the SWE bit or the data to be programmed.
void  FLA_pulse (uint usec10)
{
    //  set PSU (program setup) of FLMCR1
    bit_op(FLASH.FLMCR1.BYTE, 0x10, 0x10);

    //  wait t_spsu (min: 50 usec)
    usleep(60);

    //  set P (program) of FLMCR1 *** start baking ***
    bit_op(FLASH.FLMCR1.BYTE, 0x01, 0x01);

    //  wait t_sp (the pulse width chosen by the caller)
    usleep10(usec10);

    //  clear P (program) of FLMCR1 *** stop baking ***
    bit_op(FLASH.FLMCR1.BYTE, 0x01, 0x00);

    //  wait t_cp (min: 5 usec)
    usleep(6);

    //  clear PSU (program setup) of FLMCR1
    bit_op(FLASH.FLMCR1.BYTE, 0x10, 0x00);

    //  wait t_cpsu (min: 5 usec)
    usleep(6);
}

//  FLA_write: program and verify one 128-byte block of flash memory.
//      fla_addr : byte offset of the block inside FLA_memory; has to be
//                 a multiple of 128 and inside the flash array
//      data     : the 128 source bytes
//      returns  : 0 when the block verifies against data;
//                 -1 when fla_addr is rejected, or when the block still
//                 does not verify after FLA_N program/verify rounds
//  The function repeats "transfer data, pulse, verify" rounds.  After
//  each verify it computes the data for the next round (bits already
//  programmed are masked out) and, in the first six rounds, applies an
//  extra short pulse to the bits that verified in that round.
int  FLA_write (uint fla_addr, uchar *data)
{
    int  i, n;
    union {
        uchar  BYTES[128];              //  for byte access (transfer)
        uint   LONGS[32];               //  for long access (verify)
    } Sbuffer,                          //  Source
      Rbuffer,                          //  Rewrite
      Abuffer;                          //  Addwrite

    //  refuse a misaligned or out-of-range block before any flash
    //  control bit is touched (the low 7 address bits must be zero and
    //  the whole block must lie inside the array)
    if ((fla_addr & 0x7f) != 0 || fla_addr >= sizeof(FLA_memory.BYTES))
        return  -1;

    //  set SWE (software write enable) of FLMCR1
    bit_op(FLASH.FLMCR1.BYTE, 0x40, 0x40);

    //  wait t_sswe (min: 1 usec)
    usleep(2);

    //  copy 128-byte source data to rewrite buffer
    //  (Sbuffer stays untouched as the reference for verification)
    for (i = 0; i < 128; i++) {
        Sbuffer.BYTES[i] = data[i];
        Rbuffer.BYTES[i] = data[i];
    }

    //  writing: rounds n = 1 .. FLA_N (inclusive)
    for (n = 1; n <= FLA_N; n++) {
        uint  j, m = 0;                 //  m: set when a long word differs
        uint  usec10;

        //  transfer the rewrite data to the flash memory
        for (j = 0; j < 128; j++)
            FLA_memory.BYTES[fla_addr + j] = Rbuffer.BYTES[j];

        //  writing pulse (short in the first six rounds, long afterwards)
        if (n <= 6)
            usec10 = 3;                 //  t_sp:  30 usec
        else
            usec10 = 20;                //  t_sp: 200 usec
        FLA_pulse(usec10);

        //  set PV (program verify) of FLMCR1
        bit_op(FLASH.FLMCR1.BYTE, 0x04, 0x04);

        //  wait t_spv (min: 4 usec)
        usleep(5);

        //  long-int-wise verification
        for (j = 0; j < 32; j++) {
            uint  verify;

            //  dummy write to the address that is read back next
            FLA_memory.BYTES[fla_addr + 4*j] = 0xff;

            //  wait t_spvr (min: 2 usec)
            usleep(10);

            //  read verify data (as a long word)
            verify = FLA_memory.LONGS[fla_addr/4 + j];

            //  verify the data against the untouched source copy
            if (verify != Sbuffer.LONGS[j]) m = 1;

            //  make addwrite: a bit is 0 only where this round wrote 0
            //  and the bit also reads back 0.  Has to be computed from
            //  the Rbuffer value of this round, i.e. before Rbuffer is
            //  replaced below.
            if (n <= 6)
                Abuffer.LONGS[j] = Rbuffer.LONGS[j] | verify;

            //  make rewrite: a bit is 0 only where the source wants 0
            //  and the bit still reads back 1
            Rbuffer.LONGS[j] = Sbuffer.LONGS[j] | (~verify);
        }

        //  clear PV (program verify) of FLMCR1
        bit_op(FLASH.FLMCR1.BYTE, 0x04, 0x00);

        //  wait t_cpv (min: 2 usec)
        usleep(3);

        //  add-write (first six rounds only; Abuffer is not filled in
        //  later rounds)
        if (n <= 6) {
            //  transfer the addwrite data block (128 bytes)
            for (j = 0; j < 128; j++)
                FLA_memory.BYTES[fla_addr + j] = Abuffer.BYTES[j];

            //  writing pulse (10 usec)
            FLA_pulse(1);
        }

        //  check success
        if (m == 0) {
            //  clear SWE (software write) of FLMCR1
            bit_op(FLASH.FLMCR1.BYTE, 0x40, 0x00);

            //  wait t_cswe (min: 100 usec)
            usleep(120);

            //  successful end
            return  0;
        }
    }

    //  failed: clear SWE (software write) of FLMCR1
    bit_op(FLASH.FLMCR1.BYTE, 0x40, 0x00);

    //  wait t_cswe (min: 100 usec)
    usleep(120);

    //  failure end
    return  -1;
}

////////////////////////////////////////////////////////////////////////
//
//  main of pico-flash

int  main ()
{
    //  preparation
    boot();

    //  LED init
    LED_init();
    LED_set(0x1f);

    //  SCI setup (38400 1-8-1)
    SCI_init();

    //  read data from SCI
    //      host: <fla-addr1> <byte1> <byte2> ... <byte128>
    //      pico: 0x00
    //      host: <fla-addr2> <byte1> <byte2> ... <byte128>
    //      pico: 0x00
    //            ...
    //      host: <0xffffffff>
    //      pico: 0xff
    for (;;) {
        uchar  a1, a2, a3, a4;
        uint   fla_addr;
        uchar  data[128];
        int    i;

        //  read fla-addr
        SCI_receive(&a1);
        SCI_receive(&a2);
        SCI_receive(&a3);
        SCI_receive(&a4);
        fla_addr = (a1 << 24) | (a2 << 16) | (a3 << 8) | a4;

        //  check end of file
        if (fla_addr == 0xffffffff) {
            //  send end mark (0xff) back to the host
            SCI_transmit(0xff);

            //  no more data to send
            break;
        }

        //  show block number on LED
        LED_set(fla_addr >> 7);

        //  read 128-byte data
        for (i = 0; i < 128; i++)
            SCI_receive(data + i);

        //  writing to flash memory
        FLA_write(fla_addr, data);

        //  send ack
        SCI_transmit(0x00);
    }

    //  end of the program
    LED_set(0x1f);
    for (;;);

    return 0;
}

////////////////////////////////////////////////////////////////////////

