// Number of blocks read per part. The zero-page fill in the code below stores
// this value into every zero-page byte, which is how `count` gets initialised.
.const BLOCKS_PER_PART = 9
// Number of decoded bytes stored per block: read_block starts Y at this value
// and stores two bytes per iteration until Y reaches 0.
.const BYTES_PER_BLOCK = 208

// Zero-page working storage.
// memory_start (= memory_start_lo/hi): 16-bit destination pointer, written by
// the header code (hi byte first, then lo byte).
.label memory_start = $0000
.label memory_start_lo = $0000
.label memory_start_hi = $0001
// Blocks still to be read for the current part (decremented after each block).
.label count = $0002
// Scratch bytes holding the two XOR pads derived from the first disk byte of
// each read_block iteration.
.label p1 = $0003
.label p2 = $0004
// Per-block status bytes, indexed as block_map-1,x with X = block number.
// Bit 7 set means "already loaded" (see the bmi after the lookup in header).
.label block_map = $0005

// Immediate operands of the adc instructions in read_block. Each use site
// carries an operand label (@constant_c0 .. @constant_c3), so these are
// presumably patched from outside this file -- TODO confirm.
.var C0 = 80
.var C1 = 133
.var C2 = 102
.var C3 = 68

// Loader entry point. The code is assembled at $0702; the two bytes just below
// it ($0700/$0701) hold the vector used by the final jmp ($0700).
* = $0702
        // Write the saved control value to $1c00. The immediate operand is
        // addressable through the label @loader_1c00 so it can be patched.
        lda @loader_1c00:#$64  // Overwritten every load.
        sta $1c00

        // switch from square wave generation to loading
        // (the same three registers $1c02/$1c0c/$1c0b are rewritten at the end
        // of the loader; presumably the 1541 VIA2 DDRB/PCR/ACR -- TODO confirm)
        lda #$6f
        sta $1c02
        sta $1c0c  // ee is the default, but 6f works, too.
        lda #$41
        sta $1c0b

        // transfer timer
        // Sample the 16-bit value at $1808/$1809. Retry until the low byte has
        // bit 7 set and the high byte is non-zero. Bit 7 set guarantees the
        // subtraction below cannot borrow, so carry stays set for the later sbc.
tagain: lda $1808
        bpl tagain
        sec
        sbc #$19+22  // presumably compensates for cycles spent in the transfer -- TODO confirm
        ldy $1809
        beq tagain

        // The fill loop runs twice (X = $fc, then $fe, then wraps to 0):
        //   pass 1 writes A/Y (the adjusted sample) to the $xx04/$xx05 pair,
        //   pass 2 writes FRAME_CYCLES-2 to the $xx06/$xx07 pair.
        // In both passes the $1c0x low byte is one less than the $180x low byte
        // (sbc #$01 with carry set; pass 2 assumes the carry is still set).
        // 1804/5 = a/y
        // 1c04/5 = a/y - 1
        // 1806 = $4cc6
        // 1807 = $4cc7
        // NOTE(review): the literal values quoted above ($4cc6/$4cc7) and the
        // inline "4d"/"fe" notes below disagree with each other; FRAME_CYCLES
        // is defined outside this view -- confirm which is current.
        ldx #$fc
fill:
        sta $1804-$fc, x   // low byte -> $1804 (pass 1) / $1806 (pass 2)
        sbc #$01           // carry is set, so this is exactly A-1
        sta $1c04-$fc, x   // low byte - 1 -> $1c04 / $1c06
        tya
        sta $1805-$fc, x   // high byte -> $1805 / $1807
        sta $1c05-$fc, x   // high byte -> $1c05 / $1c07
        ldy #>(FRAME_CYCLES-2)  // 4d
        lda #<(FRAME_CYCLES-2)  // fe
        inx
        inx
        bne fill           // exits with X = 0

        // Wait for motor to settle, and fill zero page with 9.
        // We need to delay about 1000 cycles.
        // X is 0 on entry (left by the fill loop), so this loop touches all
        // 256 zero-page bytes. That sets count = BLOCKS_PER_PART and every
        // block_map entry to 9 (bit 7 clear = not loaded yet).
        lda #BLOCKS_PER_PART
j:      sta $00, x  // store count, with some collateral damage
        inx
        bne j
        bit $1c01   // dummy read of the disk data register
again:
        // Spin until bit 7 of $1c00 reads 0 (bmi *-3 re-executes the 3-byte
        // bit instruction just above it).
        bit $1c00
        bmi *-3
        bit $1c01 // ack sync byte / clear interrupt/SO /  clear CA1/CA2 active edge
        clv       // bit copies operand bit 6 into V, so clear it before polling
        bvc *  // wait for byte 0
        lda $1c01
        clv
        // Only blocks whose first byte is $55 are processed; anything else
        // restarts the sync wait. On a match, cmp leaves carry set, which the
        // header code relies on for its first pass.
        cmp #$55  // body (0x07)
        bne again

        // sec  -- not needed: carry is already set by the matching cmp #$55.
        //
        // Header parsing. Each pass reads two consecutive disk bytes and XORs
        // them together. Carry selects what the result means:
        //   carry set   (first pass): block number. 0 = non-allocated block.
        //               Otherwise look up block_map; bit 7 set = already loaded.
        //   carry clear (next two passes): destination address byte, stored to
        //               memory_start+1 (hi) and then memory_start+0 (lo).
        ldy #$01
header: bvc *  // wait for part number
        lda $1c01
        clv
        bvc *
        eor $1c01
        clv
        bcc memory
        beq again  // non-allocated block
        clc        // later passes take the bcc memory path
        tax        // X = block number, kept for the block_map updates in read_block
        lda block_map-1, x
        bpl header
        bmi again  // we already loaded this part
memory: sta memory_start, y
        dey
        bpl header  // falls through once Y wraps to $ff, i.e. after both address bytes

        // clc  // not needed: carry is already clear, because this point is
        // only reached via the `bcc memory` path and sta/dey/bpl leave carry
        // untouched. NOTE(review): the earlier comment credited an `anc` that
        // does not appear in the visible code.
        //
        // Decode loop: every iteration consumes three disk bytes and stores two
        // decoded bytes, counting Y down from BYTES_PER_BLOCK to 0.
        //   byte 1 -> p1 = byte1 + C1, p2 = p1 + C3   (adc, carry chained)
        //   byte 2 -> (C0 + byte2) eor p1 -> stored at index Y
        //   byte 3 -> (byte3 + C2) eor p2 -> stored at index Y-1
        // Carry is never reset inside the loop, so it propagates from one adc
        // to the next.
        ldy #BYTES_PER_BLOCK
read_block:
        bvc *

        lda $1c01
        adc @constant_c1:#C1
        sta.zp p1
        adc @constant_c3:#C3
        sta.zp p2

        // We can't wait for V, since we might have inadvertently
        // cleared V through an adc. So just delay long enough for
        // the next byte to roll in.
        // The two incs double as bookkeeping: 2 per iteration over
        // BYTES_PER_BLOCK/2 iterations adds 208 to this block's map entry,
        // taking it from 9 to $d9 (bit 7 set = loaded).
        inc block_map-1, x
        inc block_map-1, x
        dey

        lda @constant_c0:#C0
        adc $1c01
        clv
        eor.zp p1
        // Store through the destination pointer set up by the header code.
        // TODO confirm the encoding the `.abs` modifier selects for this operand.
        sta.abs (memory_start), y
        bvc *
        lda $1c01
        adc @constant_c2:#C2
        clv
        eor.zp p2
        dey
        sta.abs (memory_start), y
        bne read_block  // sta leaves flags alone, so this tests the dey result

        // One more block done; keep reading until all BLOCKS_PER_PART blocks
        // of this part are in.
        dec count
        bne again

        // advance motor
        // Compute the next value for $1c00 in A: set bit 0 now, then flip
        // bits 0-1. A is left untouched by the delay loop below and is written
        // to $1c00 after it.
        lda $1c00
        ora #$01
        sta $1c00
        eor #$03
        // x is last block number
        // y is 0
        // block map entries are $d9.

        // Delay 6399 cycles.
        // We always do this delay loop, to make x=0 and to shift $00-$0f right
        // by one bit (a single lsr halves each byte and clears bit 7; it does
        // not zero it).
        ldx #$10
delay2:
        // 389 cycles
        ldy #78
        dey
        bne *-1   // branches back to the one-byte dey just above

        // 11 cycles
        lsr.zp $ff, x   // zero-page indexing wraps: X=$10..$01 hits $0f..$00
        dex
        bne delay2

        // move motor to next track
        // A still holds the value computed before the delay loop.
        sta $1c00

        // pad frames & wait for a good opportunity for timer transfer.
        // Loop until $1c05 is non-zero, $1c04 has bit 7 set, and the difference
        // $1c04 - $1804 is either negative or below 7.
wait:   lda $1c05
        beq wait
        lda $1c04
        bpl wait
        sec
        sbc $1804
        bmi out
        cmp #$07
        bcs wait
out:
        // transfer timer. We know 1804/1c04 >= ~$80 and $1805 > 0.
        // Copies $1804/$1805 into $1808/$1809, the pair sampled at loader entry.
        lda $1804
        ldy $1805
        sta $1808
        sty $1809

        // Set the timers to a very high frequency, so that when the first note of
        // the music is played, it's played immediately.
        // X is 0 here (left by the delay loop).
        stx $1805
        stx $1c05

        // square wave generation
        // Rewrites the $1c02/$1c0b values that the loader entry changed.
        lda #$ef
        sta $1c02
        lda #$c0
        sta $1c0b

        // X: 0 -> $ff, so the stack pointer is reset to $ff before leaving
        // through the vector stored at $0700/$0701.
        dex
        txs
        jmp ($0700)