;	Compile with standalone=0
;	Then compile ST-NICCC-Blitter-Atarimax.asm

;	The 64k table is a count table for the blitter.
;	The multiplies (MULS) are only needed to set up the slope;
;	the blitter itself does the interpolation.
;	Because the blitter has no fractional part,
;	an 8.8 fixed-point table is used instead
;	and the blitter simply steps through it.
;	That is why it is so fast.


	.if .not .def standalone	;default to the standalone build unless the
	.def standalone = 1		;symbol was already defined by the caller
	.endif

blitter_only=0				;1 = also assemble the two extra span-length BCBs
					;in xdl and skip the CPU span loop in draw_poly

.if standalone=0
	stream_data = $a000		;start of the stream window read by get_byte
	stream_end  = $c000		;first address past the stream window
	opt f+
.else
	stream_data = $8000		;start of the stream window read by get_byte
	stream_end  = $c000		;first address past the stream window
	opt f-
.endif

	icl "SystemEquates.asm"
	icl "VBXE.asm"

chrset		equ $d800	;character set; init_cont copies 512 bytes of fontcopy here
gradientmap	equ $4000 ;CPU address filled by init_gouraud_map (original note: "xl ram")

;quarter-square multiplication tables built by generate_square_tables
square1_lo	equ $c000
square1_hi	equ square1_lo+512
square2_lo	equ square1_hi+512
square2_hi	equ square2_lo+512

 ;per-scanline lookup tables, filled at run time by init_cont
bcbstartlo	equ $c800	;:256	.byte <($0300+#*21)
bcbstarthi	equ $c900 	;:256	.byte >($0300+#*21)
bcbsendlo	equ $ca00 	;:256	.byte <($4300+#*21+20)
bcbsendhi	equ $cb00	;:256	.byte >($4300+#*21+20)	
mul32		equ $cc00	;256 bytes: low byte of index<<5 (built in init_cont)
lsrf0tab	equ $cd00	;256 bytes: index>>4 (built in init_cont)

palette		equ $cf00 ;3*16 = $30 bytes

vertxs	equ $fc00 	;vertex x coordinates
vertys	equ $fd00 	;vertex y coordinates
indices equ $fe00	;vertex indices (used by indexed_mode only)

screencounter equ 1	;zero-page address 1: index into screentab

miny	equ $03		;1 byte
bank	equ $06		;1 byte
maxy	 equ $08	;1 byte
pointer	equ $0f 	;2 bytes (init_cont uses pointer and pointer+1)

sx1	equ $11		;1 byte
sx2	equ $12		;1 byte
cloc	equ $13		;2 bytes
sy1	equ $16		;1 byte
sy2	equ $17		;1 byte
screen  equ $4020	;byte written with the draw-buffer page; VRAM $000020 is the copy_bcb source
y2	equ $27		;1 byte


	org y2+1
T1 	org *+2		;2 bytes
T2	org *+2		;2 bytes
PRODUCT org *+4		;4 bytes reserved (PRODUCT+0..+2 are written by the multiply)


flags		equ $a2	;1 byte
numframeverts	equ $a3	;1 byte
polytype	equ $a4	;1 byte
vertcount	equ $a8	;1 byte
verti		equ $a9	;1 byte
polycol		equ $4022	;byte written with the polygon colour; VRAM $000022 is a BCB source in xdl
palflag		equ $ac ;1 byte
timer		equ $ad	;6 bytes
screentab	equ $b3 ;8 bytes
polymask	equ $bc ;2 bytes
stream_direction equ $be ;1 byte
no_music_flag equ $bf

get_byte	equ $df ;-$ff  zero-page home of the relocated get_byte_zp routine

;edge buffers; xdl addresses the same data as VRAM $003d00 / $003e00
redges		equ $7d00
ledges		equ $7e00

	org $0800	
	jmp init_cont		;first instruction of the program image: skip over NMI to the init code

;
; LZSS Compressed SAP player for 16 match bits
; --------------------------------------------
;
; (c) 2020 DMSC
; Code under MIT license, see LICENSE file.
;
; This player uses:
;  Match length: 8 bits  (1 to 256)
;  Match offset: 8 bits  (1 to 256)
;  Min length: 2
;  Total match bits: 16 bits
;
; Compress using:
;  lzss -b 16 -o 8 -m 1 input.rsap test.lz12
;
; Assemble this file with MADS assembler, the compressed song is expected in
; the `test.lz16` file at assembly time.
;
; The player needs 256 bytes of buffer for each pokey register stored, for a
; full SAP file this is 2304 bytes.
;
;    org $80

;chn_copy    .ds     9
;chn_pos     .ds     9
;bbptr        .ds     2
;cur_pos     .ds     1
;chn_bits    .ds     1

;bit_data    .byte   1




;-----------------------------------------------------------------------
; NMI handler (installed at $fffa/$fffb by init_cont).
; Bit 7 of $d40f selects the path:
;   set   -> save A/X/Y on the stack, call the music player at $3403, rti
;   clear -> "vbl" path: bump cloc, advance the BCD timer and redraw the
;            on-screen clock digits in `text`.
; The vbl path saves A/X/Y by patching the operands of vbl_a/vbl_x/vbl_y.
; The three NOPs at `stop` are a patch slot: parse_data overwrites them
; with "jmp blinking" at end of stream, which skips the timer update and
; re-enters at `skip`.
;-----------------------------------------------------------------------
.proc	NMI
	bit $d40f		;N flag <- bit 7 of $d40f
	bpl vbl			;bit 7 clear -> vbl path
    pha
    txa
    pha
    tya
    pha
;   lda #15
;  	sta $d01a
    jsr $3403 ;rmtplayer
;   lda #0
;   sta $d01a
@  	pla
    tay
    pla
    tax
    pla
    rti
vbl
	sta vbl_a+1		;save registers into the immediate operands below
	stx vbl_x+1
	sty vbl_y+1

	dec cloc		;cloc is decremented; cloc+1 is decremented whenever
	bne @+			;the low byte reaches zero (not a standard 16-bit borrow)
	dec cloc+1
@	

stop	nop			;3-byte patch slot, becomes "jmp blinking" at end of stream
	nop
	nop
	sed			;decimal mode: timer bytes are BCD
	lda timer
	clc
	adc #2			;timer+0 advances by 2 per vbl (wraps at 100)
	sta timer
	lda timer+1
	adc #0			;carry from the wrap bumps timer+1
	sta timer+1
	cmp #$60
	bcc @+
	lda #0			;timer+1 reached $60: reset it and carry into timer+2
	sta timer+1
	lda timer+2
	adc #0			;carry is set by the cmp above, so this adds 1
	sta timer+2
	cmp #$60
	bcc @+
	lda #0
	sta timer+2	
@	
	lda timer		;low digit of timer+0 -> text+11
	and #$0f
	tax
	lda digits,x
	sta text+11
	lda timer		;high digit of timer+0 -> text+10
	lsr
	lsr
	lsr
	lsr
	tax
	lda digits,x
	sta text+10
	lda timer+1		;low digit of timer+1 -> text+8
	and #$0f
	tax
	lda digits,x
	sta text+8
	lda timer+1		;high digit of timer+1 -> text+7
	lsr
	lsr
	lsr
	lsr
	tax
	lda digits,x
	sta text+7
	lda timer+2		;low digit of timer+2 -> text+5
	and #$0f
	tax
	lda digits,x
	sta text+5
	lda timer+2		;high digit of timer+2 -> text+4
	lsr
	lsr
	lsr
	lsr
	tax
	lda digits,x
	sta text+4
	cld	
skip				;re-entry point used by `blinking`

vbl_a	lda #0			;operands patched on entry: restore A, X, Y
vbl_x	ldx #0
vbl_y	ldy #0

nmi_end	rti

.endp

;-----------------------------------------------------------------------
; init_cont - one-time start-up.
; Initialises the music player, copies the font, uploads the XDL and the
; blitter control block (BCB) templates into VBXE memory through the
; window at $4000, builds 240 per-scanline hline BCBs, fills the gradient
; map, installs the NMI vector and the zero-page get_byte routine, and
; builds the mul32 / lsrf0tab / bcbstart* / bcbsend* lookup tables.
; There is no rts: execution falls through the .endp into the code that
; follows this procedure.
;-----------------------------------------------------------------------
.proc init_cont

.if standalone=0
	ldx #4			;initial stream bank
	stx bank
	sta carctl,x		;NOTE(review): A is not loaded before this store - presumably only the address matters; confirm
	stx get_byte_zp.bank_register	;NOTE(review): the copy of get_byte_rom to zero page further down overwrites this byte again
.endif
;rmt player init
	lda #0
	ldx #<$2000
	ldy #>$2000
	jsr $3400 ;rmtinit

;copy font
	ldx #0
	stx timer
	stx timer+1
	stx timer+2
	stx VBXE_XDL_ADR0
	stx VBXE_XDL_ADR1
	stx VBXE_XDL_ADR2
	stx VBXE_CG		; disable collision detection
	stx VBXE_IRQ_CONTROL	; disable IRQ
	lda #$21		;screentab = $21,$22,$23,$24 stored twice so that
	sta screentab		;screentab+1,x and screentab+2,x stay inside the table
	sta screentab+4
	lda #$22
	sta screentab+1
	sta screentab+5
	lda #$23
	sta screentab+2
	sta screentab+6
	lda #$24
	sta screentab+3
	sta screentab+7
	
@	lda fontcopy,x		;first 256 font bytes; the same loop also clears
	sta chrset,x		;256 bytes starting at `palette`
	lda #0
	sta palette,x
	inx
	bne @-

@	lda fontcopy+256,x	;second 256 font bytes
	sta chrset+256,x
	inx
	bne @-
	
	stx dmactl		;X = 0 here
  	
	lda #$80	;bank 0
	sta VBXE_MEMAC_B_CONTROL	;cpu-vram access window at $4000
	ldx #12			;copies 13 bytes starting at xdl
aa3	lda xdl,x
	sta $4000,x	; copy XDL to vram
	dex
	bpl aa3

	ldx #0
@	lda xdl.copy_bcb,x
	sta $4100,x	;$4100 in the window (draw_poly starts the blitter at $000100)
	inx
	cpx #21*9		;9 BCBs of 21 bytes
	bne @-

	ldx #0
a8	lda xdl.cls_bcb,x
	sta $4200,x ;$000200 = $4200 bank #0 
	inx
	cpx #21*2		;2 BCBs
	bne a8

	ldx #0
@	lda xdl.slope_bcb,x
	sta $4280,x ;$4280 in the window (calc_slope starts the blitter at $000280)
	inx
	cpx #21
	bne @-
	
	ldx #0
@	lda xdl.copypalette_bcb,x
	sta $42c0,x ;$42c0 in the window
	inx
	cpx #21
	bne @-
	
	
;build 240 copies of hline_bcb at $4300, 21 bytes apart; copy number Y gets
;Y stored in template byte 7 (middle byte of the destination address)
	ldy #0	
a99	ldx #20
	sty xdl.hline_bcb+7
a9	lda xdl.hline_bcb,x
bcbad	sta $4300,x		;operand is self-modified: advanced by 21 per copy
	dex
	bpl a9
	lda bcbad+1
	clc
	adc #21
	sta bcbad+1
	scc
	inc bcbad+2
	
	iny
	cpy #240
	bne a99
	lda #0 ;stop blitlist
	sta $4300+20+239*21	;clear the control byte of the last BCB
	
	jsr init_gouraud_map
	lda #$80
	sta VBXE_MEMAC_B_CONTROL 
			
@	lda vcount		;wait until vcount reads 0
	bne @-

	sei			;stop IRQ interrupts
	mva #$00 nmien		;stop NMI interrupts	
	mva #$fe portb		;switch off ROM to get 16k more ram	

;copy the stream reader to its zero-page run address
	ldx #[.len get_byte_zp-1]
@	lda get_byte_rom,x
	sta get_byte,x
	dex
	bpl @-
		
;	jsr memclear
	lda #<nmi		;install the NMI vector (each byte is stored twice)
	sta $fffa
	sta $fffa
	lda #>nmi
	sta $fffb
	sta $fffb
	sei
	lda #$c0
	sta nmien
	lda #>chrset
	sta chbase

	
;mul32[x] = low byte of x<<5
 	ldx #0
@	txa
:5	asl
	sta mul32,x
	inx
	bne @-
 
;lsrf0tab[x] = x>>4
@ 	txa
:4 	lsr
 	sta lsrf0tab,x
 	inx
 	bne @-
 
;bcbstartlo/hi[x] = $0300 + x*21
	lda #0
	tax
	sta pointer
	lda #$03
	sta pointer+1

@	lda pointer
	sta bcbstartlo,x
	lda pointer+1
	sta bcbstarthi,x
	lda pointer
	clc
	adc #21
	sta pointer
	scc
	inc pointer+1

	inx
	bne @-
	
;bcbsendlo/hi[x] = $4300 + 20 + x*21 (last byte of BCB number x)
	lda #20
	sta pointer
	lda #$43
	sta pointer+1
@	lda pointer
	sta bcbsendlo,x
	lda pointer+1
	sta bcbsendhi,x
	lda pointer
	clc
	adc #21
	sta pointer
	scc
	inc pointer+1

	inx
	bne @-
	.endp		;end of init

	;---------------------------------------------------------------
	; Start-up tail (init_cont falls through to here) and main loop.
	;
	; 1. Build the multiply tables, select the VBXE palette, clear the
	;    four frame buffers named in screentab and enable the XDL.
	; 2. Show the intro display list until timer+1 reaches $05.
	; 3. Switch to the demo display list, reset the timer, load the
	;    palette and enter the frame loop.
	;
	; Frame loop: show buffer screentab[n], render into
	; screentab[n+1], start the blitter clearing screentab[n+2], parse
	; one frame of the stream, then advance n modulo 4.
	;
	; The original had two byte-identical copies of the loop body
	; (main_demo_loop / main_demo_loop2) selected by stream_direction,
	; and leaving the second copy fell straight into .proc parse_data
	; with nothing on the return stack. Both labels now name the single
	; loop, which never exits.
	;---------------------------------------------------------------
	jsr generate_square_tables

	jsr init_vbxe

	lda #$80
	sta VBXE_MEMAC_B_CONTROL
	lda #0
	sta screencounter
	jsr clear_vram			;clear the buffers screentab[0..3]
	inc screencounter
	jsr clear_vram
	inc screencounter
	jsr clear_vram
	inc screencounter
	jsr clear_vram
	inc screencounter		;screencounter = 4: the first frame uses screentab+4..+6

	lda #1
	sta VBXE_VIDEO_CONTROL		; enable XDL

@	lda vcount			;wait until vcount reads 0
	bne @-

	mwa #dlistintro dlistl		;intro screen
	mva #34 dmactl
	mva #15 colpf0

@	lda timer+1			;wait until the NMI timer reaches $05 in timer+1
	cmp #$05
	bcc @-

@	lda vcount
	bne @-

	mwa #dlist dlistl		;demo display list
	mva #33 dmactl
	ldx #0				;restart the on-screen clock
	stx timer
	stx timer+1
	stx timer+2
	stx palflag

	jsr copy_palette0

main_demo_loop
main_demo_loop2				;kept as an alias of main_demo_loop
	lda #$80
	sta VBXE_MEMAC_B_CONTROL
	ldx screencounter
	lda screentab,x
	sta $4005			;show that screen (byte 5 of the XDL copy at $4000)
	lda screentab+1,x
	sta screen			;draw to this screen

;clear the buffer after next
@	lda VBXE_BLITTER_BUSY
	bne @-
	lda #$00
	sta VBXE_BL_ADR0
	lda #$02			;$000200 = $4200 bank #0 (cls_bcb copy)
	sta VBXE_BL_ADR1		;blitter addr
	lda #$00
	sta VBXE_BL_ADR2
	lda #$80
	sta VBXE_MEMAC_B_CONTROL

	ldx screencounter
	lda screentab+2,x
	sta $4208			;cls_bcb copy: which screen to clear
	lda #0
	sta VBXE_MEMAC_B_CONTROL
	lda #1
	sta VBXE_BLITTER_START

	jsr parse_data			;draw every polygon of one frame

	inc screencounter		;next buffer, modulo 4
	lda screencounter
	and #3
	sta screencounter
	jmp main_demo_loop

;-----------------------------------------------------------------------
; parse_data - read and draw the polygons of one frame from the stream.
;
; For every polygon a descriptor byte is read with get_byte:
;   high nibble -> polygon colour (written to polycol)
;   low nibble  -> vertex count
;   $fe         -> end of frame: return
;   $ff         -> end of stream: rewind get_byte, patch the NMI `stop`
;                  slot with "jmp blinking", set stream_direction and
;                  no_music_flag, return
; The x/y byte pairs that follow are stored in vertxs/vertys; the first
; vertex is also stored at index vertcount so the outline closes. Each
; edge goes through calc_slope, then draw_poly fills the polygon.
;
; The frame-flag, palette and indexed-mode handling of the original
; stream format is disabled; the no_palette / no_indexed_mode labels
; are kept.
;
; A second "cmp #$fe ;end of 64k" test used to follow the $ff test. It
; could never match because $fe had already returned as end of frame,
; so the dead branch was removed.
;
; Clobbers A, X, Y. Patches the operand of do_verts3 as an edge counter.
;-----------------------------------------------------------------------
.proc parse_data
no_palette
no_indexed_mode	
do_polys2
	jsr get_byte
	sta polytype
	tax
	lda lsrf0tab,x		;A = descriptor >> 4 = colour
	ldy #$80
	sty VBXE_MEMAC_B_CONTROL	;open the VRAM window for the polycol write
	sta polycol
	lda #0
	sta VBXE_MEMAC_B_CONTROL
	txa
	and #$0f
	sta vertcount

	txa
	cmp #$fe		;end of frame
	bne @+
	rts
@	
	cmp #$ff		;end of file
	bne @+
	mwa #stream_end get_byte_zp.ptr	;force a bank switch on the next get_byte
	lda #3			;get_byte increments bank before using it
	sta bank
	sta stream_direction
	mva #$4c nmi.stop	;patch "jmp blinking" into the NMI handler
	mwa #blinking nmi.stop+1
	inc no_music_flag
	rts
@
	mva #1 maxy		;reset the vertical extent of this polygon
	mva #200 miny

	ldx vertcount		;first vertex goes to index 0 and to index
	jsr get_byte		;vertcount, so the last edge returns to the start
	sta vertxs,x
	sta vertxs
	jsr get_byte
	sta vertys,x
	sta vertys
	
	ldx #1			;remaining vertices 1..vertcount-1
do_verts2
	jsr get_byte
	sta vertxs,x
	jsr get_byte
	sta vertys,x
	inx
	cpx vertcount
	bne do_verts2

	lda #0
	sta do_verts3+1		;edge index lives in the operand of the ldx below
;draw poly
do_verts3 ldx #0
	lda vertxs,x
	sta sx1
	lda vertys,x
	sta sy1
	cmp miny		;track the smallest ...
	bcs @+
	sta miny
@
	cmp maxy		;... and largest y of the polygon
	bcc @+
	sta maxy
@
	lda vertxs+1,x
	sta sx2
	lda vertys+1,x
	sta sy2
	jsr calc_slope		;rasterise edge (sx1,sy1)-(sx2,sy2) into an edge buffer
	inc do_verts3+1
	dec vertcount
	bne do_verts3
	jsr draw_poly		;fill between the edge buffers
	jmp do_polys2 
	.endp

	;---------------------------------------------------------------
	; indexed_mode - indexed-vertex variant of parse_data.
	; Reads a vertex count and that many x/y pairs into vertxs/vertys,
	; then for every polygon reads a descriptor byte (high nibble =
	; colour, low nibble = vertex count) followed by vertex indices.
	; Descriptor $ff = end of frame, $fd = end of stream, $fe = end
	; of 64k block.
	; NOTE(review): the only reference to this routine in this file
	; is a commented-out jmp in parse_data. Unlike parse_data, the
	; end-of-stream path here does not set stream_direction or
	; no_music_flag.
	;---------------------------------------------------------------
	.proc indexed_mode
	jsr get_byte
	sta numframeverts ;how many vertices per frame
	ldy #0		;now read the verticesx and verticesy
@	jsr get_byte
	sta vertxs,y
	jsr get_byte
	sta vertys,y
	iny
	cpy numframeverts
	bne @-
	
do_polys
	jsr get_byte
	sta polytype
	tax
	lda lsrf0tab,x		;A = descriptor >> 4 = colour
	ldy #$80
	sty VBXE_MEMAC_B_CONTROL	;open the VRAM window for the polycol write
	sta polycol
	ldy #0
	sty VBXE_MEMAC_B_CONTROL
	txa
	and #$0f
	sta vertcount ;how many lines 0-15

	lda polytype
	cmp #$ff ;end of frame
	sne
	rts

	cmp #$fd ;end of file wrap
	bne @+
	mwa #stream_end get_byte_zp.ptr	;force a bank switch on the next get_byte
	lda #3 ;-1 due to auto inc!!! 1+23+5+10
	sta bank
	mva #$4c nmi.stop	;patch "jmp blinking" into the NMI handler
	mwa #blinking nmi.stop+1
	rts
@
	cmp #$fe ;end of 64k
	bne @+
	mwa #stream_end get_byte_zp.ptr
	rts
@
	mva #1 maxy		;reset the vertical extent of this polygon
	mva #200 miny

	jsr get_byte ;first id
	ldx vertcount		;first index goes to slot 0 and to slot vertcount
	sta indices,x		;so the last edge returns to the start
	sta indices

	ldx #1
@
	jsr get_byte
	sta indices,x
	inx
	cpx vertcount
	bne @-
	
	lda #0
	sta verti
do_verts ;draw poly
	ldx verti
	ldy indices,x
	lda vertxs,y
	sta sx1 ;x1+1
	lda vertys,y
	sta sy1 ;y1+1
	cmp miny		;track the smallest ...
	bcs @+
	sta miny
@
	cmp maxy		;... and largest y of the polygon
	bcc @+
	sta maxy
@
	ldy indices+1,x
	lda vertxs,y
	sta sx2 ;x2+1
	lda vertys,y
	sta sy2 ;y2+1

	jsr calc_slope	;poly ;_dda ;line x1,y1 to x2,y2

	inc verti
	lda verti
	cmp vertcount
	bne do_verts
	jsr draw_poly ;fill poly
	jmp do_polys
	.endp

;[10:46:28] axis/oxyron (oxy): just copy pal
;[10:46:32] axis/oxyron (oxy): once per frame
;[10:46:38] axis/oxyron (oxy): pal1, pal2, pal3
;[10:47:03] Karolj Nadj: yeah but... pal2 contains only delta colors to pal1
;[10:47:19] axis/oxyron (oxy): you always build pal1 out of the deltas
;[10:47:23] axis/oxyron (oxy): copy 1 to 2
;[10:47:25] axis/oxyron (oxy): copy 2 to 3
;[10:47:25] axis/oxyron (oxy): copy 3 to 4

;[10:47:28] axis/oxyron (oxy): and use 4 for rendering
;[10:47:35] axis/oxyron (oxy): e.g.


;-----------------------------------------------------------------------
; copy_palette0 - load the 16 st_palette entries into VBXE colours 0..15.
; Every entry is three bytes (R, G, B). Each component is shifted left
; five times before it is written to the colour register.
; Clobbers A, X, Y.
;-----------------------------------------------------------------------
.proc copy_palette0
	ldy #0			;Y = byte offset into st_palette
	ldx #0			;X = colour number
next_colour
	stx VBXE_CSEL		;select the colour to change
	lda st_palette,y	;red
:5	asl
	sta VBXE_CR
	lda st_palette+1,y	;green
:5	asl
	sta VBXE_CG
	lda st_palette+2,y	;blue
:5	asl
	sta VBXE_CB
:3	iny			;advance to the next R,G,B triple
	inx
	cpx #16
	bne next_colour
	rts
.endp


* poly (.y, .a, x2, y2, x3, y3)
ttemp .byte 0			;scratch byte

	;---------------------------------------------------------------
	; poly - wrapper around calc_slope that opens the VBXE memory
	; window before the call and closes it afterwards.
	; Inputs are the sx1/sy1/sx2/sy2 zero-page variables read by
	; calc_slope.
	;---------------------------------------------------------------
	.proc poly
	mva #$80 VBXE_MEMAC_B_CONTROL
	jsr calc_slope
	mva #0 VBXE_MEMAC_B_CONTROL
	rts
	.endp

	;---------------------------------------------------------------
	; draw_poly - fill the polygon whose edges calc_slope has written
	; to the edge buffers, for scanlines miny .. maxy-1.
	;  1. Wait for the blitter, then restore control byte 8 in the
	;     hline BCB that was stop-flagged for the previous polygon
	;     (the operand of `ffff` is patched in step 4).
	;  2. (blitter_only=0) Convert redges[y] into a span length:
	;     redges[y] - ledges[y], clamped to 0 on borrow.
	;  3. Start the blitter list at $000100 (the BCB chain uploaded
	;     from xdl.copy_bcb).
	;  4. Zero the control byte of the hline BCB for scanline maxy-1
	;     so the list stops there, and remember its address.
	;  5. Wait for the blitter, then start the hline list at the BCB
	;     for scanline miny.
	; Returns with the memory window closed. Clobbers A and Y.
	;---------------------------------------------------------------
	.proc draw_poly	
;	sanity check
;	debug spanlength calc
	lda #$80
	sta VBXE_MEMAC_B_CONTROL
@	lda VBXE_BLITTER_BUSY
	bne @-
	lda #8			;8 = control value of the hline_bcb template
ffff	sta $cfff		;operand patched below; $cfff only on the very first call

.if blitter_only=0
	ldy miny
@
	lda redges,y 
	sec
	sbc ledges,y
	bcs sk
	lda #0			;right < left: empty span
sk
	sta redges,y
	iny
	cpy maxy
	bcc @-
.endif

;add+EOR#$ff = sub = spanlength
;fill in tables into scanedge blittlist
;	lda #$80
;	sta VBXE_MEMAC_B_CONTROL

	lda #$01	  ;blitter list address = $000100
	sta VBXE_BL_ADR1
	lda #0
	sta VBXE_BL_ADR0
	sta VBXE_BL_ADR2
	lda #1
	sta VBXE_BLITTER_START

	
;blit poly_face	
	ldy maxy
	dey			;last scanline of the polygon
	lda bcbsendlo,y		;address of that BCB's last byte (control)
	sta fff+1
	sta ffff+1		;remembered for the restore on the next call
	lda bcbsendhi,y
	sta fff+2
	sta ffff+2

	lda #0 ;stop flag
fff	sta $ffff		;operand patched just above

@	lda VBXE_BLITTER_BUSY	;wait for the list started above
	bne @-

	lda #$00	  ;
	sta VBXE_BL_ADR2
	ldy miny
	lda bcbstartlo,y	;blitter address of the BCB for scanline miny
	sta VBXE_BL_ADR0
	lda bcbstarthi,y
	sta VBXE_BL_ADR1

;blit face

	lda #1
	sta VBXE_BLITTER_START

	lda #0
	sta VBXE_MEMAC_B_CONTROL	
	rts
	.endp

	;---------------------------------------------------------------
	; generate_square_tables - build the four 512-byte tables
	; square1_lo/hi and square2_lo/hi read by
	; multiply_16x8bit_unsigned.
	; The first loop generates square1 incrementally and patches its
	; own operands (ml9+1, ml0+2, ml1+2), so the routine can only be
	; run once. The second loop derives square2 from square1 by
	; mirrored copying.
	; Clobbers A, X, Y.
	;---------------------------------------------------------------
	.proc generate_square_tables	; generate f(x)=int(x*x/4)
	ldx #$00
	txa
	.byte $c9		;CMP #imm opcode: swallows the tya below on the first pass
lb1   tya
	adc #$00
ml1   sta square1_hi,x	;high byte of the operand is bumped for the second page
	tay
	cmp #$40
	txa
	ror 
ml9   adc #$00		;operand is a running value stored by the next instruction
	sta ml9+1
	inx
ml0   sta square1_lo,x	;high byte of the operand is bumped for the second page
	bne lb1
	inc ml0+2
	inc ml1+2
	clc
	iny
	bne lb1
	; generate f(x)=int((x-255)*(x-255)/4)
	ldx #$00
	ldy #$ff
ml2   lda square1_hi+1,x
	sta square2_hi+$100,x
	lda square1_hi,x
	sta square2_hi,y	;Y runs downwards: mirrored copy
	lda square1_lo+1,x
	sta square2_lo+$100,x
	lda square1_lo,x
	sta square2_lo,y
	dey
	inx
	bne ml2
	rts
	.endp	

	;---------------------------------------------------------------
	; multiply_16x8bit_unsigned - PRODUCT = T1 (16 bit) * T2 (low byte)
	; using the square tables.
	; In:   T1, T1+1 = 16-bit factor; T2 = 8-bit factor
	;       carry set   -> T1 changed: re-patch the table operands
	;       carry clear -> reuse the operands from the previous call
	; Out:  PRODUCT+0..+2 = 24-bit result
	; Clobbers A, X. Self-modifying: the low operand bytes of the
	; sm1a..sm4b instructions and the immediates of _AA16x8/_cc16x8.
	;---------------------------------------------------------------
	.proc multiply_16x8bit_unsigned
; <T1 * <T2 = AAaa
; >T1 * <T2 = CCcc
;
;	 AAaa
; +   CCcc
; ----------
;   PRODUCT!
		
; Setup T1 if changed
	bcc m16x8u_setup_done
	lda T1+0
	sta sm1a16x8+1		;square1 lookups are offset by +T1 ...
	sta sm3a16x8+1
	eor #$ff
	sta sm2a16x8+1		;... square2 lookups by the complement of T1
	sta sm4a16x8+1
	lda T1+1
	sta sm1b16x8+1
	sta sm3b16x8+1
	eor #$ff
	sta sm2b16x8+1
	sta sm4b16x8+1
m16x8u_setup_done

		; Perform <T1 * <T2 = AAaa
		ldx T2+0
		sec
sm1a16x8	lda square1_lo,x
sm2a16x8	sbc square2_lo,x
		sta PRODUCT+0
sm3a16x8	lda square1_hi,x
sm4a16x8	sbc square2_hi,x
		sta _AA16x8+1	;AA is kept in the immediate of the lda below

		; Perform >T1_hi * <T2 = CCcc
		sec
sm1b16x8	lda square1_lo,x
sm2b16x8	sbc square2_lo,x
		sta _cc16x8+1	;cc is kept in the immediate of the adc below
sm3b16x8	lda square1_hi,x
sm4b16x8	sbc square2_hi,x
		sta PRODUCT+2

		; Add the separate multiplications together
		clc
_AA16x8	 lda #0
_cc16x8	 adc #0
		sta PRODUCT+1
		scc			;propagate the carry into the top byte
		inc PRODUCT+2

		rts
	.endp

	;---------------------------------------------------------------
	; multiply_s16u8 - PRODUCT = signed 16-bit T1 * unsigned 8-bit T2.
	; Runs the unsigned multiply; when T1 is negative, T2 is
	; subtracted from PRODUCT+2 (see C=Hacking16 for details).
	; The carry flag on entry is passed straight through to
	; multiply_16x8bit_unsigned. TODO Remove unused.
	;---------------------------------------------------------------
	.proc multiply_s16u8
	jsr multiply_16x8bit_unsigned
	lda T1+1
	bmi fix_sign		;negative T1 needs the correction
	rts
fix_sign
	lda PRODUCT+2
	sec
	sbc T2+0
	sta PRODUCT+2
	rts
	.endp

	;---------------------------------------------------------------
	; multiply_u16s8 - PRODUCT = unsigned 16-bit T1 * signed 8-bit T2.
	; A negative T2 is negated in place, multiplied as unsigned, and
	; the 16-bit value in PRODUCT+1/+2 is negated afterwards.
	; T1 is always re-patched (carry set before each call).
	; TODO Remove unused.
	;---------------------------------------------------------------
	.proc multiply_u16s8
	lda t2
	bpl positive

	sec			;t2 = 0 - t2
	lda #0
	sbc t2
	sta t2
	sec
	jsr multiply_16x8bit_unsigned
	; Apply sign (See C=Hacking16 for details).
	sec			;product+1/+2 = 0 - product+1/+2
	lda #0
	sbc product+1
	sta product+1
	lda #0
	sbc product+2
	sta product+2
	rts

positive
	sec
	jsr multiply_16x8bit_unsigned
	rts
	.endp

	;---------------------------------------------------------------
	; xdl - templates that init_cont uploads into VBXE memory: the
	; XDL itself, followed by blitter control blocks (BCBs).
	; Every BCB below is 21 bytes, in this field order:
	;   0-2 source address     3-4 source step y    5 source step x
	;   6-8 destination addr   9-10 dest step y     11 dest step x
	;   12-13 size x           14 size y
	;   15 AND  16 XOR  17 collision AND  18 zoom  19 pattern
	;   20 control
	; init_cont copies copy_bcb.. to $4100, cls_bcb to $4200,
	; slope_bcb to $4280, copypalette_bcb to $42c0 and 240 patched
	; copies of hline_bcb to $4300.
	;---------------------------------------------------------------
	.local xdl
	;.byte $22	;repeat gfx 
	;.byte $10
	;.byte 1	
	.byte $72	;repeat+adr+gfx
	.byte $88	;stop
	.byte 199	;RPTL
;screenram
	.long $210000	;OVADR / vramadr (high byte is rewritten every frame via $4005)
	.word $0100	;ystep of display	  
	.byte $10,$c0	;OVATT

;copies edge into bcb
copy_bcb
;vram adress for double buffer
	.long $000020 ;source (the `screen` byte)
	.word 0		;source step y
	.byte 0		;source step x
	.long $000308	;destination adress (byte 8 of each hline BCB)
	.word 21	;dest. step y
	.byte 0		;dest step x
	.word 0		;size x
	.byte 199	;size y
	.byte $ff 	;and
	.byte $0	;XOR
	.byte 0		;collision AND
	.byte $00	;zoom
	.byte 0		;pattern
	.byte 8		;control
;polycolor
	.long $000022	;source color right edge (the `polycol` byte)
	.word 0		;source step y
	.byte 0		;source step x
	.long $000310	;destination adress (byte 16 of each hline BCB)
	.word 0		;dest. step y
	.byte 21	;dest step x
	.word 199	;size x
	.byte 0		;size y
	.byte $ff 	;and
	.byte $00 	;XOR
	.byte 0	 	;collision AND
	.byte $00	;zoom
	.byte 0		;pattern
	.byte 8		;control

	.long $003e00 ;source adress left edge
	.word 0 ;source step y
	.byte 1 ;source step x
	.long $000306 ;destination adress (byte 6 of each hline BCB)
	.word 0 ;dest. step y
	.byte 21 ;dest step x
	.word 239 ;size x
	.byte 0	;size y
	.byte $ff 	;and
	.byte $00     ; XOR
	.byte 0	 ;  collision AND
	.byte $00	 ; zoom
	.byte 0	 ; pattern
	.byte 8		 ; control

.if blitter_only=1
;spanlength = left AND #$ff EOR #$ff+right into right+1
	.long $003e00 ;source 
	.word 1 ;source step y
	.byte 0 ;source step x
	.long $003d00 ;destination adress
	.word 1 ;dest. step y
	.byte 0 ;dest step x
	.word 0 ;size x
	.byte 199	;size y
	.byte $ff 	;and
	.byte $ff     ; XOR
	.byte 0	 ;  collision AND
	.byte $00	 ; zoom
	.byte 0	 ; pattern
	.byte 10	 ; control

	.long $000000 ;source
	.word 0 ;source step y
	.byte 0 ;source step x
	.long $003d00 ;destination adress
	.word 1 ;dest. step y
	.byte 0 ;dest step x
	.word 0 ;size x
	.byte 199	;size y
	.byte $00 	;and
	.byte $01 ;     ; XOR
	.byte 0	 ;  collision AND
	.byte $00	 ; zoom
	.byte 0	 ; pattern
	.byte 10	 ; control
.endif

;spanlength
	.long $003d00 ;source length edge
	.word 0 ;source step y
	.byte 1 ;source step x
	.long $00030c ;destination adress (byte 12 of each hline BCB)
	.word 0 ;dest. step y
	.byte 21 ;dest step x
	.word 199 ;size x
	.byte 0	;size y
	.byte $ff 	;and
	.byte $00     ; XOR
	.byte 0	 ;  collision AND
	.byte $00	 ; zoom
	.byte 0	 ; pattern
	.byte 0		 ; control

	.long $003c00 ;source color start
	.word 0 ;source step y
	.byte 1 ;source step x
	.long $000301 ;destination adress (byte 1 of each hline BCB)
	.word 0 ;dest. step y
	.byte 21 ;dest step x
	.word 199 ;size x
	.byte 0	;size y
	.byte $ff 	;and
	.byte $00     ; XOR
	.byte 0	 ;  collision AND
	.byte $00	 ; zoom
	.byte 0	 ; pattern
	.byte 8		 ; control

;stepy value
	.long $003a00 ;source step value
	.word 0 ;source step y
	.byte 1 ;source step x
	.long $000303 ;destination adress (bytes 3-4 of each hline BCB)
	.word 21 ;dest. step y
	.byte 1 ;dest step x
	.word 2-1 ;size x
	.byte 199	;size y
	.byte $ff 	;and
	.byte $00     ; XOR
	.byte 0	 ;  collision AND
	.byte $00	 ; zoom
	.byte 0	 ; pattern
	.byte 0		 ; control
	
;copypalette 0,30,60,90,c0
copypalette_bcb
	.long palette+4*48-$4000 ;$003f90 ;source
	.word -48 ;source step y
	.byte 1 ;source step x
	.long palette+5*48-$4000 ;$003fc0 ;destination adress
	.word -48	;dest. step y
	.byte 1		;dest step x
	.word 47	;size x
	.byte 4		;size y
	.byte $ff 	;and
	.byte $00	;XOR
	.byte 0		;collision AND
	.byte $00	 ;zoom
	.byte 0		; pattern
	.byte 0		; control

;screen clear; byte 8 (destination high byte) is patched through $4208
cls_bcb	.long $000000 ;source adress ;$0400 texture $5000
	.word 0 ;source step y
	.byte 0 ;source step x
	.long $040000 ;destination adress
	.word 256 ;dest. step y
	.byte 1
	.word 255 ;size x
	.byte 199	;size y
	.byte $00 	;and
	.byte $00     ; XOR
	.byte 0	 ;  collision AND
	.byte $00	 ; zoom
	.byte $00	 ; pattern
	.byte 0; control

;original comment: "copy $a000-$bfff to $8000-$9fff"
;NOTE(review): the fields match cls_bcb except for size y = 31 - confirm the purpose
	.long $000000 ;source adress ;$0400 texture $5000
	.word 0 ;source step y
	.byte 0 ;source step x
	.long $040000 ;destination adress
	.word 256 ;dest. step y
	.byte 1
	.word 255 ;size x
	.byte 31	;size y
	.byte $00 	;and
	.byte $00     ; XOR
	.byte 0	 ;  collision AND
	.byte $00	 ; zoom
	.byte $00	 ; pattern
	.byte 0; control

;calc slope: calc_slope patches bytes 1, 3-4, 6-7, 9-10 and 14 of the copy at $4280
slope_bcb
	.long $070000 ;source length edge
	.word $0000 ;source step y = m
	.byte 0 ;source step x
	.long $003d00 ;right edge or left
	.word 1 ;dest. step y
	.byte 0 ;dest step x
	.word 0 ;size x ;only 1 byte copy
	.byte 199	;size y
	.byte $ff 	;and
	.byte $00     ; XOR
	.byte 0	 ;  collision AND
	.byte $00	 ; zoom
	.byte 0	 ; pattern
	.byte 0		 ; control

;one horizontal span; init_cont makes 240 copies with byte 7 = scanline number
hline_bcb
	.long $074000 ;source adress
	.word 0 ;source step y
	.byte 0 ;source step x
	.long $010000+0*256 ;destination adress
	.word 0 ;0 ;dest. step y
	.byte 1 ;-1
	.word 0 ;size x
	.byte 0	;size y
	.byte $00 	;and
	.byte $ff     ; XOR
	.byte 0	 ;  collision AND
	.byte $00	 ; zoom
	.byte 0	 ; pattern
	.byte 8 ;2		 ; control
	.endl			;end of xdl

fastexit rts			;no longer used by calc_slope; label kept for compatibility

	;---------------------------------------------------------------
	; calc_slope - rasterise one polygon edge into an edge buffer by
	; programming the slope BCB at $4280 and starting the blitter.
	;
	; In:  sx1,sy1 / sx2,sy2 = edge end points (8 bit each)
	; Out: blitter started (nothing started for a horizontal edge);
	;      memory window closed on every exit
	; Clobbers A, X, T1, T2, PRODUCT; sx/sy are swapped in place when
	; the edge runs upwards.
	;
	; Method: the blitter source is the map filled by
	; init_gouraud_map, where every 256-byte page holds its own page
	; number. Source address byte 1 is set to the start x and the
	; source step to dx/dy in 8.8 fixed point, so each row copies the
	; integer part of the interpolated x into the edge buffer.
	; dx/dy is computed as dx * recitab32768[dy].
	; An edge entered with sy2 >= sy1 goes to the buffer at $3e00, a
	; swapped edge to the buffer at $3d00.
	;
	; Fix: the horizontal-edge early exit used to return through
	; `fastexit` with VBXE_MEMAC_B_CONTROL still $80, unlike every
	; other exit. It now closes the window too.
	;---------------------------------------------------------------
	.proc calc_slope	
;TO DO: WORK WITH 16bit coords here!!! see POLY
	lda VBXE_BLITTER_BUSY ;wait blitter
	bne calc_slope
	lda #$80
	sta VBXE_MEMAC_B_CONTROL
	lda #1			;dest step y = +1
	sta $4289
	lda #0
	sta $428a
	lda #$3e ;left edge buffer 
	sta $4287


	lda sy2
	cmp sy1 
	bne not_horizontal
	lda #0			;horizontal edge: nothing to rasterise, but close
	sta VBXE_MEMAC_B_CONTROL	;the window like all other exits
	rts
not_horizontal			;flags from the cmp above are still valid
	bcs @+ ;swap needed?
	ldx sx1			;edge runs upwards: swap the end points ...
	lda sx2
	sta sx1
	stx sx2
	ldx sy1
	lda sy2
	sta sy1
	stx sy2
	lda #$3d ;right edge buffer 
	sta $4287		;... and use the other edge buffer

@	
	lda sx1
	sta $4281 ;start x value of edge = startpos in array (*256 as 8.8 format)
;table entry point is at sy1
	ldx sy1
	stx $4286 ;low byte of edge table starting at sy1 = fill array at sy1 to sy2	

	lda sx1
	cmp sx2
	bcs signed_version ;>=  x decreases (or is constant) going down

	lda sx2 
	sec
	sbc sx1
	sta T2			;T2 = dx

	lda sy2
	sec
	sbc sy1 
	tax			;X = dy
	lda recitab32768,x
	sta T1
	lda recitab32768+256,x
	sta T1+1		;T1 = reciprocal table entry for dy
	dex
	stx $428e ;edge length = blit size y

	sec			;T1 changed: re-patch the multiplier
	jsr multiply_16x8bit_unsigned ;dx/dy = dx * 1/dy
	
;blitprep 
;check if slope>$1000 then slope=$1000
;check if slope>$-1000 then slope=-$1000 = $1fff	
;$f000 = -4096 ;in blitter 12bit step (13bits!!!)
;$f001 = -4095
;$ffff = -1
;$0001 = +1
;$0fff = +4095

	lda product+1
	sta $4283
	lda product+2
	sta $4284
	
	lda product+2
	cmp #$0f
	bcc do_blit
	lda #$0f		;clamp the step to $0fff
	sta $4284
	lda #$ff
	sta $4283		

;start blit
do_blit
	lda #128
	sta VBXE_BL_ADR0
	lda #$02	  ;$000280 = $4280 bank #0
	sta VBXE_BL_ADR1	 ; blitter addr
	lda #$00	  ;
	sta VBXE_BL_ADR2
    	lda #1
	sta VBXE_BLITTER_START	 ; start blitter (LERP)

	lda #0
	sta VBXE_MEMAC_B_CONTROL
	rts

	;---------------------------------------------------------------
	; signed_version - same as above for sx1 >= sx2. The edge is
	; walked from the bottom upwards (start x = sx2, start row =
	; sy2-1, dest step y = -1) so the step stays positive.
	;---------------------------------------------------------------
	.proc signed_version
	lda #$ff		;dest step y = -1
	sta $4289
	sta $428a

	lda sx2
	sta $4281 ;start x value of edge = startpos in array (*256 as 8.8 format)
;table entry point is at sy2-1
	lda sy2
	sec
	sbc #1
	sta $4286 ;low byte of edge table = fill array from sy2-1 upwards

	lda sx1 
	sec
	sbc sx2
	sta T2			;T2 = |dx|
;;##TRACE "sx1= %02x sx2= %02x sx1-sx2= %04x" db(sx1) db(sx2) dw(t1)

	lda sy2
	sec
	sbc sy1 ;delta y, always >0 here
	tax
	lda recitab32768,x
	sta T1
	lda recitab32768+256,x
	sta T1+1
	dex
	stx $428e ;edge length = blit size y
	sec			;T1 changed: re-patch the multiplier
	jsr multiply_16x8bit_unsigned	 ;dx/dy = dx * 1/dy

;;##TRACE "sy1= %02x sy2= %02x sy2-sy1= %04x" db(sy1) db(sy2) dw(product+1)
	
;blitprep: same clamping as in the unsigned path

	lda product+1
	sta $4283
	lda product+2
	sta $4284
	
	lda product+2
	cmp #$0f
	bcc do_blit
	lda #$0f		;clamp the step to $0fff
	sta $4284
	lda #$ff
	sta $4283		

;start blit
do_blit
@	lda VBXE_BLITTER_BUSY
	bne @-	

	lda #128
	sta VBXE_BL_ADR0
	lda #$02	;$000280 = $4280 bank #0
	sta VBXE_BL_ADR1	; blitter addr
	lda #$00	  ;
	sta VBXE_BL_ADR2
    	lda #1
	sta VBXE_BLITTER_START	 ; start blitter (draw starts)

	lda #0
	sta VBXE_MEMAC_B_CONTROL
	rts
	.endp

	.endp

fastexitc	rts

	;---------------------------------------------------------------
	; init_vbxe - write 1 to VBXE_PSEL, then select colour 0 and set
	; its R, G and B registers to 0.
	; Clobbers X (0 on return).
	;---------------------------------------------------------------
	.proc init_vbxe
	mvx #1 VBXE_PSEL
	mvx #0 VBXE_CSEL	;which colour # to change
	stx VBXE_CR
	stx VBXE_CG
	stx VBXE_CB
	rts
	.endp	
		
	;---------------------------------------------------------------
	; waitblt - spin until VBXE_BLITTER_BUSY reads zero.
	; Clobbers A (0 on return).
	;---------------------------------------------------------------
	.proc waitblt
@	lda VBXE_BLITTER_BUSY
	bne @-
	rts
	.endp

	;---------------------------------------------------------------
	; blinking - NMI patch target. parse_data writes "jmp blinking"
	; over nmi.stop at end of stream. Each call bumps a counter,
	; puts its low four bits into colpf0 and rejoins the NMI handler
	; at nmi.skip (after the timer code).
	;---------------------------------------------------------------
	.proc blinking
	inc blink
	lda #$0f
	and blink		;A = blink & $0f
	sta colpf0
	jmp nmi.skip

blink 	.byte 0			;call counter

	.endp

	;---------------------------------------------------------------
	; clear_vram - clear frame buffer screentab[screencounter] with
	; the blitter and wait until it has finished.
	; The buffer's page byte is also stored in `screen`.
	; Clobbers A, X. Returns with the memory window closed.
	;---------------------------------------------------------------
	.proc clear_vram
	jsr waitblt
	mva #$00 VBXE_BL_ADR0		;blitter list at $000200 = $4200 bank #0
	mva #$02 VBXE_BL_ADR1
	mva #$00 VBXE_BL_ADR2
	mva #$80 VBXE_MEMAC_B_CONTROL	;open the window to patch the BCB
	ldx screencounter
	lda screentab,x
	sta screen
	sta $4208			;clr_bcb which screen
	mva #0 VBXE_MEMAC_B_CONTROL
	mva #1 VBXE_BLITTER_START	;start blitter
	jsr waitblt
	rts
	.endp
	
	;---------------------------------------------------------------
	; init_gouraud_map - fill 256 pages of VBXE memory so that every
	; byte of page p holds the value p. The four passes select the
	; window values $dc, $dd, $de and $df and each writes 64 pages
	; through the window at gradientmap ($4000), continuing the page
	; numbering ($00-$3f, $40-$7f, $80-$bf, $c0-$ff).
	; The store operands ad..ad4 are incremented in place and never
	; reset, so the routine can only be run once.
	; Execution falls through the .endp into "jmp init_end", which
	; sets the window back to $80 and returns to the caller.
	; Clobbers A, X, Y.
	;---------------------------------------------------------------
	.proc init_gouraud_map
	lda #$dc			;first 64 pages (original note: "bank 1")
	sta VBXE_MEMAC_B_CONTROL	;cpu-vram access window at $4000, $70000
	ldy #0
@	ldx #0
@	tya
ad	sta gradientmap,x	;high operand byte is bumped once per page
	inx 
	bne @-
	inc ad+2
	iny
	cpy #$40
	bne @-1

	lda #$dd			;second 64 pages (original note: "bank 2")
	sta VBXE_MEMAC_B_CONTROL	;$008000
	
	ldy #$40
@	ldx #0
@	tya
ad2	sta gradientmap,x
	inx 
	bne @-
	inc ad2+2
	iny
	cpy #$80
	bne @-1

	lda #$de			;third 64 pages (original note: "bank 3")
	sta VBXE_MEMAC_B_CONTROL	;cpu-vram access window at $4000

	ldy #128
@	ldx #0
@	tya
ad3	sta gradientmap,x
	inx 
	bne @-
	inc ad3+2
	iny
	cpy #$c0
	bne @-1

	lda #$df	;fourth 64 pages (the original "bank 1" note was a copy/paste slip)
	sta VBXE_MEMAC_B_CONTROL	 ;cpu-vram access window at $4000

	ldy #$c0
@	ldx #0
@	tya
ad4	sta gradientmap,x
	inx 
	bne @-
	inc ad4+2
	iny
	bne @-1			;Y wraps to 0 after page $ff
	.endp
	jmp init_end		;reached by fall-through from the procedure above
;

init_end
	lda #$80		;select the $80 window setting again
	sta VBXE_MEMAC_B_CONTROL
	rts	

	.align $100
	;display list installed for the demo itself (see "mwa #dlist dlistl");
	;its only text line shows the `text` buffer with the running clock
	.local dlist
	.byte $60
	.byte $80
:25	.byte $70
	.byte $c6
	.word text		;address operand of the $c6 entry
	.byte $41
	.word dlist		;address operand of the $41 entry: back to the start
	.endl

	;display list installed for the intro screen (see "mwa #dlistintro dlistl")
	.local dlistintro
	.byte $70,$f0,$70,$47
	.word intro_text,$40	;NOTE(review): the second word emits the bytes $40,$00 into the list - confirm this is intended
:7	.byte $07,$70
	.byte $87,$70
	.byte $41
	.word dlistintro	;address operand of the $41 entry: back to the start
	.endl

;-----------------------------------------------------------------------
; get_byte - return the next stream byte in A.
; Assembled here (load address get_byte_rom) but addressed as if located
; at `get_byte` in zero page; init_cont copies it there.
; The read pointer `ptr` is the operand of the lda at same_bank. When its
; high byte reaches >stream_end the pointer is reset to stream_data,
; `bank` is incremented and stored into the low address byte of the
; "sta $d500" (bank_register), so that store goes to $d500+bank.
; Uses A only; X and Y are not touched.
;-----------------------------------------------------------------------
get_byte_rom
	.proc get_byte_zp,get_byte
	lda ptr+1
	cmp #>stream_end	;read pointer past the end of the window?
	bne same_bank
	mwa #stream_data ptr	;bb5c
	inc bank
	lda bank
	sta bank_register	;patch the low address byte of the store below
	sta $d500
bank_register = *-2

same_bank
	lda stream_data ;$bb5c ;streamdata
ptr	= *-2			;operand of the lda above is the read pointer
	inw ptr
	rts
	.endp


text 	dta d"    00:00:00     "	;clock line; the NMI handler rewrites offsets 4,5,7,8,10,11
digits 	dta d"0123456789"		;digit characters indexed by BCD nibble

fontcopy ins "Antic.chr",0,512		;512 bytes of font data, copied to chrset by init_cont

;-----------------------------------------------------------------------
; st_palette - 16 colours, three bytes each (R, G, B), every component
; being the 3-bit Atari ST value (0..7). copy_palette0 shifts each
; component left five times before writing it to VBXE.
;
;ST:   xx xx xx xx xx R2 R1 R0  xx G2 G1 G0 xx B2 B1 B0
;SNES: 0BBBBBGG GGGRRRRR
;VBXE: RRRRRRRR,GGGGGGGG,BBBBBBBB
;
; The ST colour word is given next to each entry.
; Fix: the green component of colour 2 ($0012) was $10, which becomes 0
; after the 8-bit shift in copy_palette0; the ST value is 1.
;-----------------------------------------------------------------------
st_palette:
	.byte $00,$00,$00	;$0000
	.byte $01,$01,$01	;$0111
	.byte $00,$01,$02	;$0012
	.byte $02,$00,$01	;$0201
	.byte $01,$01,$02	;$0112
	.byte $02,$01,$02	;$0212
	.byte $02,$02,$03	;$0223
	.byte $03,$02,$02	;$0322
	.byte $04,$02,$02	;$0422
	.byte $02,$02,$04	;$0224
	.byte $01,$03,$06	;$0136
	.byte $05,$03,$02	;$0532
	.byte $04,$03,$04	;$0434
	.byte $05,$04,$05	;$0545
	.byte $07,$04,$01	;$0741
	.byte $07,$07,$07	;$0777


	;screen text for the intro display list (dlistintro), 9 rows of 20 characters
	.local intro_text
	.byte d"   DESIRE PRESENT   "
	.byte d" NEW COMPO FOR YOU! "
	.byte d"   NOSTALGIC TRIP   "
	.byte d"--------------------"
	.byte d"WHO RENDERS FASTER? "
	.byte d"                    "
	.byte d"WE USE 1:1 ST DATA! "
	.byte d"     MORE INFO      "
	.byte d" WWW.ARSANTICA.COM  "
	.endl

	.align $100
	;reciprocal table read by calc_slope: byte at [dy] and byte at [256+dy] form T1
	;NOTE(review): the label says 32768 but the file name says 65536 - confirm the scale
	.local recitab32768
	ins "ST-NICCC-ReciTab65536.dat" ;512 bytes
	.endl

	org $2000		;music image; init_cont passes $2000 to the init call at $3400
    ins 'music.dat'


.if standalone=1
	org stream_data		;standalone build: first $4000 bytes of the scene at the stream window
	ins "old_scene.bin",0*65536,$4000
	run init		;NOTE(review): no label `init` is defined in the visible source (the entry code is at $0800 / init_cont) - confirm where it comes from
.endif

