Page 1 of 1

Clover

Posted: Fri Sep 07, 2012 12:32 pm
by macgub
I ported randall's clover renderer (see the Linux section of the forum at board.flatassembler.net) to KolibriOS and MenuetOS64. Originally it was 64-bit code using SSE3; now it is 32-bit code using SSE2.
http://macgub.vxm.pl/menuet/clover.zip -> code
http://macgub.vxm.pl/menuet/clover_big.png -> screenshot
8)

Re: Clover

Posted: Fri Sep 07, 2012 12:43 pm
by XVilka
Please attach the sources here as well, so that the file does not become unavailable if your site URL ever changes. Good job!

Re: Clover

Posted: Fri Sep 14, 2012 1:15 pm
by macgub
OK

Code: Select all

; clover renderer written by randall
; originally 64-bit code using SSE3 instructions, for Linux
; ported to KolibriOS and MenuetOS64 by macgub
; www.macgub.hekko.pl
; now it is 32-bit code using SSE2 instructions


; Build-time target selection: set OS to Menuet or Kolibri.
; The value only affects the `if OS = Kolibri` blocks in draw_from_buffer
; (image position) and draw_window (window size).
Menuet = 1
Kolibri = 0
OS = Kolibri

; Image size in pixels.
; NOTE: g_img_w / g_img_h in the data section hold the same values as
; floats (600.0) and must be edited together with these two constants.
IMG_W=600
IMG_H=600



use32

	       org    0x0

; Executable header ('MENUET01' format); the fields below are read by the
; OS loader, so their order and size must not change.
	       db     'MENUET01'	      ; 8 byte id
	       dd     0x01		      ; header version
	       dd     START		      ; start of code
	       dd     IMG_END			; size of image (initialised part of the file)
	       dd     I_END ;0x100000                ; memory for app (includes the rb buffers up to I_END)
	       dd     I_END ;0xbffff                 ; esp (stack grows down into memStack)
	       dd     0x0 , 0x0 	      ; I_Param , I_Icon

START:				; program entry point (referenced from the header)

     call draw_window		; create the window
     call main			; render the whole image into screen4 (returns when done)
     call draw_from_buffer	; show the rendered image

; ---- event loop --------------------------------------------------------
; Waits for an event and dispatches on its code in eax:
;   1 = redraw request, 2 = key pressed, 3 = button pressed.
; Any other event code is ignored.
still:

    mov  eax,10 		; function 10: wait for event
    int  0x40

    cmp  eax,1			; redraw request ?
    je	 red
    cmp  eax,2			; key in buffer ?
    je	 key
    cmp  eax,3			; button in buffer ?
    je	 button

    jmp  still			; unknown event: keep waiting

  red:				; redraw: window frame first, then the image
    call draw_window
    call draw_from_buffer
    jmp  still

  key:				; key: Esc closes the program, other keys are ignored
    mov  eax,2			; function 2: read the key from the buffer
    int  0x40
    ; Test AH only.  The key code is returned in AH; the bits above it are
    ; not guaranteed to be zero (KolibriOS reports the scancode in bits
    ; 16-23), so comparing the whole of EAX after a shift can never match.
    cmp  ah,27			; Esc ?
    jne  still
    jmp  exit_app

  button:			; button
    mov  eax,17 		; function 17: get id of the pressed button
    int  0x40

    cmp  ah,1			; button id=1 (close button) ?
    jne  still

  exit_app:
    mov  eax,-1 		; function -1: close this program
    int  0x40



draw_from_buffer:
; Blits the rendered picture stored at screen4 into the window using
; system function 7.
;   ebx = address of the pixel data
;   ecx = width in the high word, height in the low word
;   edx = destination x in the high word, y in the low word
; The destination is (0,0) for the Kolibri build and (10,25) for the
; Menuet build.

	    mov ebx,screen4
	    mov ecx,(IMG_W shl 16) + IMG_H
      if OS = Kolibri
	    mov edx,0
      else
	    mov edx,(10 shl 16) + 25
      end if
	    mov eax,7
	    int 0x40
ret


;-------------------------------------------------------------------------------
; MACRO:        SMOOTHSTEP  (expanded inline, 4 packed floats at a time)
; IN:           xmm0        edge0
; IN:           xmm1        edge1
; IN:           xmm2        x
; OUT:          xmm0        t*t*(3 - 2*t), with t = (x-edge0)/(edge1-edge0)
;                           limited to [0,1]  (GLSL-style smoothstep)
; CLOBBERS:     xmm1 (left holding 2*t), xmm2 (left holding t*t)
;-------------------------------------------------------------------------------
macro		SMOOTHSTEP  {
		subps	    xmm1,xmm0		; xmm1 = edge1 - edge0
		subps	    xmm2,xmm0		; xmm2 = x - edge0
		divps	    xmm2,xmm1		; xmm2 = t (not yet limited)
		minps	    xmm2,dqword [g_1_0] ; upper limit 1.0 ...
		maxps	    xmm2,dqword [g_0_0] ; ... lower limit 0.0
		movaps	    xmm1,xmm2
		addps	    xmm1,xmm1		; xmm1 = 2*t
		movaps	    xmm0,dqword [g_3_0]
		subps	    xmm0,xmm1		; xmm0 = 3 - 2*t
		mulps	    xmm2,xmm2		; xmm2 = t*t
		mulps	    xmm0,xmm2		; xmm0 = t*t*(3 - 2*t)
}
;-------------------------------------------------------------------------------
;segment readable executable
;-------------------------------------------------------------------------------
; NAME:         sin
; IN:           xmm0        | w z y x |
; OUT:          xmm0        | sin(w) sin(z) sin(y) sin(x) |   (polynomial approximation)
; CLOBBERS:     xmm1-xmm7
;
; Method: the magnitude of the argument is scaled by 2/pi so that the integer
; part q counts quarter periods and the fractional part is the position
; inside the quarter.  Odd quarters are mirrored (t = 1 - frac), bit 1 of q
; flips the sign, and the sign of the input is re-applied at the end.
; The result is  sign * t * (p0 + t^2*(p1 + t^2*(p2 + t^2*p3))).
;-------------------------------------------------------------------------------
align 16
sin:
		movaps	    xmm7,xmm0
		andps	    xmm0,dqword [g_inv_sign_mask] ; xmm0 = |arg|
		andps	    xmm7,dqword [g_sign_mask]	  ; xmm7 = sign bit of arg
		mulps	    xmm0,dqword [g_2_div_pi]	  ; xmm0 = |arg| * 2/pi
		pxor	    xmm3,xmm3			  ; xmm3 = 0 (integer compare operand)
		movdqa	    xmm5,dqword [g_1]
		movaps	    xmm4,dqword [g_1_0]
		cvttps2dq   xmm2,xmm0			  ; xmm2 = q = truncated quarter index
		pand	    xmm5,xmm2			  ; q and 1
		pcmpeqd     xmm5,xmm3			  ; xmm5 = all ones where q is even
		cvtdq2ps    xmm6,xmm2			  ; xmm6 = q as float
		pand	    xmm2,dqword [g_2]		  ; keep bit 1 of q ...
		pslld	    xmm2,30			  ; ... and move it into the sign-bit position
		subps	    xmm0,xmm6			  ; xmm0 = fractional part
		minps	    xmm0,xmm4			  ; limit to 1.0
		subps	    xmm4,xmm0			  ; xmm4 = 1 - frac
		andps	    xmm0,xmm5			  ; even quarter: t = frac
		andnps	    xmm5,xmm4			  ; odd quarter:  t = 1 - frac
		orps	    xmm0,xmm5			  ; xmm0 = t
		movaps	    xmm1,xmm0
		mulps	    xmm0,xmm0			  ; xmm0 = t^2
		xorps	    xmm2,xmm7			  ; combine half-period sign with input sign
		orps	    xmm1,xmm2			  ; xmm1 = signed t
		movaps	    xmm7,xmm0			  ; xmm7 = t^2
		mulps	    xmm0,dqword [g_sincos_p3]	  ; Horner evaluation in t^2
		addps	    xmm0,dqword [g_sincos_p2]
		mulps	    xmm0,xmm7
		addps	    xmm0,dqword [g_sincos_p1]
		mulps	    xmm0,xmm7
		addps	    xmm0,dqword [g_sincos_p0]
		mulps	    xmm0,xmm1			  ; multiply by signed t
		ret
;-------------------------------------------------------------------------------
; NAME:         atanr2
; IN:           xmm0        | w0 z0 y0 x0 |
; IN:           xmm1        | w1 z1 y1 x1 |
; OUT:          xmm0        | atan2(w0,1/w1) atan2(z0,1/z1) atan2(y0,1/y1) atan2(x0,1/x1) |
; CLOBBERS:     xmm1-xmm7, memory slots xxmm8 and xxmm9
;
; The second operand is the RECIPROCAL of the usual atan2 x argument, so
; z = xmm0*xmm1 is the ratio whose arctangent is wanted.
; Steps (instructions for different steps are interleaved):
;   1. w = z when |z| <= 1, otherwise w = rcpps(z)      (mask kept in xmm7)
;   2. atan(w) is evaluated as a nested fraction built from the g_atan_s*
;      and g_atan_t* constants, using rcpps for every division
;   3. where the reciprocal was used: result = (pi/2 with the sign of w) - atan(w)
;   4. where xmm1 < 0: add pi carrying the sign of the original xmm0
;-------------------------------------------------------------------------------
align 16
atanr2:
		movaps	    xmm2,dqword [g_sign_mask]
		xorps	    xmm3,xmm3
		movaps	    xmm5,dqword [g_1_0]
		andps	    xmm2,xmm0		; xmm2 = sign bit of first operand
		mulps	    xmm0,xmm1		; xmm0 = z
		orps	    xmm2,dqword [g_pi]	; xmm2 = pi with that sign
		cmpleps     xmm3,xmm1		; xmm3 = mask: 0 <= second operand
		movaps	    xmm6,dqword [g_m1_0]
		rcpps	    xmm4,xmm0		; xmm4 = approx 1/z
		cmpltps     xmm5,xmm0		; xmm5 = mask: 1 < z
		cmpnleps    xmm6,xmm0		; xmm6 = mask: not (-1 <= z)
		movaps	    xmm1,dqword [g_atan_s0]
		orps	    xmm5,xmm6		; xmm5 = mask: z outside [-1,1]
		movaps	    [xxmm8],xmm2	; spill signed pi (no xmm8 in 32-bit mode)
		movaps	    [xxmm9],xmm3	; spill the "0 <= second operand" mask
		andps	    xmm4,xmm5		; take 1/z where z is outside [-1,1]
		movaps	    xmm2,dqword [g_atan_t0]
		movaps	    xmm7,xmm5		; xmm7 = copy of the range mask for step 3
		andnps	    xmm5,xmm0		; take z elsewhere
		movaps	    xmm3,dqword [g_atan_s1]
		orps	    xmm4,xmm5		; xmm4 = w
		movaps	    xmm0,xmm4		; xmm0 = w
		movaps	    xmm6,dqword [g_atan_t1]
		mulps	    xmm4,xmm4		; xmm4 = w^2
		addps	    xmm1,xmm4		; s0 + w^2
		movaps	    xmm5,dqword [g_atan_s2]
		rcpps	    xmm1,xmm1
		mulps	    xmm1,xmm2		; t0 / (s0 + w^2)
		movaps	    xmm2,dqword [g_atan_t2]
		addps	    xmm3,xmm4		; s1 + w^2
		addps	    xmm1,xmm3
		movaps	    xmm3,dqword [g_atan_s3]
		rcpps	    xmm1,xmm1
		mulps	    xmm1,xmm6		; t1 / (s1 + w^2 + ...)
		movaps	    xmm6,dqword [g_atan_t3]
		addps	    xmm5,xmm4		; s2 + w^2
		addps	    xmm1,xmm5
		movaps	    xmm5,dqword [g_sign_mask]
		rcpps	    xmm1,xmm1
		mulps	    xmm1,xmm2		; t2 / (s2 + w^2 + ...)
		addps	    xmm3,xmm4		; s3 + w^2
		movaps	    xmm4,dqword [g_0_5pi]
		mulps	    xmm6,xmm0		; t3 * w
		addps	    xmm1,xmm3
		andps	    xmm0,xmm5		; xmm0 = sign bit of w
		rcpps	    xmm1,xmm1
		movaps	    xmm3,[xxmm9]	; reload the "0 <= second operand" mask
		mulps	    xmm1,xmm6		; xmm1 = atan(w) approximation
		orps	    xmm0,xmm4		; xmm0 = pi/2 with the sign of w
		subps	    xmm0,xmm1		; xmm0 = signed pi/2 - atan(w)
		movaps	    xmm2,[xxmm8]	; reload signed pi
		andps	    xmm0,xmm7		; use it where the reciprocal was taken
		andnps	    xmm7,xmm1		; use atan(w) directly elsewhere
		orps	    xmm0,xmm7
		movaps	    xmm1,xmm0
		andps	    xmm0,xmm3		; keep as is where second operand >= 0
		addps	    xmm1,xmm2		; otherwise add signed pi
		andnps	    xmm3,xmm1
		orps	    xmm0,xmm3
		ret
;-------------------------------------------------------------------------------
; NAME:         ComputeColors
; IN:           edi         pointer to 4 image pixels: 4 x 16 bytes of packed
;                           floats, written with movaps (must be 16-byte aligned)
; IN:           xmm0        | x3 x2 x1 x0 | normalized pixel x coordinates
; IN:           xmm1        | y3 y2 y1 y0 | normalized pixel y coordinates
; OUT:          [edi+16*i]  | ? b g r | float colour of pixel i, i = 0..3
;               (xmm0 is left holding the colour of pixel 3)
; CLOBBERS:     xmm0-xmm7, memory slots xxmm8, xxmm9 (through atanr2),
;               xxmm12, xxmm13, xxmm14, xxmm15
;
; The memory slots xxmm12..xxmm15 keep values alive across the calls to
; sin/atanr2, which overwrite xmm0-xmm7.
;-------------------------------------------------------------------------------
align 16
ComputeColors:
		; r = sqrt(x * x + y * y)
		movaps	    xmm2,xmm0
		mulps	    xmm2,xmm0
		movaps	    xmm3,xmm1
		mulps	    xmm3,xmm1
		addps	    xmm2,xmm3
		sqrtps	    xmm2,xmm2
		movaps	    [xxmm15],xmm2	; xxmm15 = r
		; a = atan2(y, x) = atanr2(y, 1/x)
		rcpps	    xmm2,xmm0		; xmm2 = approx 1/x
		movaps	    xmm0,xmm1		; xmm0 = y
		movaps	    xmm1,xmm2		; xmm1 = 1/x
		call	    atanr2		; xmm0 = a
		; s = 0.5 + 0.5 * sin(3.0 * a)
		mulps	    xmm0,dqword [g_3_0]
		movaps	    [xxmm12],xmm0	; xxmm12 = 3.0 * a
		call	    sin
		movaps	    xmm1,dqword [g_0_5]
		mulps	    xmm0,xmm1
		addps	    xmm0,xmm1
		movaps	    [xxmm13],xmm0	; xxmm13 = s
		; g = sin(0.5 * pi + 3.0 * a)
		movaps	    xmm0,[xxmm12]
		addps	    xmm0,dqword [g_0_5pi]
		call	    sin 		; xmm0 = g
		; d = 0.3 + 0.6 * sqrt(s) + 0.15 * g * g
		; (the 0.6*sqrt(s) term used to be computed and then discarded
		;  by a register restore, so plain s was added instead)
		mulps	    xmm0,xmm0
		mulps	    xmm0,dqword [g_0_15]
		movaps	    xmm1,[xxmm13]
		sqrtps	    xmm1,xmm1
		mulps	    xmm1,dqword [g_0_6] ; xmm1 = 0.6 * sqrt(s)
		addps	    xmm0,xmm1
		addps	    xmm0,dqword [g_0_3] ; xmm0 = d
		; h = r / d
		movaps	    xmm3,[xxmm15]
		divps	    xmm3,xmm0
		movaps	    [xxmm15],xmm3	; xxmm15 = h
		; f = 1.0 - smoothstep(0.95, 1.0, h)
		movaps	    xmm0,dqword [g_0_95]  ; edge0
		movaps	    xmm1,dqword [g_1_0]   ; edge1
		movaps	    xmm2,xmm3		  ; x = h
		SMOOTHSTEP
		movaps	    xmm3,dqword [g_1_0]
		subps	    xmm3,xmm0
		movaps	    [xxmm14],xmm3	; xxmm14 = f
		; h = h * (1 - 0.5 * (1 - h) * smoothstep(0.95 + 0.05 * h, 1.0, sin(3.0 * a)))
		movaps	    xmm0,[xxmm12]
		call	    sin
		movaps	    xmm2,xmm0		  ; x = sin(3.0 * a)
		movaps	    xmm0,[xxmm15]
		mulps	    xmm0,dqword [g_0_05]
		addps	    xmm0,dqword [g_0_95]  ; edge0 = 0.95 + 0.05 * h
		movaps	    xmm1,dqword [g_1_0]   ; edge1
		SMOOTHSTEP
		movaps	    xmm1,dqword [g_1_0]
		movaps	    xmm2,xmm1
		subps	    xmm1,[xxmm15]	; 1 - h
		mulps	    xmm1,dqword [g_0_5]
		mulps	    xmm0,xmm1
		subps	    xmm2,xmm0
		mulps	    xmm2,[xxmm15]
		movaps	    [xxmm15],xmm2	; xxmm15 = adjusted h
		; leaf colour: xmm7 = g_c0 * h + g_c1, background: xmm6 = 1.0
		; NOTE(review): this multiplies colour channel j by lane j of h
		; (the h of pixel j) and uses the same xmm7 for all 4 pixels,
		; instead of each pixel's own h - confirm whether this is
		; intended; behaviour is kept as it was.
		movaps	    xmm6,dqword [g_1_0]
		movaps	    xmm7,dqword [g_c0]
		mulps	    xmm7,[xxmm15]
		addps	    xmm7,dqword [g_c1]

		; pixel i = f_i * leaf + (1 - f_i) * background
		movaps	    xmm0,[xxmm14]
		shufps	    xmm0,xmm0,00000000b ; xmm0 = | f0 f0 f0 f0 |
		movaps	    xmm1,xmm6
		subps	    xmm1,xmm0		; xmm1 = 1 - f0 in every lane
		mulps	    xmm0,xmm7
		mulps	    xmm1,xmm6
		addps	    xmm0,xmm1
		movaps	    [edi+0],xmm0

		movaps	    xmm0,[xxmm14]
		shufps	    xmm0,xmm0,01010101b ; xmm0 = | f1 f1 f1 f1 |
		movaps	    xmm1,xmm6
		subps	    xmm1,xmm0		; xmm1 = 1 - f1 in every lane
		mulps	    xmm0,xmm7
		mulps	    xmm1,xmm6
		addps	    xmm0,xmm1
		movaps	    [edi+16],xmm0

		movaps	    xmm0,[xxmm14]
		shufps	    xmm0,xmm0,10101010b ; xmm0 = | f2 f2 f2 f2 |
		movaps	    xmm1,xmm6
		subps	    xmm1,xmm0		; xmm1 = 1 - f2 in every lane
		mulps	    xmm0,xmm7
		mulps	    xmm1,xmm6
		addps	    xmm0,xmm1
		movaps	    [edi+32],xmm0

		movaps	    xmm0,[xxmm14]
		shufps	    xmm0,xmm0,11111111b ; xmm0 = | f3 f3 f3 f3 |
		movaps	    xmm1,xmm6
		subps	    xmm1,xmm0		; xmm1 = 1 - f3 in every lane
		mulps	    xmm0,xmm7
		mulps	    xmm1,xmm6
		addps	    xmm0,xmm1
		movaps	    [edi+48],xmm0
		ret
;-------------------------------------------------------------------------------
;-------------------------------------------------------------------------------
;-------------------------------------------------------------------------------
; NAME:         main
; Renders the picture: fills screen16 with one packed-float colour per
; pixel (4 pixels per ComputeColors call), then converts it to the 3 bytes
; per pixel stored in screen4 that draw_from_buffer displays.
; IN:           nothing
; OUT:          screen4 filled
; CLOBBERS:     eax, ebx, ecx, esi, edi, xmm0-xmm7 (ebp/esp are restored)
;-------------------------------------------------------------------------------
main:
; stack-frame locals
imgptr		equ	    ebp-8	; address of the byte image
imgptr_float	equ	    ebp-16	; address of the float image
x		equ	    ebp-32	; 4 dwords: x index of the 4 current pixels
y		equ	    ebp-48	; 4 dwords: y index (same value in all lanes)

		push	    ebp
		mov	    ebp,esp
		sub	    esp,64

		mov	    dword [imgptr],screen4
		mov	    dword [imgptr_float],screen16
		mov	    ebx,[imgptr_float]	; ebx = write position in screen16
		;;
		;; Compute image
		;;
		; begin loops (the locals are not 16-byte aligned, hence movdqu/movups)
		pxor	    xmm0,xmm0
		movdqu	    [y],xmm0
.LoopY:
		movdqa	    xmm0,dqword [g_3_2_1_0]
		movdqu	    [x],xmm0		; x = 0,1,2,3 at the start of each row
.LoopX:
		mov	    edi,ebx
		; compute normalized x coordinate: (x / width - 0.5) * 2

		movups	    xmm0,[x]
		cvtdq2ps    xmm0,xmm0
		divps	    xmm0,dqword [g_img_w]
		subps	    xmm0,dqword [g_0_5]
		addps	    xmm0,xmm0
		; compute normalized y coordinate: (y / height - 0.5) * 2
		movups	    xmm1,[y]
		cvtdq2ps    xmm1,xmm1
		divps	    xmm1,dqword [g_img_h]
		subps	    xmm1,dqword [g_0_5]
		addps	    xmm1,xmm1
		call	    ComputeColors
		; advance pixel pointer (4 pixels * 16 bytes)
		add	    ebx,64
		; continue .LoopX
		movdqu	    xmm0,[x]
		paddd	    xmm0,dqword [g_4]
		movdqu	    [x],xmm0
		cmp	    dword [x+12],IMG_W	; highest lane is the last pixel of the group
		jb	    .LoopX
		; continue .LoopY
		movdqu	    xmm0,[y]
		paddd	    xmm0,dqword [g_1]
		movdqu	    [y],xmm0
		cmp	    dword [y],IMG_H
		jb	    .LoopY
		;;
		;; Convert image: float colour -> 3 bytes per pixel
		;;
		; Each output pixel is the byte triple (0, v, 0) where v is
		; lane 0 of the float colour limited to [0,1] and scaled to
		; 0..255.  This is done in a single pass; the visible result
		; is the same as the former two-pass conversion (32-bit
		; store, then in-place xchg ah,al repack with `loop`).
		; NOTE(review): only lane 0 of each colour reaches the screen;
		; lanes 1 and 2 are computed but dropped - confirm whether
		; full colour output was intended.
		mov	    esi,[imgptr_float]
		mov	    edi,[imgptr]
		mov	    ecx,IMG_W*IMG_H
.Convert:
		movaps	    xmm0,[esi]
		maxps	    xmm0,dqword [g_0_0]
		minps	    xmm0,dqword [g_1_0]
		mulps	    xmm0,dqword [g_255_0]
		cvttps2dq   xmm0,xmm0

		movd	    eax,xmm0		; eax = v (0..255)
		shl	    eax,8		; place v in the middle byte of the triple
		mov	    [edi],eax		; 4-byte store; 4th byte is overwritten by the
						; next pixel (screen4 has room for the last one)
		add	    esi,16
		add	    edi,3
		sub	    ecx,1
		jnz	    .Convert

		mov	    esp,ebp
		pop	    ebp
		ret

;   *********************************************
;   *******  WINDOW DEFINITIONS AND DRAW ********
;   *********************************************
; Defines and draws the application window and its title text.
; The window size depends on the build target (OS constant) and on
; IMG_W / IMG_H.  Clobbers eax, ebx, ecx, edx, esi, edi.
draw_window:

    mov  eax,12 		   ; function 12:tell os about windowdraw
    mov  ebx,1			   ; 1, start of draw
    int  0x40
								   ; DRAW WINDOW
    mov  eax,0	; function 0 : define and draw window
  if OS = Kolibri
    mov  ebx,100*65536+IMG_W+9	    ; [x start] *65536 + [x size]
    mov  ecx,100*65536+IMG_H+25     ; [y start] *65536 + [y size]
  else
    mov  ebx,100*65536+IMG_W+18      ; [x start] *65536 + [x size]
    mov  ecx,100*65536+IMG_H+34     ; [y start] *65536 + [y size]
  end if
    mov  edx,0x74000000 		   ; window style/flags (top byte) + work area color
					   ; NOTE(review): exact meaning of 0x74 is defined by
					   ; the function 0 documentation - confirm there
    mov  edi,labelt			   ; pointer to the zero-terminated title string
    int  0x40

    ; WINDOW LABEL
    mov     eax,4	    ; function 4 : write text to window
    mov     ebx,8*65536+8   ; [x start] *65536 + [y start]
    mov     ecx,0x20ddeeff  ; font 1 & color ( 0xF0RRGGBB )
    mov     edx,labelt	    ; pointer to text beginning
    mov     esi,labelen-labelt-1   ; text length, without the terminating 0 byte
				   ; (labelen-labelt alone also counted the terminator)
    int     0x40

    mov  eax,12 		   ; function 12:tell os about windowdraw
    mov  ebx,2			   ; 2, end of draw
    int  0x40

    ret



;-------------------------------------------------------------------------------
; Initialised data.  Everything from the `align 16` below is read with
; aligned 16-byte loads (movaps / movdqa / memory operands of SSE
; instructions), so each constant is 4 dwords and the block must stay
; 16-byte aligned.
;-------------------------------------------------------------------------------
;align 1
; Window title, zero terminated; labelen marks the byte after the terminator.
labelt:
 db  'clover',0
labelen:

align 16
; Image size as floats; must match IMG_W / IMG_H at the top of the file.
g_img_w 	dd	    4 dup 600.0
g_img_h 	dd	    4 dup 600.0

; Colour coefficients used by ComputeColors: colour = g_c0 * h + g_c1
g_c0		dd	    0.4,0.3,0.0,0.0
g_c1		dd	    0.0,0.2,0.0,0.0
; Shape constants used by ComputeColors / SMOOTHSTEP
g_3_0		dd	    4 dup 3.0
g_0_3		dd	    4 dup 0.3
g_0_6		dd	    4 dup 0.6
g_0_15		dd	    4 dup 0.15
g_0_95		dd	    4 dup 0.95
g_0_05		dd	    4 dup 0.05

; Disabled byte-shuffle mask, not referenced by the code in this file.
;g_img_conv_mask db          8,4,0,12,12 dup 0x80
; General float constants
g_0_0		dd	    4 dup 0.0
g_0_5		dd	    4 dup 0.5
g_1_0		dd	    4 dup 1.0
g_m1_0		dd	    4 dup -1.0
g_255_0 	dd	    4 dup 255.0
; Integer constants (loop counters in main, quarter-index masks in sin)
g_1		dd	    4 dup 1
g_2		dd	    4 dup 2
g_4		dd	    4 dup 4
g_3_2_1_0	dd	    0,1,2,3
; Bit masks for the float sign bit, and pi-related values
g_sign_mask	dd	    4 dup 0x80000000
g_inv_sign_mask dd	    4 dup not 0x80000000
g_0_5pi 	dd	    4 dup 1.57079633
g_2_div_pi	dd	    4 dup 0.636619772
g_pi		dd	    4 dup 3.1415926535897

; Polynomial coefficients used by sin (applied to powers of t^2, lowest first)
g_sincos_p0	dd	    4 dup 1.5707963267948963959
g_sincos_p1	dd	    4 dup -0.64596409750621907082
g_sincos_p2	dd	    4 dup 0.07969262624561800806
g_sincos_p3	dd	    4 dup -0.00468175413106023168

; Nested-fraction coefficients used by atanr2
g_atan_t0	dd	    4 dup -0.091646118527267623468
g_atan_t1	dd	    4 dup -1.3956945682312098640
g_atan_t2	dd	    4 dup -94.393926122725531747
g_atan_t3	dd	    4 dup 12.888383034157279340
g_atan_s0	dd	    4 dup 1.2797564625607904396
g_atan_s1	dd	    4 dup 2.1972168858277355914
g_atan_s2	dd	    4 dup 6.8193064729268275701
g_atan_s3	dd	    4 dup 28.205206687035841409
;-------------------------------------------------------------------------------
; End of the initialised image (value stored in the header's "size of
; image" field).  Everything below is reserved with rb and takes no space
; in the file; I_END is the value the header uses for both the memory
; size and the initial esp.
IMG_END:
align 16
;xx:
;               rb 16
;yy:
;               rb 16
; 16-byte, 16-byte-aligned scratch slots used by the SSE routines above.
; The names suggest they stand in for registers that 32-bit mode lacks
; (xmm8..xmm15 of the 64-bit original) - presumably; the code only uses
; them as plain memory.
xxmm3:
	       rb 16
xxmm8:
	       rb 16
xxmm9:
	       rb 16
xxmm10:
	       rb 16
xxmm12:
	       rb 16
xxmm13:
	       rb 16
xxmm14:
	       rb 16
xxmm15:
	       rb 16
align 16
; screen4:  byte image handed to system function 7 by draw_from_buffer
; screen16: float image, 16 bytes (4 floats) per pixel, written by ComputeColors
; memStack: stack area; esp starts at I_END and grows down into it
screen4        rb IMG_W * IMG_H * 4
screen16       rb IMG_W * IMG_H * 16
memStack       rb 4000
I_END: