I ported randall's clover renderer (see the Linux section of the forum at board.flatassembler.net) to KolibriOS and MenuetOS64. Originally it was 64-bit code using SSE3; now it is 32-bit code using SSE2.
http://macgub.vxm.pl/menuet/clover.zip -> code
http://macgub.vxm.pl/menuet/clover_big.png -> screenshot
Clover
-
Last edited by macgub on Tue Mar 27, 2018 1:34 pm, edited 1 time in total.
Please attach the sources too, so that the file does not become unavailable if you change the site URL. Good job!
OK
Code: Select all
; clover renderer written by randall
; originally 64 bit code and sse3 instructions for Linux
; ported to KolibriOS and MenuetOS64 by macgub
; www.macgub.hekko.pl
; now it's 32 bit code and sse2 instructions
; Build target selection. OS is compared against Kolibri in the
; "if OS = Kolibri ... else ... end if" blocks of draw_from_buffer and
; draw_window to pick the window size and the image position.
Menuet = 1
Kolibri = 0
OS = Kolibri
; Image size in pixels. g_img_w / g_img_h hold the same sizes as float
; constants (600.0) and have to be changed together with these.
; main advances x by 4 pixels per iteration, so IMG_W is expected to be a
; multiple of 4.
IMG_W=600
IMG_H=600
use32
org 0x0
; Executable header.
db 'MENUET01' ; 8 byte id
dd 0x01 ; header version
dd START ; start of code
dd IMG_END ; size of image: label placed right after the initialised constants
dd I_END ; memory for app: label placed after the reserved buffers and memStack
dd I_END ; esp: initial stack pointer, directly above memStack
dd 0x0 , 0x0 ; I_Param , I_Icon
START: ; start of execution
; Draw the window, render the whole image into the buffers (main), then
; copy the finished picture into the window. Execution then falls through
; into the event loop at "still".
call draw_window
call main
call draw_from_buffer
;-------------------------------------------------------------------------------
; NAME: still
; DESC: Event loop. Blocks in system function 10 until an event arrives and
;       dispatches on the event number returned in eax:
;         1 -> red    : redraw the window frame and re-blit the image
;         2 -> key    : read the key; Esc (27) terminates the program
;         3 -> button : read the button id; id 1 terminates the program
;       Any other event is ignored.
; NOTE: never returns; the program ends through system function -1.
;-------------------------------------------------------------------------------
still:
        mov     eax,10                  ; wait for event
        int     0x40
        cmp     eax,1                   ; redraw request ?
        je      red
        cmp     eax,2                   ; key in buffer ?
        je      key
        cmp     eax,3                   ; button in buffer ?
        je      button
        jmp     still                   ; unknown event: keep waiting

red:                                    ; redraw
        call    draw_window
        call    draw_from_buffer
        jmp     still

key:                                    ; key
        mov     eax,2                   ; take the key out of the buffer
        int     0x40
        ; The key code is returned in ah. Only ah is tested: the bits above
        ; it are not guaranteed to be zero (KolibriOS reports the scancode in
        ; bits 16-23 in ASCII mode), so comparing the whole of eax>>8 with 27
        ; would never match Esc there.
        cmp     ah,27                   ; Esc ?
        jne     still
        mov     eax,-1                  ; close this program
        int     0x40

button:                                 ; button
        mov     eax,17                  ; get id
        int     0x40
        cmp     ah,1                    ; button id=1 ?
        jne     noclose
        mov     eax,-1                  ; close this program
        int     0x40
noclose:
        jmp     still
;-------------------------------------------------------------------------------
; NAME: draw_from_buffer
; DESC: Copies the finished picture from screen4 into the window with
;       system function 7.
;       ebx = image data, ecx = width*65536 + height,
;       edx = x*65536 + y of the top-left corner.
;       The Kolibri build places the image at (0,0), the other build at
;       (10,25).
; CLOBBERS: eax, ebx, ecx, edx (plus whatever the system call changes)
;-------------------------------------------------------------------------------
draw_from_buffer:
        mov     ebx,screen4             ; source pixels
        mov     ecx,IMG_W*65536+IMG_H   ; size
if OS = Kolibri
        mov     edx,0*65536+0           ; position
else
        mov     edx,10*65536+25         ; position
end if
        mov     eax,7                   ; function 7: put image
        int     0x40
        ret
;-------------------------------------------------------------------------------
; NAME: SMOOTHSTEP (macro, expanded inline at every use)
; IN: xmm0 edge0
; IN: xmm1 edge1
; IN: xmm2 x
; OUT: xmm0 t*t*(3 - 2*t) with t = clamp((x - edge0)/(edge1 - edge0), 0, 1),
;      i.e. smoothstep(edge0, edge1, x) like in GLSL
; CLOBBERS: xmm1, xmm2
;-------------------------------------------------------------------------------
macro SMOOTHSTEP {
        subps   xmm1,xmm0               ; edge1 - edge0
        subps   xmm2,xmm0               ; x - edge0
        divps   xmm2,xmm1               ; t (unclamped)
        minps   xmm2,dqword [g_1_0]     ; t = min(t, 1)
        maxps   xmm2,dqword [g_0_0]     ; t = max(t, 0)
        movaps  xmm1,xmm2
        addps   xmm1,xmm1               ; 2*t
        movaps  xmm0,dqword [g_3_0]
        subps   xmm0,xmm1               ; 3 - 2*t
        mulps   xmm2,xmm2               ; t*t
        mulps   xmm0,xmm2               ; t*t*(3 - 2*t)
}
;-------------------------------------------------------------------------------
;segment readable executable
;-------------------------------------------------------------------------------
; NAME: sin
; IN: xmm0 | w z y x |
; OUT: xmm0 | sin(w) sin(z) sin(y) sin(x) |
; CLOBBERS: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
; METHOD: the argument is split into sign and magnitude, the magnitude is
;         scaled by 2/pi so that one unit is a quarter turn, and the
;         fractional part z (mirrored to 1-z in odd quarters) is fed to an
;         odd polynomial z*(p0 + p1*z^2 + p2*z^4 + p3*z^6). The sign of the
;         result is the input sign flipped when bit 1 of the quarter index
;         is set.
;-------------------------------------------------------------------------------
align 16
sin:
movaps xmm7,xmm0
andps xmm0,dqword [g_inv_sign_mask] ; xmm0 = |x|
andps xmm7,dqword [g_sign_mask] ; xmm7 = sign bit of x
mulps xmm0,dqword [g_2_div_pi] ; t = |x| * 2/pi
pxor xmm3,xmm3 ; xmm3 = 0 (integer compare operand)
movdqa xmm5,dqword [g_1] ; integer 1 in every lane
movaps xmm4,dqword [g_1_0] ; 1.0
cvttps2dq xmm2,xmm0 ; q = trunc(t), quarter index
pand xmm5,xmm2 ; q & 1
pcmpeqd xmm5,xmm3 ; xmm5 = all ones where q is even
cvtdq2ps xmm6,xmm2 ; float(q)
pand xmm2,dqword [g_2] ; q & 2
pslld xmm2,30 ; move bit 1 of q into the sign bit position
subps xmm0,xmm6 ; frac = t - q
minps xmm0,xmm4 ; frac = min(frac, 1.0)
subps xmm4,xmm0 ; 1.0 - frac
andps xmm0,xmm5 ; even quarter: keep frac
andnps xmm5,xmm4 ; odd quarter: keep 1.0 - frac
orps xmm0,xmm5 ; z = selected value
movaps xmm1,xmm0
mulps xmm0,xmm0 ; z^2
xorps xmm2,xmm7 ; result sign = input sign xor quarter sign
orps xmm1,xmm2 ; xmm1 = z carrying the result sign
movaps xmm7,xmm0 ; xmm7 = z^2
mulps xmm0,dqword [g_sincos_p3] ; Horner evaluation in z^2
addps xmm0,dqword [g_sincos_p2]
mulps xmm0,xmm7
addps xmm0,dqword [g_sincos_p1]
mulps xmm0,xmm7
addps xmm0,dqword [g_sincos_p0]
mulps xmm0,xmm1 ; multiply by signed z
ret
;-------------------------------------------------------------------------------
; NAME: atanr2
; IN: xmm0 | w0 z0 y0 x0 |
; IN: xmm1 | w1 z1 y1 x1 |
; OUT: xmm0 | atan2(w0,1/w1) atan2(z0,1/z1) atan2(y0,1/y1) atan2(x0,1/x1) |
; CLOBBERS: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
;           memory slots xxmm8 and xxmm9
; METHOD: t = in0 * in1 is the tangent. For |t| > 1 the approximate
;         reciprocal z = 1/t is used and the result is sign(z)*pi/2 - atan(z);
;         otherwise z = t. atan(z) is evaluated as a nested fraction built
;         from the g_atan_s* / g_atan_t* constants with rcpps (approximate
;         reciprocal). Where in1 is negative, pi carrying the sign of in0 is
;         added to move the angle into the correct half plane.
;-------------------------------------------------------------------------------
align 16
atanr2:
movaps xmm2,dqword [g_sign_mask]
xorps xmm3,xmm3 ; 0.0
movaps xmm5,dqword [g_1_0]
andps xmm2,xmm0 ; sign bit of in0
mulps xmm0,xmm1 ; t = in0 * in1
orps xmm2,dqword [g_pi] ; pi with the sign of in0
cmpleps xmm3,xmm1 ; mask: 0.0 <= in1
movaps xmm6,dqword [g_m1_0]
rcpps xmm4,xmm0 ; approx 1/t
cmpltps xmm5,xmm0 ; mask: 1.0 < t
cmpnleps xmm6,xmm0 ; mask: not (-1.0 <= t)
movaps xmm1,dqword [g_atan_s0]
orps xmm5,xmm6 ; mask: t outside [-1, 1]
movaps [xxmm8],xmm2 ; spill signed pi
movaps [xxmm9],xmm3 ; spill the "in1 >= 0" mask
andps xmm4,xmm5 ; 1/t where the mask is set
movaps xmm2,dqword [g_atan_t0]
movaps xmm7,xmm5 ; keep a copy of the mask for the final select
andnps xmm5,xmm0 ; t where the mask is clear
movaps xmm3,dqword [g_atan_s1]
orps xmm4,xmm5 ; z = mask ? 1/t : t
movaps xmm0,xmm4 ; xmm0 = z
movaps xmm6,dqword [g_atan_t1]
mulps xmm4,xmm4 ; xmm4 = z^2
addps xmm1,xmm4 ; s0 + z^2
movaps xmm5,dqword [g_atan_s2]
rcpps xmm1,xmm1
mulps xmm1,xmm2 ; t0 / (s0 + z^2)
movaps xmm2,dqword [g_atan_t2]
addps xmm3,xmm4 ; s1 + z^2
addps xmm1,xmm3
movaps xmm3,dqword [g_atan_s3]
rcpps xmm1,xmm1
mulps xmm1,xmm6 ; t1 / (s1 + z^2 + ...)
movaps xmm6,dqword [g_atan_t3]
addps xmm5,xmm4 ; s2 + z^2
addps xmm1,xmm5
movaps xmm5,dqword [g_sign_mask]
rcpps xmm1,xmm1
mulps xmm1,xmm2 ; t2 / (s2 + z^2 + ...)
addps xmm3,xmm4 ; s3 + z^2
movaps xmm4,dqword [g_0_5pi]
mulps xmm6,xmm0 ; t3 * z
addps xmm1,xmm3
andps xmm0,xmm5 ; sign bit of z
rcpps xmm1,xmm1
movaps xmm3,[xxmm9] ; reload the "in1 >= 0" mask
mulps xmm1,xmm6 ; xmm1 = atan(z) approximation
orps xmm0,xmm4 ; pi/2 with the sign of z
subps xmm0,xmm1 ; sign(z)*pi/2 - atan(z)
movaps xmm2,[xxmm8] ; reload signed pi
andps xmm0,xmm7 ; use the complement form where |t| > 1
andnps xmm7,xmm1 ; use atan(z) directly elsewhere
orps xmm0,xmm7
movaps xmm1,xmm0
andps xmm0,xmm3 ; in1 >= 0: angle is final
addps xmm1,xmm2 ; angle + signed pi
andnps xmm3,xmm1 ; in1 < 0: take the shifted angle
orps xmm0,xmm3
ret
;-------------------------------------------------------------------------------
; NAME: ComputeColors
; DESC: Shades four horizontally adjacent pixels of the clover picture.
; IN: edi pointer to 4 image pixels (4 x 16 bytes of float colour, 16-byte
;     aligned because it is written with movaps)
; IN: xmm0 | x3 x2 x1 x0 | normalized pixel x coordinates
; IN: xmm1 | y3 y2 y1 y0 | normalized pixel y coordinates
; OUT: [edi+0], [edi+16], [edi+32], [edi+48] = float colour | ? b g r | of
;      pixel 0..3; xmm0 is left holding the colour of pixel 3
; CLOBBERS: xmm0..xmm7, memory slots xxmm8, xxmm9 (via atanr2) and
;           xxmm12..xxmm15
; SLOTS: xxmm12 = 3.0 * a, xxmm13 = s, xxmm14 = a then f, xxmm15 = r then h
;-------------------------------------------------------------------------------
align 16
ComputeColors:
        ; r = sqrt(x * x + y * y)
        movaps  xmm2,xmm0
        mulps   xmm2,xmm0
        movaps  xmm3,xmm1
        mulps   xmm3,xmm1
        addps   xmm2,xmm3
        sqrtps  xmm3,xmm2
        movaps  [xxmm15],xmm3           ; xxmm15 = r
        ; a = atan2(y, x) = atanr2(y, 1/x)
        movaps  xmm2,xmm0
        movaps  xmm0,xmm1
        movaps  xmm1,xmm2
        rcpps   xmm1,xmm1
        call    atanr2
        movaps  [xxmm14],xmm0           ; xxmm14 = a
        ; s = 0.5 + 0.5 * sin(3.0 * a)
        mulps   xmm0,dqword [g_3_0]
        movaps  [xxmm12],xmm0           ; xxmm12 = 3.0 * a
        call    sin
        movaps  xmm1,dqword [g_0_5]
        mulps   xmm0,xmm1
        addps   xmm0,xmm1
        movaps  [xxmm13],xmm0           ; xxmm13 = s
        ; g = sin(0.5 * pi + 3.0 * a)
        movaps  xmm0,[xxmm12]
        addps   xmm0,dqword [g_0_5pi]
        call    sin                     ; xmm0 = g
        ; d = 0.3 + 0.6 * sqrt(s) + 0.15 * g * g
        mulps   xmm0,xmm0
        mulps   xmm0,dqword [g_0_15]    ; 0.15 * g * g
        movaps  xmm3,[xxmm13]
        sqrtps  xmm3,xmm3
        mulps   xmm3,dqword [g_0_6]     ; 0.6 * sqrt(s)
        ; The scaled square root itself has to be added here; adding the
        ; stored s instead would drop the 0.6*sqrt() term of the formula.
        addps   xmm0,xmm3
        addps   xmm0,dqword [g_0_3]     ; xmm0 = d
        ; h = r / d
        movaps  xmm3,[xxmm15]
        divps   xmm3,xmm0
        movaps  [xxmm15],xmm3           ; xxmm15 = h
        ; f = 1.0 - smoothstep(0.95, 1.0, h)
        movaps  xmm0,dqword [g_0_95]    ; edge0
        movaps  xmm1,dqword [g_1_0]     ; edge1
        movaps  xmm2,xmm3               ; x = h
        SMOOTHSTEP
        movaps  xmm3,dqword [g_1_0]
        subps   xmm3,xmm0
        movaps  [xxmm14],xmm3           ; xxmm14 = f
        ; h = h * (1.0 - 0.5 * (1.0 - h) * smoothstep(0.95 + 0.05*h, 1.0, sin(3.0*a)))
        movaps  xmm0,[xxmm12]
        call    sin
        movaps  xmm2,xmm0               ; x = sin(3.0 * a)
        movaps  xmm0,[xxmm15]
        mulps   xmm0,dqword [g_0_05]
        addps   xmm0,dqword [g_0_95]    ; edge0 = 0.95 + 0.05 * h
        movaps  xmm1,dqword [g_1_0]     ; edge1
        SMOOTHSTEP
        movaps  xmm1,dqword [g_1_0]
        movaps  xmm2,xmm1
        subps   xmm1,[xxmm15]           ; 1.0 - h
        mulps   xmm1,dqword [g_0_5]
        mulps   xmm0,xmm1
        subps   xmm2,xmm0               ; 1.0 - 0.5*(1.0 - h)*smoothstep(...)
        movaps  xmm3,[xxmm15]
        mulps   xmm3,xmm2
        movaps  [xxmm15],xmm3           ; xxmm15 = adjusted h
        ; colour = f * (g_c0 * h + g_c1) + (1 - f) * 1.0, one pixel at a time
        ; NOTE(review): xxmm15 holds h of four different pixels, so every
        ; lane of xmm7 mixes a colour coefficient with the h of the pixel at
        ; that lane index, and the same xmm7 is used for all four pixels.
        ; Confirm whether a per-pixel broadcast of h was intended.
        movaps  xmm6,dqword [g_1_0]     ; background colour (1.0 in every lane)
        movaps  xmm7,dqword [g_c0]
        mulps   xmm7,[xxmm15]
        addps   xmm7,dqword [g_c1]
        ; pixel 0
        movaps  xmm0,[xxmm14]
        shufps  xmm0,xmm0,00000000b     ; xmm0 = | f0 f0 f0 f0 |
        movaps  xmm1,xmm6
        subps   xmm1,xmm0               ; xmm1 = 1 - f0
        mulps   xmm0,xmm7
        mulps   xmm1,xmm6
        addps   xmm0,xmm1
        movaps  [edi+0],xmm0
        ; pixel 1
        movaps  xmm0,[xxmm14]
        shufps  xmm0,xmm0,01010101b     ; xmm0 = | f1 f1 f1 f1 |
        movaps  xmm1,xmm6
        subps   xmm1,xmm0               ; xmm1 = 1 - f1
        mulps   xmm0,xmm7
        mulps   xmm1,xmm6
        addps   xmm0,xmm1
        movaps  [edi+16],xmm0
        ; pixel 2
        movaps  xmm0,[xxmm14]
        shufps  xmm0,xmm0,10101010b     ; xmm0 = | f2 f2 f2 f2 |
        movaps  xmm1,xmm6
        subps   xmm1,xmm0               ; xmm1 = 1 - f2
        mulps   xmm0,xmm7
        mulps   xmm1,xmm6
        addps   xmm0,xmm1
        movaps  [edi+32],xmm0
        ; pixel 3
        movaps  xmm0,[xxmm14]
        shufps  xmm0,xmm0,11111111b     ; xmm0 = | f3 f3 f3 f3 |
        movaps  xmm1,xmm6
        subps   xmm1,xmm0               ; xmm1 = 1 - f3
        mulps   xmm0,xmm7
        mulps   xmm1,xmm6
        addps   xmm0,xmm1
        movaps  [edi+48],xmm0
        ret
;-------------------------------------------------------------------------------
; NAME: main
; DESC: Renders the whole picture.
;       1. For every row and every group of 4 pixels, normalises the pixel
;          coordinates to 2*(coord/size - 0.5) and calls ComputeColors, which
;          writes 4 float colours (64 bytes) into screen16.
;       2. Clamps every float colour to [0,1], scales it by 255 and stores
;          one dword per pixel in screen4.
;       3. Repacks screen4 in place from 4 to 3 bytes per pixel, swapping
;          the two low bytes of each dword on the way.
; NOTE(review): step 2 takes only the low dword of the converted vector
;       (movd), i.e. lane 0 of each colour; after the byte swap of step 3
;       that value ends up in the second byte of the pixel. The other colour
;       lanes are not used. Confirm this is intended.
; LOCALS (relative to ebp): imgptr, imgptr_float = buffer addresses,
;       x = | x3 x2 x1 x0 | integer columns, y = current row in all lanes
; CLOBBERS: eax, ebx, ecx, esi, edi, xmm0..xmm7
;-------------------------------------------------------------------------------
main:
imgptr equ ebp-8
imgptr_float equ ebp-16
x equ ebp-32
y equ ebp-48
        push    ebp
        mov     ebp,esp
        sub     esp,64
        mov     dword [imgptr_float],screen16
        mov     dword [imgptr],screen4
        mov     ebx,screen16            ; ebx = write position in the float image
        ;;
        ;; Compute image
        ;;
        pxor    xmm0,xmm0
        movdqu  [y],xmm0                ; row = 0
.LoopY:
        movdqa  xmm0,dqword [g_3_2_1_0]
        movdqu  [x],xmm0                ; columns 0..3
.LoopX:
        mov     edi,ebx
        ; normalized y coordinate
        movdqu  xmm1,[y]
        cvtdq2ps xmm1,xmm1
        divps   xmm1,dqword [g_img_h]
        subps   xmm1,dqword [g_0_5]
        addps   xmm1,xmm1
        ; normalized x coordinate
        movdqu  xmm0,[x]
        cvtdq2ps xmm0,xmm0
        divps   xmm0,dqword [g_img_w]
        subps   xmm0,dqword [g_0_5]
        addps   xmm0,xmm0
        call    ComputeColors
        add     ebx,64                  ; 4 pixels * 16 bytes
        ; next group of 4 columns
        movdqu  xmm0,[x]
        paddd   xmm0,dqword [g_4]
        movdqu  [x],xmm0
        cmp     dword [x+12],IMG_W      ; highest column of the new group
        jb      .LoopX
        ; next row
        movdqu  xmm0,[y]
        paddd   xmm0,dqword [g_1]
        movdqu  [y],xmm0
        cmp     dword [y],IMG_H
        jb      .LoopY
        ;;
        ;; Convert image: float colour -> one dword per pixel
        ;;
        mov     edi,[imgptr]
        mov     esi,[imgptr_float]
        mov     ecx,IMG_W*IMG_H
.ToDword:
        movaps  xmm0,[esi]
        maxps   xmm0,dqword [g_0_0]
        minps   xmm0,dqword [g_1_0]
        mulps   xmm0,dqword [g_255_0]
        cvttps2dq xmm0,xmm0
        movd    eax,xmm0                ; lane 0 only
        or      eax,0xff000000
        mov     [edi],eax
        add     edi,4
        add     esi,16
        dec     ecx
        jnz     .ToDword
        ;;
        ;; Repack in place: 4 bytes per pixel -> 3 bytes per pixel
        ;;
        mov     esi,[imgptr]
        mov     edi,esi
        cld
        mov     ecx,IMG_W*IMG_H
.Pack:
        lodsd                           ; read 4 bytes, esi += 4
        xchg    ah,al                   ; swap the two low bytes
        stosd                           ; write 4 bytes, edi += 4
        dec     edi                     ; net advance of 3: next pixel overwrites the 4th byte
        dec     ecx
        jnz     .Pack
        leave
        ret
; *********************************************
; ******* WINDOW DEFINITIONS AND DRAW ********
; *********************************************
; NAME: draw_window
; DESC: Defines and draws the program window between the "start of draw"
;       and "end of draw" notifications (function 12), then writes the
;       title text with function 4. The window is sized from IMG_W/IMG_H
;       plus a per-OS margin and placed at (100,100).
; CLOBBERS: eax, ebx, ecx, edx, esi, edi (plus whatever the system calls change)
draw_window:
        ; begin of window redraw
        mov     ebx,1                   ; 1 = start of draw
        mov     eax,12                  ; function 12: tell os about windowdraw
        int     0x40
        ; define and draw the window
if OS = Kolibri
        mov     ecx,100*65536+IMG_H+25  ; [y start] *65536 + [y size]
        mov     ebx,100*65536+IMG_W+9   ; [x start] *65536 + [x size]
else
        mov     ecx,100*65536+IMG_H+34  ; [y start] *65536 + [y size]
        mov     ebx,100*65536+IMG_W+18  ; [x start] *65536 + [x size]
end if
        mov     edi,labelt              ; title string (zero terminated)
        mov     edx,0x74000000          ; window style flags + work area colour
        xor     eax,eax                 ; function 0: define and draw window
        int     0x40
        ; title text
        mov     esi,labelen-labelt      ; text length (distance between the two labels)
        mov     edx,labelt              ; pointer to text beginning
        mov     ecx,0x20ddeeff          ; font 1 & color ( 0xF0RRGGBB )
        mov     ebx,8*65536+8           ; [x start] *65536 + [y start]
        mov     eax,4                   ; function 4: write text to window
        int     0x40
        ; end of window redraw
        mov     ebx,2                   ; 2 = end of draw
        mov     eax,12                  ; function 12: tell os about windowdraw
        int     0x40
        ret
;-------------------------------------------------------------------------------
; Initialised data: window title followed by the SIMD constant tables.
; Every table is 16 bytes (4 dwords) so it can be used as an aligned
; memory operand of an SSE instruction.
;-------------------------------------------------------------------------------
;align 1
; Window title. labelen is placed after the terminating zero, so
; labelen-labelt counts the terminator as well.
labelt:
db 'clover',0
labelen:
align 16
; Image size as floats, used by main to normalise pixel coordinates.
; Must match IMG_W / IMG_H.
g_img_w dd 4 dup 600.0
g_img_h dd 4 dup 600.0
; Colour coefficients used by ComputeColors: colour = g_c0 * h + g_c1.
g_c0 dd 0.4,0.3,0.0,0.0
g_c1 dd 0.0,0.2,0.0,0.0
; Scalar constants of the shading formula, replicated to all four lanes.
g_3_0 dd 4 dup 3.0
g_0_3 dd 4 dup 0.3
g_0_6 dd 4 dup 0.6
g_0_15 dd 4 dup 0.15
g_0_95 dd 4 dup 0.95
g_0_05 dd 4 dup 0.05
; Disabled byte-shuffle mask; not referenced by the code in this file.
;g_img_conv_mask db 8,4,0,12,12 dup 0x80
g_0_0 dd 4 dup 0.0
g_0_5 dd 4 dup 0.5
g_1_0 dd 4 dup 1.0
g_m1_0 dd 4 dup -1.0
g_255_0 dd 4 dup 255.0 ; float -> byte scale used by main
; Integer constants: g_1 and g_2 are bit masks in sin, g_1 is also the row
; increment and g_4 the column increment in main.
g_1 dd 4 dup 1
g_2 dd 4 dup 2
g_4 dd 4 dup 4
g_3_2_1_0 dd 0,1,2,3 ; initial columns of a 4-pixel group
; Masks selecting / clearing the IEEE sign bit.
g_sign_mask dd 4 dup 0x80000000
g_inv_sign_mask dd 4 dup not 0x80000000
g_0_5pi dd 4 dup 1.57079633
g_2_div_pi dd 4 dup 0.636619772
g_pi dd 4 dup 3.1415926535897
; Polynomial coefficients used by sin.
g_sincos_p0 dd 4 dup 1.5707963267948963959
g_sincos_p1 dd 4 dup -0.64596409750621907082
g_sincos_p2 dd 4 dup 0.07969262624561800806
g_sincos_p3 dd 4 dup -0.00468175413106023168
; Numerator (t) and offset (s) constants of the nested fraction used by atanr2.
g_atan_t0 dd 4 dup -0.091646118527267623468
g_atan_t1 dd 4 dup -1.3956945682312098640
g_atan_t2 dd 4 dup -94.393926122725531747
g_atan_t3 dd 4 dup 12.888383034157279340
g_atan_s0 dd 4 dup 1.2797564625607904396
g_atan_s1 dd 4 dup 2.1972168858277355914
g_atan_s2 dd 4 dup 6.8193064729268275701
g_atan_s3 dd 4 dup 28.205206687035841409
;-------------------------------------------------------------------------------
; Uninitialised data. IMG_END is the value written to the "size of image"
; field of the header; everything below is only reserved (rb), not stored
; in the file. I_END is the value written to the header as both the memory
; size and the initial esp.
;-------------------------------------------------------------------------------
IMG_END:
align 16
;xx:
; rb 16
;yy:
; rb 16
; 16-byte scratch slots accessed with movaps by ComputeColors and atanr2.
; The numbering follows the register names in the code comments
; (e.g. "xmm15 = r"); presumably these replace the xmm8..xmm15 registers
; of the 64-bit original, which do not exist in 32-bit code.
xxmm3:
rb 16
xxmm8:
rb 16
xxmm9:
rb 16
; xxmm10 is not referenced by the code in this file.
xxmm10:
rb 16
xxmm12:
rb 16
xxmm13:
rb 16
xxmm14:
rb 16
xxmm15:
rb 16
align 16
; screen4: packed output image, written by main and handed to function 7
; by draw_from_buffer. main first stores 4 bytes per pixel here and then
; repacks them in place to 3 bytes per pixel.
screen4 rb IMG_W * IMG_H * 4
; screen16: float image, 16 bytes (4 floats) per pixel, written by
; ComputeColors through edi and read back by main.
screen16 rb IMG_W * IMG_H * 16
; Program stack: 4000 bytes below I_END.
memStack rb 4000
I_END:
Who is online
Users browsing this forum: No registered users and 0 guests