|
Das deutsche QBasic- und FreeBASIC-Forum Für euch erreichbar unter qb-forum.de, fb-forum.de und freebasic-forum.de!
|
Vorheriges Thema anzeigen :: Nächstes Thema anzeigen |
Autor |
Nachricht |
UEZ
Anmeldungsdatum: 24.06.2016 Beiträge: 130 Wohnort: Opel Stadt
|
Verfasst am: 12.08.2016, 14:40 Titel: Schnellere Sin / Cos ASM Funktionen |
|
|
Ich habe nach einer schnelleren Variante für Sinus / Cosinus (noch nicht implementiert!) gesucht und habe diese Assembler-Codes dazu gefunden, aber das Resultat ist nicht richtig.
Code: | Dim Shared As Single cc, ss, x, y, z
Const As Single fMul = 2^31 / ACos(-1), fDiv = -&h9A68000
Declare Sub ASM_Test(t As Single)
Declare Sub Rotate(t As Single)
Sub ASM_Test(t As Single)
	' Rotates the shared point (x, y) by the angle encoded in t, using the
	' integer-only sine approximation (credit: Eukalyptus) for both sine and
	' cosine. Results are also left in the shared variables ss, cc and z.
	'
	' t : phase in "integer units": pi radians correspond to 2^31, i.e. the
	'     caller passes radians * fMul.
	'
	' The integer routine needs an integer phase, so t is converted first
	' (loading the raw bits of the Single gives meaningless results). The
	' conversion goes through a 64-bit integer and is masked to 32 bits so
	' that phases outside [-pi, pi) wrap around instead of overflowing.
	Dim As ULong uPhase = CULng(CLngInt(t) And &hFFFFFFFFLL)
	' Raw integer outputs of the approximation: -161906688 .. 161906688 with
	' inverted sign; dividing by fDiv (= -161906688) yields -1 .. 1.
	Dim As Long iSin, iCos
	' Constants used only by the disabled SSE variant kept below for reference.
	Dim As Single bias = 12582912.0, pi0 = ACos(-1), inv_pi = 0.318309886, p0 = 2.601429e-6, p1 = -1.980698e-4, p2 = +8.333017e-3, p3 = -1.666665e-1
	Dim As Single pi1 = 9.670257568359375e-4, pi2 = 6.2771141529083251953125e-7, pi3 = 1.2154201013012385202950e-10
	Asm
		' --- sine ------------------------------------------------------
		mov ebx, [uPhase]
		lea eax, [ebx*2+0x80000000]
		sar eax, 2
		imul eax                          ' edx:eax = eax * eax, high half used
		sar ebx, 31                       ' ebx = 0 or -1 (sign mask of the phase)
		lea eax, [edx*2-0x70000000]
		lea ecx, [edx*8+edx-0x24000000]
		imul edx
		xor ecx, ebx                      ' conditionally invert for the negative half
		lea eax, [edx*8+edx+0x44A00000]
		imul ecx
		mov [iSin], edx                   ' store into the local, not through a pointer variable
		' --- cosine: same sequence with the phase advanced by pi/2 -----
		mov ebx, [uPhase]
		add ebx, 0x40000000               ' 2^30 = pi/2 in phase units
		lea eax, [ebx*2+0x80000000]
		sar eax, 2
		imul eax
		sar ebx, 31
		lea eax, [edx*2-0x70000000]
		lea ecx, [edx*8+edx-0x24000000]
		imul edx
		xor ecx, ebx
		lea eax, [edx*8+edx+0x44A00000]
		imul ecx
		mov [iCos], edx
		/'
		Disabled SSE variant, kept for reference.
		NOTE(review): it appears to expect t in radians rather than in phase
		units, and its final store targets "Sinus", which is not declared in
		this Sub; write to a Single local instead if it is ever enabled.
		mov eax,[t]
		movd xmm0,eax
		movss xmm1,[inv_pi]
		movss xmm3,[bias]
		mulss xmm1,xmm0
		addss xmm1,xmm3
		movss xmm2,xmm1
		subss xmm1,xmm3
		movss xmm4,[pi1]
		movss xmm3,[pi0]
		mulss xmm3,xmm1
		pslld xmm2,31
		subss xmm0,xmm3
		mulss xmm4,xmm1
		movss xmm6,[p0]
		subss xmm0,xmm4
		movss xmm5,[pi2]
		mulss xmm5,xmm1
		subss xmm0,xmm5
		mulss xmm1,[pi3]
		subss xmm0,xmm1
		movss xmm7,xmm0
		mulss xmm7,xmm7
		mulss xmm6,xmm7
		pxor xmm0,xmm2
		addss xmm6,[p1]
		mulss xmm6,xmm7
		addss xmm6,[p2]
		mulss xmm6,xmm7
		addss xmm6,[p3]
		mulss xmm6,xmm7
		mulss xmm6,xmm0
		addss xmm0,xmm6
		movss [Sinus],xmm0
		'/
	End Asm
	' Integer -> float conversion happens here, then scaling to -1 .. 1.
	ss = iSin / fDiv
	cc = iCos / fDiv
	' Standard 2D rotation; z is used as the temporary for the new x.
	z = x * cc - y * ss
	y = x * ss + y * cc
	x = z
End Sub
Sub Rotate(t As Single)
	' Reference implementation: rotates the shared point (x, y) by t radians
	' using the built-in Sin/Cos. Leaves sine/cosine in ss/cc and the new x
	' coordinate in both z and x.
	ss = Sin(t) : cc = Cos(t)
	' Snapshot the old coordinates so both results use the unrotated point.
	Dim As Single fOldX = x, fOldY = y
	z = fOldX * cc - fOldY * ss
	y = fOldX * ss + fOldY * cc
	x = z
End Sub
' Test driver: rotates the point (2, 3) with ASM_Test and prints the values it
' left in ss/cc next to the built-in Sin/Cos of the same angle (0.5 rad).
x = 2.0
y = 3.0
' NOTE(review): fPI is not referenced anywhere in the visible code.
Dim As Double fPI = ACos(-1)
' fMul (= 2^31 / pi) converts radians into the phase units ASM_Test expects.
ASM_Test(0.5 * fMul)
Print "built-in Sin", Sin(0.5)
Print "built-in Cos", Cos(0.5)
'Print "x", x
'Print "y", y
'Print "z", z
Print "sin", ss
Print "cos", cc
' Wait for a key press so the output stays visible.
Sleep
|
Hat jemand eine Idee, warum das nicht richtig funktioniert (auch die SSE-Variante)?
Btw, gibt es einen ASM Debugger? _________________ Gruß,
UEZ |
|
Nach oben |
|
|
Eukalyptus
Anmeldungsdatum: 17.05.2013 Beiträge: 11
|
Verfasst am: 14.08.2016, 15:44 Titel: |
|
|
Hi UEZ!
Deinen Code hab ich mir noch nicht angesehen, aber ich hab hier ein paar Funktionen für dich:
Code: | #Include "string.bi"
Function _Sin6th(fX As Double) As Double
' Approximates Sin(fX) (fX in radians) with an integer polynomial sequence
' (called "6th order" by its author).
' Steps: scale fX to a 32-bit integer phase, run the imul/lea sequence, convert
' the integer result back to Double and rescale it.
' NOTE(review): written for 32-bit x86 (uses eax/ebx/ecx/edx); the author
' reports that the deviation grows strongly for |fX| >= 2^23.
Asm
' The constants are embedded in the code stream; jump over them.
jmp _Sin6th_Start
' 2^31 / pi: maps pi radians onto an integer phase of 2^31
_Sin6th_Mul: .double 683565275.57643158
' -1 / 161906688: rescales the integer result (sign inverted) to -1 .. 1
_Sin6th_Div: .double -0.0000000061763971109087229
' 1.5 * 2^52: adding it leaves the rounded integer in the low 32 bits of the double
_Sin6th_Rnd: .double 6755399441055744.0
_Sin6th_Start:
movq xmm0, [fX]
mulsd xmm0, [_Sin6th_Mul]
addsd xmm0, [_Sin6th_Rnd]
' ebx = integer phase (low 32 bits, so it wraps modulo 2^32)
movd ebx, xmm0
lea eax, [ebx*2+0x80000000]
sar eax, 2
' edx:eax = eax * eax; only the high half (edx) is used below
imul eax
' ebx = 0 or -1, the sign mask of the phase
sar ebx, 31
lea eax, [edx*2-0x70000000]
lea ecx, [edx*8+edx-0x24000000]
imul edx
' conditionally invert ecx depending on the sign of the phase
xor ecx, ebx
lea eax, [edx*8+edx+0x44A00000]
imul ecx
' the high half of the last product is the raw integer result
cvtsi2sd xmm0, edx
mulsd xmm0, [_Sin6th_Div]
movq [Function], xmm0
End Asm
End Function
Function _SinParabolic(fX As Double) As Double
' Approximates Sin(fX) (fX in radians) with a single integer squaring, i.e. a
' parabola per half-wave. Cheapest and least accurate variant in this file.
' NOTE(review): written for 32-bit x86 register usage.
Asm
' The constants are embedded in the code stream; jump over them.
jmp _SinPar_Start
' 2^31 / pi: maps pi radians onto an integer phase of 2^31
_SinPar_Mul: .double 683565275.57643158
' -2^-31: rescales the 32-bit integer result (sign inverted) to -1 .. 1
_SinPar_Div: .double -0.00000000046566128730773925
' 1.5 * 2^52: adding it leaves the rounded integer in the low 32 bits of the double
_SinPar_Rnd: .double 6755399441055744.0
_SinPar_Start:
movq xmm0, [fX]
mulsd xmm0, [_SinPar_Mul]
addsd xmm0, [_SinPar_Rnd]
' ebx = integer phase (low 32 bits)
movd ebx, xmm0
lea eax, [0x80000000+ebx*2]
' ebx = 0 or -1, the sign mask of the phase
sar ebx, 31
' edx:eax = eax * eax; only edx is used
imul eax
lea eax, [0x80000000+edx*2]
' conditionally invert the result depending on the sign of the phase
xor eax, ebx
cvtsi2sd xmm0, eax
mulsd xmm0, [_SinPar_Div]
movq [Function], xmm0
End Asm
End Function
Sub _Sin4X(pResult As Single Ptr, pX As Single Ptr)
' Computes four single-precision sine approximations at once with packed SSE.
' pX      : pointer to 4 consecutive Singles (radians); read unaligned.
' pResult : pointer to 4 consecutive Singles receiving the results; written unaligned.
' Method: k = round(x / pi), r = x - k * pi, result = r * (C + r^2 * (B + A * r^2)),
' negated when k is odd.
' NOTE(review): written for 32-bit x86 (pointers are loaded into esi/edi).
Asm
' The constants are embedded in the code stream; jump over them.
jmp _Sin4X_Start
' 16-byte alignment is needed because the tables are used as aligned SSE memory operands
.align 16
_Sin4X_InvPI: .float 0.318309886183791, 0.318309886183791, 0.318309886183791, 0.318309886183791
' 1.5 * 2^23: adding it leaves round(value) in the low mantissa bits of the float
_Sin4X_Rnd: .float 12582912.0, 12582912.0, 12582912.0, 12582912.0
_Sin4X_PI: .float 3.14159265358979, 3.14159265358979, 3.14159265358979, 3.14159265358979
_Sin4X_A: .float 0.00735246819687012, 0.00735246819687012, 0.00735246819687012, 0.00735246819687012
_Sin4X_B: .float -0.165289113970147, -0.165289113970147, -0.165289113970147, -0.165289113970147
_Sin4X_C: .float 0.999691986295968, 0.999691986295968, 0.999691986295968, 0.999691986295968
' sign-bit mask for each lane
_Sin4X_Neg: .long &h80000000, &h80000000, &h80000000, &h80000000
_Sin4X_Start:
mov esi, [pX]
mov edi, [pResult]
movups xmm0, [esi] 'fX4, fX3, fX2, fX1
'k = _Round(INVPI * fX)
movaps xmm3, xmm0
mulps xmm3, [_Sin4X_InvPI]
addps xmm3, [_Sin4X_Rnd]
' shift out sign/exponent bits and sign-extend the remaining 22 bits: xmm3 = k as int32
pslld xmm3, 10
psrad xmm3, 10
cvtdq2ps xmm1, xmm3
'fX -= k * PI
mulps xmm1, [_Sin4X_PI]
subps xmm0, xmm1
'X2 = fX * fX
'fX = fX*(C + X2*(B + A*X2))
movaps xmm1, xmm0
mulps xmm1, xmm1
movaps xmm2, [_Sin4X_A]
mulps xmm2, xmm1
addps xmm2, [_Sin4X_B]
mulps xmm1, xmm2
addps xmm1, [_Sin4X_C]
mulps xmm0, xmm1
'If (k Mod 2) Then fX = -fX
' move bit 0 of k into the sign position, then replicate it across the lane
' (an arithmetic shift count above 31 fills the lane with the sign bit)
pslld xmm3, 31
psrad xmm3, 32
' keep only the sign bit of the mask and flip the result's sign with it
andps xmm3, [_Sin4X_Neg]
xorps xmm3, xmm0 '-fX4, -fX3, -fX2, -fX1
movups [edi], xmm3
End Asm
End Sub
Function _SinFast(fX As Double) As Double
' Approximates Sin(fX) (fX in radians) in double precision with scalar SSE2.
' Method: k = round(fX / pi), r = fX - k * pi,
' result = r * (C + r^2 * (B + A * r^2)), negated when k is odd.
' The author reports a constant deviation of about 0.012% for |fX| up to 2^31.
Asm
' The constants are embedded in the code stream; jump over them.
jmp _SinFast_Start
.align 16
_SinFast_InvPI: .double 0.318309886183790671537767526745028724
' 1.5 * 2^52: adding it leaves round(value) in the low 32 bits of the double
_SinFast_Rnd: .double 6755399441055744.0
_SinFast_PI: .double 3.14159265358979323846264338327950288
_SinFast_A: .double 0.00735246819687011731341356165096815
_SinFast_B: .double -0.16528911397014738207016302002888890
_SinFast_C: .double 0.99969198629596757779830113868360584
_SinFast_Start:
movq xmm3, [fX]
'k = _Round(INVPI * fX)
' xmm0 keeps the unscaled argument for the range reduction below
movq xmm0, xmm3
mulsd xmm3, [_SinFast_InvPI]
addsd xmm3, [_SinFast_Rnd]
' low dword of xmm3 is k as int32: convert it to double and also copy it to eax
cvtdq2pd xmm1, xmm3
movd eax, xmm3
'fX -= k * PI
mulsd xmm1, [_SinFast_PI]
subsd xmm0, xmm1
'X2 = fX * fX
'fX = fX*(C + X2*(B + A*X2))
movq xmm1, xmm0
mulsd xmm1, xmm1
movq xmm2, [_SinFast_A]
mulsd xmm2, xmm1
addsd xmm2, [_SinFast_B]
mulsd xmm1, xmm2
addsd xmm1, [_SinFast_C]
mulsd xmm0, xmm1
'If (k Mod 2) Then fX = -fX
test eax, 1
jz _SinFast_End
' negate by computing 0 - result
movq xmm1, xmm0
xorps xmm0, xmm0
subsd xmm0, xmm1
_SinFast_End:
movq [Function], xmm0
End Asm
End Function
' Plot demo: draws the built-in sine and the four approximations on top of
' each other, one curve per key press, over the range -iRange .. iRange.
Dim As Integer iRange = 5
Screen 20, 32
Window (-iRange, -1.3) - (iRange, 1.3)
Dim As Single fStep, aX(4), aRes(4)
fStep = 1 / PMap(1, 0)
' Caption shown while the corresponding curve is on screen.
Dim As String aCaption(0 To 4) = {"Sin normal ", "Sin parabolic ", "Sin 6th order ", "Sin Fast ", "Sin 4-Fach "}
For iCurve As Integer = 0 To 4
	ScreenLock()
	For x As Single = -iRange To iRange Step fStep
		Select Case iCurve
		Case 0
			PSet(x, Sin(x)), &h00FF00
		Case 1
			PSet(x, _SinParabolic(x)), &hFF0000
		Case 2
			PSet(x, _Sin6th(x)), &h0000FF
		Case 3
			PSet(x, _SinFast(x)), &h00DDFF
		Case Else
			' The packed routine works on 4 values; only lane 0 is plotted.
			aX(0) = x
			_Sin4X(@aRes(0), @aX(0))
			PSet(x, aRes(0)), &hFFDD00
		End Select
	Next x
	Locate 2, 20: Print aCaption(iCurve)
	Locate 3, 20: Print "press enter..."
	ScreenUnlock()
	Sleep
Next iCurve
' Speed test: times iMax + 1 calls of each sine variant and stores the elapsed
' time in milliseconds in aTimer(0..4)
' (0 = built-in, 1 = parabolic, 2 = 6th order, 3 = fast, 4 = packed 4x).
Locate 2, 20: Print "Testing Speed... "
Locate 3, 20: Print " "
Dim As Double aTimer(5), fTmp
Dim As Integer iMax = 10000000
' Timer returns seconds; the factor 1000 converts to milliseconds.
aTimer(0) = Timer
For i As Integer = 0 To iMax
fTmp = Sin(i)
Next
aTimer(0) = (Timer - aTimer(0)) * 1000
aTimer(1) = Timer
For i As Integer = 0 To iMax
fTmp = _SinParabolic(i)
Next
aTimer(1) = (Timer - aTimer(1)) * 1000
aTimer(2) = Timer
For i As Integer = 0 To iMax
fTmp = _Sin6th(i)
Next
aTimer(2) = (Timer - aTimer(2)) * 1000
aTimer(3) = Timer
For i As Integer = 0 To iMax
fTmp = _SinFast(i)
Next
aTimer(3) = (Timer - aTimer(3)) * 1000
' The packed variant is timed with a fixed input vector; each call computes 4 sines.
aX(0) = 1
aX(1) = 2
aX(2) = 3
aX(3) = 4
aTimer(4) = Timer
For i As Integer = 0 To iMax
_Sin4X(@aRes(0), @aX(0))
Next
aTimer(4) = (Timer - aTimer(4)) * 1000
' Accuracy test and report: compares every approximation against the built-in
' Sin for iError random arguments in [-2^15, 2^15) and prints the timings from
' the speed test together with the mean absolute error of each variant.
Dim As Integer iError = 100000
Dim As Single fSin, fX, aError(5)
' Exactly iError samples are taken, so dividing the sums by iError below
' yields the true mean (a 0-based loop would take one sample too many).
For i As Integer = 1 To iError
	fX = (Rnd - 0.5) * 2^16
	aX(0) = fX
	fSin = Sin(fX)
	aError(1) += Abs(fSin - _SinParabolic(fX))
	aError(2) += Abs(fSin - _Sin6th(fX))
	aError(3) += Abs(fSin - _SinFast(fX))
	' The packed variant is checked on lane 0 only.
	_Sin4X(@aRes(0), @aX(0))
	aError(4) += Abs(fSin - aRes(0))
Next
' The "%" in the format mask makes Format display the mean error times 100.
Locate 3, 22: Print "Normal: " & Format(aTimer(0), "0.00 ms")
Locate 4, 22: Print "Prabolic: " & Format(aTimer(1), "0.00 ms") & " Error: " & Format(aError(1) / iError, "0.00000 %")
Locate 5, 22: Print "6th Order: " & Format(aTimer(2), "0.00 ms") & " Error: " & Format(aError(2) / iError, "0.00000 %")
Locate 6, 22: Print "Fast: " & Format(aTimer(3), "0.00 ms") & " Error: " & Format(aError(3) / iError, "0.00000 %")
Locate 7, 22: Print "4-Fach: " & Format(aTimer(4), "0.00 ms") & " Error: " & Format(aError(4) / iError, "0.00000 %")
Sleep
|
Die Funktionen Parabolic u 6thOrder laufen bei einem Wertebereich von +-2^22 ziemlich konstant, ab 2^23 steigt die Abweichung enorm an.
4-Fach ist am schnellsten, da man gleichzeitig 4 Sinus berechnen kann.
Jedoch steigt der Error-Wert proportional zum Wertebereich
Am genauesten ist SinFast mit einer durchgängigen Abweichung von 0.012% bis Wertebereich +-2^31
Obwohl Parabolic die größte Abweichung zu einem Sinus hat, ist die Funktion dennoch für Audiosynthese interessant, weil nur ein harmonischer! Oberton entsteht...
lgE |
|
Nach oben |
|
|
UEZ
Anmeldungsdatum: 24.06.2016 Beiträge: 130 Wohnort: Opel Stadt
|
Verfasst am: 14.08.2016, 23:29 Titel: |
|
|
Vielen Dank Eukalyptus
Hast du noch die Cosinus Funktionen? Dann könnte ich testen, wie viel Schub die "brotlose Kunst" erhält. _________________ Gruß,
UEZ |
|
Nach oben |
|
|
Eukalyptus
Anmeldungsdatum: 17.05.2013 Beiträge: 11
|
Verfasst am: 16.08.2016, 13:24 Titel: |
|
|
OK - hier mal derselbe Code mit Cos:
Code: | #Include "string.bi"
Function _Cos6th(fX As Double) As Double
' Approximates Cos(fX) (fX in radians) with the integer "6th order" sine
' sequence, applied to a phase that is advanced by a quarter period.
' NOTE(review): written for 32-bit x86 register usage.
Asm
' The constants are embedded in the code stream; jump over them.
jmp _Cos6th_Start
' 2^31 / pi: maps pi radians onto an integer phase of 2^31
_Cos6th_Mul: .double 683565275.57643158
' -1 / 161906688: rescales the integer result (sign inverted) to -1 .. 1
_Cos6th_Div: .double -0.0000000061763971109087229
' 1.5 * 2^52: adding it leaves the rounded integer in the low 32 bits of the double
_Cos6th_Rnd: .double 6755399441055744.0
_Cos6th_Start:
movq xmm0, [fX]
mulsd xmm0, [_Cos6th_Mul]
addsd xmm0, [_Cos6th_Rnd]
' ebx = integer phase (low 32 bits)
movd ebx, xmm0
' 0x40000000 = 2^30 = pi/2 in phase units: cos(x) = sin(x + pi/2)
add ebx, 0x40000000 'SinToCos
lea eax, [ebx*2+0x80000000]
sar eax, 2
' edx:eax = eax * eax; only the high half (edx) is used below
imul eax
' ebx = 0 or -1, the sign mask of the phase
sar ebx, 31
lea eax, [edx*2-0x70000000]
lea ecx, [edx*8+edx-0x24000000]
imul edx
' conditionally invert ecx depending on the sign of the phase
xor ecx, ebx
lea eax, [edx*8+edx+0x44A00000]
imul ecx
cvtsi2sd xmm0, edx
mulsd xmm0, [_Cos6th_Div]
movq [Function], xmm0
End Asm
End Function
Function _CosParabolic(fX As Double) As Double
' Approximates Cos(fX) (fX in radians) with the parabolic integer sine
' sequence, applied to a phase that is advanced by a quarter period.
' NOTE(review): written for 32-bit x86 register usage.
Asm
' The constants are embedded in the code stream; jump over them.
jmp _CosPar_Start
' 2^31 / pi: maps pi radians onto an integer phase of 2^31
_CosPar_Mul: .double 683565275.57643158
' -2^-31: rescales the 32-bit integer result (sign inverted) to -1 .. 1
_CosPar_Div: .double -0.00000000046566128730773925
' 1.5 * 2^52: adding it leaves the rounded integer in the low 32 bits of the double
_CosPar_Rnd: .double 6755399441055744.0
_CosPar_Start:
movq xmm0, [fX]
mulsd xmm0, [_CosPar_Mul]
addsd xmm0, [_CosPar_Rnd]
' ebx = integer phase (low 32 bits)
movd ebx, xmm0
' 0x40000000 = 2^30 = pi/2 in phase units: cos(x) = sin(x + pi/2)
add ebx, 0x40000000 'SinToCos
lea eax, [0x80000000+ebx*2]
' ebx = 0 or -1, the sign mask of the phase
sar ebx, 31
' edx:eax = eax * eax; only edx is used
imul eax
lea eax, [0x80000000+edx*2]
' conditionally invert the result depending on the sign of the phase
xor eax, ebx
cvtsi2sd xmm0, eax
mulsd xmm0, [_CosPar_Div]
movq [Function], xmm0
End Asm
End Function
Sub _Cos4X(pResult As Single Ptr, pX As Single Ptr)
' Computes four single-precision cosine approximations at once with packed SSE.
' pX      : pointer to 4 consecutive Singles (radians); read unaligned.
' pResult : pointer to 4 consecutive Singles receiving the results; written unaligned.
' Method: add pi/2 to the input, then evaluate the packed sine polynomial:
' k = round(x / pi), r = x - k * pi, result = r * (C + r^2 * (B + A * r^2)),
' negated when k is odd.
' NOTE(review): written for 32-bit x86 (pointers are loaded into esi/edi).
Asm
' The constants are embedded in the code stream; jump over them.
jmp _Cos4X_Start
' 16-byte alignment is needed because the tables are used as aligned SSE memory operands
.align 16
_Cos4X_InvPI: .float 0.318309886183791, 0.318309886183791, 0.318309886183791, 0.318309886183791
' 1.5 * 2^23: adding it leaves round(value) in the low mantissa bits of the float
_Cos4X_Rnd: .float 12582912.0, 12582912.0, 12582912.0, 12582912.0
_Cos4X_PI: .float 3.14159265358979, 3.14159265358979, 3.14159265358979, 3.14159265358979
_Cos4X_A: .float 0.00735246819687012, 0.00735246819687012, 0.00735246819687012, 0.00735246819687012
_Cos4X_B: .float -0.165289113970147, -0.165289113970147, -0.165289113970147, -0.165289113970147
_Cos4X_C: .float 0.999691986295968, 0.999691986295968, 0.999691986295968, 0.999691986295968
' pi/2: phase offset that turns the sine evaluation into a cosine
_Cos4X_Cos: .float 1.570796326794896, 1.570796326794896, 1.570796326794896, 1.570796326794896
' sign-bit mask for each lane
_Cos4X_Neg: .long &h80000000, &h80000000, &h80000000, &h80000000
_Cos4X_Start:
mov esi, [pX]
mov edi, [pResult]
movups xmm0, [esi] 'fX4, fX3, fX2, fX1
addps xmm0, [_Cos4X_Cos]
'k = _Round(INVPI * fX)
movaps xmm3, xmm0
mulps xmm3, [_Cos4X_InvPI]
addps xmm3, [_Cos4X_Rnd]
' shift out sign/exponent bits and sign-extend the remaining 22 bits: xmm3 = k as int32
pslld xmm3, 10
psrad xmm3, 10
cvtdq2ps xmm1, xmm3
'fX -= k * PI
mulps xmm1, [_Cos4X_PI]
subps xmm0, xmm1
'X2 = fX * fX
'fX = fX*(C + X2*(B + A*X2))
movaps xmm1, xmm0
mulps xmm1, xmm1
movaps xmm2, [_Cos4X_A]
mulps xmm2, xmm1
addps xmm2, [_Cos4X_B]
mulps xmm1, xmm2
addps xmm1, [_Cos4X_C]
mulps xmm0, xmm1
'If (k Mod 2) Then fX = -fX
' move bit 0 of k into the sign position, then replicate it across the lane
pslld xmm3, 31
psrad xmm3, 32
' keep only the sign bit of the mask and flip the result's sign with it
andps xmm3, [_Cos4X_Neg]
xorps xmm3, xmm0 '-fX4, -fX3, -fX2, -fX1
movups [edi], xmm3
End Asm
End Sub
Function _CosFast(fX As Double) As Double
' Approximates Cos(fX) (fX in radians) in double precision with scalar SSE2.
' Method: y = fX + pi/2, k = round(y / pi), r = y - k * pi,
' result = r * (C + r^2 * (B + A * r^2)), negated when k is odd.
Asm
' The constants are embedded in the code stream; jump over them.
jmp _CosFast_Start
.align 16
_CosFast_InvPI: .double 0.318309886183790671537767526745028724
' 1.5 * 2^52: adding it leaves round(value) in the low 32 bits of the double
_CosFast_Rnd: .double 6755399441055744.0
_CosFast_PI: .double 3.14159265358979323846264338327950288
_CosFast_A: .double 0.00735246819687011731341356165096815
_CosFast_B: .double -0.16528911397014738207016302002888890
_CosFast_C: .double 0.99969198629596757779830113868360584
' pi/2: phase offset that turns the sine evaluation into a cosine
_CosFast_Cos: .double 1.57079632679489661923132169163975144
_CosFast_Start:
movq xmm3, [fX]
addsd xmm3, [_CosFast_Cos]
'k = _Round(INVPI * fX)
' xmm0 keeps the shifted argument for the range reduction below
movq xmm0, xmm3
mulsd xmm3, [_CosFast_InvPI]
addsd xmm3, [_CosFast_Rnd]
' low dword of xmm3 is k as int32: convert it to double and also copy it to eax
cvtdq2pd xmm1, xmm3
movd eax, xmm3
'fX -= k * PI
mulsd xmm1, [_CosFast_PI]
subsd xmm0, xmm1
'X2 = fX * fX
'fX = fX*(C + X2*(B + A*X2))
movq xmm1, xmm0
mulsd xmm1, xmm1
movq xmm2, [_CosFast_A]
mulsd xmm2, xmm1
addsd xmm2, [_CosFast_B]
mulsd xmm1, xmm2
addsd xmm1, [_CosFast_C]
mulsd xmm0, xmm1
'If (k Mod 2) Then fX = -fX
test eax, 1
jz _CosFast_End
' negate by computing 0 - result
movq xmm1, xmm0
xorps xmm0, xmm0
subsd xmm0, xmm1
_CosFast_End:
movq [Function], xmm0
End Asm
End Function
' Plot demo: draws the built-in cosine and the four approximations on top of
' each other, one curve per key press, over the range -iRange .. iRange.
Dim As Integer iRange = 5
Screen 20, 32
Window (-iRange, -1.3) - (iRange, 1.3)
Dim As Single fStep, aX(4), aRes(4)
fStep = 1 / PMap(1, 0)
' Caption shown while the corresponding curve is on screen.
Dim As String aCaption(0 To 4) = {"Cos normal ", "Cos parabolic ", "Cos 6th order ", "Cos Fast ", "Cos 4-Fach "}
For iCurve As Integer = 0 To 4
	ScreenLock()
	For x As Single = -iRange To iRange Step fStep
		Select Case iCurve
		Case 0
			PSet(x, Cos(x)), &h00FF00
		Case 1
			PSet(x, _CosParabolic(x)), &hFF0000
		Case 2
			PSet(x, _Cos6th(x)), &h0000FF
		Case 3
			PSet(x, _CosFast(x)), &h00DDFF
		Case Else
			' The packed routine works on 4 values; only lane 0 is plotted.
			aX(0) = x
			_Cos4X(@aRes(0), @aX(0))
			PSet(x, aRes(0)), &hFFDD00
		End Select
	Next x
	Locate 2, 20: Print aCaption(iCurve)
	Locate 3, 20: Print "press enter..."
	ScreenUnlock()
	Sleep
Next iCurve
' Speed test: times iMax + 1 calls of each cosine variant and stores the
' elapsed time in milliseconds in aTimer(0..4)
' (0 = built-in, 1 = parabolic, 2 = 6th order, 3 = fast, 4 = packed 4x).
Locate 2, 20: Print "Testing Speed... "
Locate 3, 20: Print " "
Dim As Double aTimer(5), fTmp
Dim As Integer iMax = 10000000
' Timer returns seconds; the factor 1000 converts to milliseconds.
aTimer(0) = Timer
For i As Integer = 0 To iMax
fTmp = Cos(i)
Next
aTimer(0) = (Timer - aTimer(0)) * 1000
aTimer(1) = Timer
For i As Integer = 0 To iMax
fTmp = _CosParabolic(i)
Next
aTimer(1) = (Timer - aTimer(1)) * 1000
aTimer(2) = Timer
For i As Integer = 0 To iMax
fTmp = _Cos6th(i)
Next
aTimer(2) = (Timer - aTimer(2)) * 1000
aTimer(3) = Timer
For i As Integer = 0 To iMax
fTmp = _CosFast(i)
Next
aTimer(3) = (Timer - aTimer(3)) * 1000
' The packed variant is timed with a fixed input vector; each call computes 4 cosines.
aX(0) = 1
aX(1) = 2
aX(2) = 3
aX(3) = 4
aTimer(4) = Timer
For i As Integer = 0 To iMax
_Cos4X(@aRes(0), @aX(0))
Next
aTimer(4) = (Timer - aTimer(4)) * 1000
' Accuracy test and report: compares every approximation against the built-in
' Cos for iError random arguments in [-2^15, 2^15) and prints the timings from
' the speed test together with the mean absolute error of each variant.
Dim As Integer iError = 100000
Dim As Single fCos, fX, aError(5)
' Exactly iError samples are taken, so dividing the sums by iError below
' yields the true mean (a 0-based loop would take one sample too many).
For i As Integer = 1 To iError
	fX = (Rnd - 0.5) * 2^16
	aX(0) = fX
	fCos = Cos(fX)
	aError(1) += Abs(fCos - _CosParabolic(fX))
	aError(2) += Abs(fCos - _Cos6th(fX))
	aError(3) += Abs(fCos - _CosFast(fX))
	' The packed variant is checked on lane 0 only.
	_Cos4X(@aRes(0), @aX(0))
	aError(4) += Abs(fCos - aRes(0))
Next
' The "%" in the format mask makes Format display the mean error times 100.
Locate 3, 22: Print "Normal: " & Format(aTimer(0), "0.00 ms")
Locate 4, 22: Print "Prabolic: " & Format(aTimer(1), "0.00 ms") & " Error: " & Format(aError(1) / iError, "0.00000 %")
Locate 5, 22: Print "6th Order: " & Format(aTimer(2), "0.00 ms") & " Error: " & Format(aError(2) / iError, "0.00000 %")
Locate 6, 22: Print "Fast: " & Format(aTimer(3), "0.00 ms") & " Error: " & Format(aError(3) / iError, "0.00000 %")
Locate 7, 22: Print "4-Fach: " & Format(aTimer(4), "0.00 ms") & " Error: " & Format(aError(4) / iError, "0.00000 %")
Sleep
|
Und hier noch die "genaue" Variante, welche Sin und Cos gleichzeitig berechnet:
Code: | #Include "string.bi"
Function _SinCos(fX As Double, ByRef f_Cos As Double = 0) As Double
' Computes sine and cosine approximations of fX (radians) in one pass with
' packed SSE2/SSE3: lane 0 evaluates the sine, lane 1 the cosine (same
' polynomial on fX + pi/2).
' Returns the sine; the cosine is written to f_Cos.
' NOTE(review): written for 32-bit x86 (the ByRef address is loaded into edi);
' movddup requires SSE3.
Asm
' The constants are embedded in the code stream; jump over them.
jmp _SinCos_Start
' 16-byte alignment is needed because the tables are used as aligned SSE memory operands
.align 16
_SinCos_InvPI: .double 0.318309886183790671537767526745028724, 0.318309886183790671537767526745028724
' 1.5 * 2^52: adding it leaves round(value) in the low 32 bits of each double
_SinCos_Rnd: .double 6755399441055744.0, 6755399441055744.0
_SinCos_PI: .double 3.14159265358979323846264338327950288, 3.14159265358979323846264338327950288
_SinCos_A: .double 0.00735246819687011731341356165096815, 0.00735246819687011731341356165096815
_SinCos_B: .double -0.16528911397014738207016302002888890, -0.16528911397014738207016302002888890
_SinCos_C: .double 0.99969198629596757779830113868360584, 0.99969198629596757779830113868360584
' per-lane phase offset: 0 for the sine lane, pi/2 for the cosine lane
_SinCos_Cos: .double 0.0, 1.57079632679489661923132169163975144
' sign-bit mask for the upper dword of each double
_SinCos_Neg: .long 0, 0x80000000, 0, 0x80000000
_SinCos_Start:
movddup xmm3, [fX] '[fX][fX]
' edi = address of the caller's f_Cos
mov edi, [f_Cos]
addpd xmm3, [_SinCos_Cos] '[fXCos][fXSin]
movaps xmm0, xmm3 'k = _Round(INVPI * fX)
mulpd xmm3, [_SinCos_InvPI]
addpd xmm3, [_SinCos_Rnd]
' gather the low dword of each double (the two k values) into dwords 0 and 1
pshufd xmm3, xmm3, 0xE8
cvtdq2pd xmm1, xmm3
mulpd xmm1, [_SinCos_PI] 'fX -= k * PI
subpd xmm0, xmm1
movaps xmm1, xmm0 'X2 = fX * fX
mulpd xmm1, xmm1
movaps xmm2, [_SinCos_A] 'fX = fX*(C + X2*(B + A*X2))
mulpd xmm2, xmm1
addpd xmm2, [_SinCos_B]
mulpd xmm1, xmm2
addpd xmm1, [_SinCos_C]
mulpd xmm0, xmm1
pslld xmm3, 31 'If (k Mod 2) Then fX = -fX
psrad xmm3, 31
' duplicate each parity mask so it covers both dwords of its double
punpckldq xmm3, xmm3
andps xmm3, [_SinCos_Neg]
xorps xmm3, xmm0 '[-fX2][-fX1]
' low lane = sine (return value), high lane = cosine (stored through edi)
movq [Function], xmm3
movhpd [edi], xmm3
End Asm
End Function
' Demo for _SinCos: plots the built-in Sin/Cos, then the combined
' approximation, and finally times both approaches.
Dim As Integer iRange = 5
Screen 20, 32
Window (-iRange, -1.3) - (iRange, 1.3)
Dim As Single fStep
fStep = 1 / PMap(1, 0)
ScreenLock()
For x As Single = -iRange To iRange Step fStep
PSet(x, Sin(x)), &h00FF00
PSet(x, Cos(x)), &h00FFFF
Next x
Locate 2, 20: Print "Sin + Cos normal"
Locate 3, 20: Print "press enter..."
ScreenUnlock()
Sleep
Dim As Double fCos
ScreenLock()
For x As Single = -iRange To iRange Step fStep
' _SinCos returns the sine and writes the cosine into fCos
PSet(x, _SinCos(x, fCos)), &hFF00CC
PSet(x, fCos), &hFFCC00
Next
Locate 2, 20: Print "SinCos Fast "
Locate 3, 20: Print "press enter..."
ScreenUnlock()
Sleep
Locate 2, 20: Print "Testing Speed... "
Locate 3, 20: Print " "
Dim As Integer iMax = 5000000
Dim As Double aTimer(3), fSin
' Each loop runs iMax + 1 times; Timer differences are converted to milliseconds.
aTimer(0) = Timer
For i As Integer = 0 To iMax
fSin = Sin(i)
fCos = Cos(i)
Next
aTimer(0) = (Timer - aTimer(0)) * 1000
aTimer(1) = Timer
For i As Integer = 0 To iMax
fSin = _SinCos(i, fCos)
Next
aTimer(1) = (Timer - aTimer(1)) * 1000
Locate 3, 22: Print "Sin + Cos Normal: " & Format(aTimer(0), "0.00 ms")
Locate 4, 22: Print "SinCos Fast: " & Format(aTimer(1), "0.00 ms")
Sleep |
Mehr Speed ist noch drinnen, wenn du zb. eine dieser Funktionen in eine ASM-Schleife einbaust und die restlichen freien Register mit den Konstanten befüllst.
Oder wenn du etwa bei 6thOrder gleich im "richtigen" Wertebereich rechnest, dann fallen pro Loop schonmal etliche Berechnungen weg...
lgE |
|
Nach oben |
|
|
UEZ
Anmeldungsdatum: 24.06.2016 Beiträge: 130 Wohnort: Opel Stadt
|
Verfasst am: 16.08.2016, 14:18 Titel: |
|
|
Vielen Dank Eukalyptus.
Um FB zu lernen, versuche ich, ein paar ältere Skripte nach FB zu konvertieren und sie per ASM zu optimieren, was mir noch etwas schwerfällt.
Code: |
'coded by UEZ build 2016-08-16
' Setup for the "GDI Classic Raytraced Tunnel" demo: opens an fbgfx window,
' creates a 32-bit top-down DIB section plus a memory DC for it, and declares
' the state used by the render loop.
#include "windows.bi"
#include "fbgfx.bi"
Using FB
' Bitmap dimensions and the pointer to the DIB pixels (shared with _FillRect).
Dim Shared As ULong iW, iH, iMaxSize
Dim Shared As ULong Ptr aBitmap
' Rotation state shared with Rotate / _ASM_Rotate.
Dim Shared As Single cc, ss, x, y, z
' Size in pixels of one rendered cell (render step).
Dim Shared As UByte speed
Declare Sub Rotate(t As Single)
Declare Sub _ASM_Rotate(t As Single)
Declare Sub _FillRect(iX As ULong, iY As ULong, iW As ULong, iH As ULong, iColor As ULong)
Declare Function _ASM_Sqr(n As Single) As Single
iW = 640
iH = iW
speed = 2
iMaxSize = iW * iH
ScreenControl FB.SET_DRIVER_NAME, "GDI"
ScreenRes iW, iH
Dim As String sTitle = "GDI Classic Raytraced Tunnel / FPS: "
WindowTitle sTitle
Dim as HWND hHWND
ScreenControl(FB.GET_WINDOW_HANDLE, Cast(Integer, hHWND))
Dim As HBitmap hBitmapGDI
Dim As HDC hDC, hGfxDC
Dim As BITMAPV5HEADER tBIV5HDR
tBIV5HDR.bV5Size = SizeOf(BITMAPV5HEADER)
tBIV5HDR.bV5Width = iW
' negative height requests a top-down bitmap (row 0 is the top row)
tBIV5HDR.bV5Height = -iH
tBIV5HDR.bV5Planes = 1
tBIV5HDR.bV5BitCount = 32
tBIV5HDR.bV5Compression = BI_BITFIELDS
tBIV5HDR.bV5AlphaMask = &hFF000000
tBIV5HDR.bV5RedMask = &h00FF0000
tBIV5HDR.bV5GreenMask = &h0000FF00
tBIV5HDR.bV5BlueMask = &h000000FF
hDC = GetDC(hHWND)
hGfxDC = CreateCompatibleDC(hDC)
' aBitmap receives the address of the DIB's pixel memory
hBitmapGDI = CreateDIBSection(hDC, @tBIV5HDR, DIB_RGB_COLORS, @aBitmap, NULL, NULL)
Var hObjOld = SelectObject(hGfxDC, hBitmapGDI)
Dim evt As EVENT
Dim As Double fTimer
Dim As ULong iFPS = 0, iX, iY, g
' Optional first command-line argument overrides the render step (minimum 1).
' NOTE(review): Cast(UByte, ...) is applied to the argument string itself; a
' numeric parse such as ValUInt may have been intended - confirm.
If __FB_ARGC__ > 1 Then
speed = Cast(UByte, *__FB_ARGV__[1])
speed = IIf(speed < 1, 1, speed)
EndIf
Dim As Single pi = ACos(-1), pi2 = 2 * pi, pi05 = ACos(-1) / 2, HW = iH * 0.5, HH = iW / 2
Dim As Single A0, A1, A2, A3, ox, oy, tu, tv, o, dx, dy, dz, rd, A, B, C, R, t1, q, l, Oz
Dim As Any Ptr hBrush
Type Rectangle
iLeft As Long
iTop As Long
iRight As Long
iBottom As Long
End Type
Dim As Rectangle tRECT
' NOTE(review): hBrush has not been assigned at this point, so a null handle is
' selected here - confirm whether this call is still needed.
Var hObjOld2 = SelectObject(hGfxDC, hBrush)
fTimer = Timer
' Main loop: renders one frame of the tunnel into the DIB, blits it to the
' window, updates the FPS counter in the title once per second and leaves on
' ESC or when the window is closed.
Do
	For iX = 0 To iH - speed Step speed
		For iY = 0 To iW - speed Step speed
			' Normalised view ray through the current cell.
			dx = (iY - HW) / iW
			dy = (iX - HH) / iH
			dz = -1
			'l = Sqr(dx * dx + dy * dy + dz * dz)
			l = _ASM_Sqr(dx * dx + dy * dy + dz * dz)
			dx /= l
			dy /= l
			dz /= l
			' First rotation works on (dx, dy) through the shared x/y.
			x = dx
			y = dy
			'Rotate(A0 - A2)
			_ASM_Rotate(A0 - A2)
			' Second rotation works on (rotated dy, dz).
			dy = y
			y = dz
			dx = x
			x = dy
			'Rotate(A3)
			_ASM_Rotate(A3)
			dy = x
			dz = y
			' Ray / cylinder intersection: solve A*t^2 + B*t + C = 0.
			R = 100
			A = dx * dx + dy * dy
			B = 2 * (dx * ox + dy * oy)
			C = ox * ox + oy * oy - R * R
			'rd = Sqr(B * B - 4 * A * C)
			rd = _ASM_Sqr(B * B - 4 * A * C)
			q = -0.5 * (B - rd)
			t1 = C / q
			' Texture coordinates and XOR pattern -> grey value 0..255.
			tu = 5 * Atn(dy / dx) / pi2
			tv = dz * t1 / 256 + A1
			g = (tv * &hFF Xor tu * &hFF) And 255
			'tRECT.iLeft = iY
			'tRECT.iTop = iX
			'tRECT.iRight = iY + speed
			'tRECT.iBottom = iX + speed
			'WinAPI calls are shitty slow
			'hBrush = CreateSolidBrush((g Shl 16) + (g Shl 8) + g)
			'FillRect(hGfxDC, @tRECT, hBrush)
			'DeleteObject(hBrush)
			'let use own rectangle routine ;-)
			_FillRect(iX, iY, iX + speed, iY + speed, (g Shl 16) + (g Shl 8) + g)
		Next
		' Animation parameters advance once per rendered row.
		A0 += 0.00003
		A1 += 0.00013
		A2 += 0.00010
		A3 += 0.00004
	Next
	BitBlt(hDC, 0, 0, iW, iH, hGfxDC, 0, 0, SRCCOPY)
	iFPS += 1
	If Timer - fTimer > 0.99999 Then
		WindowTitle sTitle & iFPS
		iFPS = 0
		fTimer = Timer
	EndIf
	' Drain the whole event queue each frame so that mouse events cannot delay
	' the quit request. evt.Type holds an EVENT_* code; the scancode is a
	' separate field. Comparing evt.Type with SC_ESCAPE would match
	' EVENT_KEY_PRESS (both are 1) and quit on any key.
	Dim As Integer bQuit = 0
	While ScreenEvent(@evt)
		Select Case evt.Type
		Case EVENT_KEY_PRESS
			If evt.scancode = SC_ESCAPE Then bQuit = -1
		Case EVENT_WINDOW_CLOSE
			bQuit = -1
		End Select
	Wend
	If bQuit Then
		' Restore the original GDI objects before releasing everything.
		SelectObject(hGfxDC, hObjOld)
		SelectObject(hGfxDC, hObjOld2)
		ReleaseDC(hHWND, hDC)
		DeleteDC(hGfxDC)
		DeleteObject(hBitmapGDI)
		Exit Do
	EndIf
	Sleep(5)
Loop
Sub _FillRect(iX As ULong, iY As ULong, iWW As ULong, iHH As ULong, iColor As ULong)
	' Fills the rectangle [iX, iWW) x [iY, iHH) of the shared DIB aBitmap
	' (iW pixels per row, iH rows) with iColor.
	'
	' iX, iY   : left / top pixel (inclusive)
	' iWW, iHH : right / bottom pixel (exclusive)
	' iColor   : 32-bit pixel value written as-is
	'
	' The rectangle is clipped to the bitmap. Empty rectangles are ignored
	' explicitly, because "iHH - 1" would wrap around for 0 on an unsigned type.
	If aBitmap = 0 Then Exit Sub
	Dim As ULong iRight = IIf(iWW > iW, iW, iWW)
	Dim As ULong iBottom = IIf(iHH > iH, iH, iHH)
	If iX >= iRight OrElse iY >= iBottom Then Exit Sub
	Dim As ULong iXX, iYY, iOffset
	For iYY = iY To iBottom - 1
		' index of the first pixel of row iYY
		iOffset = iYY * iW
		For iXX = iX To iRight - 1
			aBitmap[iXX + iOffset] = iColor
		Next
	Next
End Sub
/'
Sub Rotate(t As Single)
cc = Cos(t)
ss = Sin(t)
z = x * cc - y * ss
y = x * ss + y * cc
x = z
End Sub
'/
Sub _ASM_Rotate(t As Single)
' Rotates the shared point (x, y) by t radians. Sine and cosine are obtained
' with a single x87 fsincos instruction and stored in the shared ss / cc;
' the new x coordinate is left in both z and x.
' Pointers to the shared variables, dereferenced inside the Asm block.
Var Sinus = @ss
Var Cosinus = @cc
'Var xx = @x
'Var yy = @y
'Var zz = @z
Asm
fld dword Ptr[t]
' fsincos leaves the cosine in st(0) and the sine in st(1)
fsincos
mov ecx, [Cosinus]
mov edx, [Sinus]
' pop the cosine into cc first, then the sine into ss
fstp dword ptr[ecx]
fstp dword Ptr[edx]
End Asm
z = x * cc - y * ss
y = x * ss + y * cc
x = z
End Sub
Function _ASM_Sqr(n As Single) As Single
	' Fast approximate square root: sqrt(n) ~= n * rsqrt(n), using the SSE
	' reciprocal-square-root estimate (reduced precision compared to Sqr).
	'
	' n       : value to take the root of (expected >= 0)
	' returns : approximate square root; exactly 0 for n = 0
	'
	' For n = 0 the estimate is +Inf and Inf * 0 would give NaN, so the
	' product is masked to 0 in that case.
	Asm
		movss xmm1, [n]
		rsqrtss xmm0, xmm1        ' xmm0 ~= 1 / sqrt(n)
		mulss xmm0, xmm1          ' xmm0 ~= sqrt(n)
		xorps xmm2, xmm2          ' xmm2 = 0.0
		cmpneqss xmm2, xmm1       ' low dword = all ones if n <> 0, else 0
		andps xmm0, xmm2          ' force the result to +0.0 when n = 0
		movss [function], xmm0
	End Asm
End Function
|
Andy hatte eine komplett ASM Version gebastelt, die ohne Rotation viel schneller läuft. _________________ Gruß,
UEZ |
|
Nach oben |
|
|
Eukalyptus
Anmeldungsdatum: 17.05.2013 Beiträge: 11
|
Verfasst am: 16.08.2016, 17:55 Titel: |
|
|
Hier noch ein Beispiel, wie ich das mit dem richtigen Wertebereich meinte.
Input: statt pi nimmt man 2^31 [0..pi] => [0..2^31]
Output statt 1 bekommt man 161906688 [-1..1] => [161906688..-161906688] (Achtung Vorzeichen vertauscht)
Code: |
Function _Sin6th(fX As Double) As Double
' Floating-point front end of the integer "6th order" sine approximation:
' scales fX (radians) to a 32-bit integer phase, runs the same integer sequence
' as _Sin6thI and rescales the integer result to -1 .. 1.
' NOTE(review): written for 32-bit x86 register usage.
Asm
' The constants are embedded in the code stream; jump over them.
jmp _Sin6th_Start
' 2^31 / pi: maps pi radians onto an integer phase of 2^31
_Sin6th_Mul: .double 683565275.57643158
' -1 / 161906688: rescales the integer result (sign inverted) to -1 .. 1
_Sin6th_Div: .double -0.0000000061763971109087229
' 1.5 * 2^52: adding it leaves the rounded integer in the low 32 bits of the double
_Sin6th_Rnd: .double 6755399441055744.0
_Sin6th_Start:
movq xmm0, [fX]
mulsd xmm0, [_Sin6th_Mul]
addsd xmm0, [_Sin6th_Rnd]
' ebx = integer phase (low 32 bits)
movd ebx, xmm0
lea eax, [ebx*2+0x80000000]
sar eax, 2
' edx:eax = eax * eax; only the high half (edx) is used below
imul eax
' ebx = 0 or -1, the sign mask of the phase
sar ebx, 31
lea eax, [edx*2-0x70000000]
lea ecx, [edx*8+edx-0x24000000]
imul edx
' conditionally invert ecx depending on the sign of the phase
xor ecx, ebx
lea eax, [edx*8+edx+0x44A00000]
imul ecx
cvtsi2sd xmm0, edx
mulsd xmm0, [_Sin6th_Div]
movq [Function], xmm0
End Asm
End Function
Function _Sin6thI(iX As Integer) As Integer
' Integer-only "6th order" sine approximation without any float conversion.
' iX      : phase in integer units, where 2^31 corresponds to pi
' returns : scaled sine with inverted sign: -1 .. 1 maps to 161906688 .. -161906688
' NOTE(review): only the low 32 bits of iX are read and only 32 bits of the
' result are written, so this assumes a 32-bit Integer - confirm for 64-bit builds.
Asm
mov ebx, [iX]
lea eax, [ebx*2+0x80000000]
sar eax, 2
' edx:eax = eax * eax; only the high half (edx) is used below
imul eax
' ebx = 0 or -1, the sign mask of the phase
sar ebx, 31
lea eax, [edx*2-0x70000000]
lea ecx, [edx*8+edx-0x24000000]
imul edx
' conditionally invert ecx depending on the sign of the phase
xor ecx, ebx
lea eax, [edx*8+edx+0x44A00000]
imul ecx
' the high half of the last product is the result
mov [Function], edx
End Asm
End Function
' Demo: plots two periods of _Sin6th (radian input) and then of _Sin6thI
' (integer phase input), and finally times 10000001 calls of each.
Public Const cPI = Atn(1) * 4
Dim As Integer iW
Screen 20, 32
ScreenInfo iW
Window (0, -1.3) - (iW, 1.3)
Dim As Double fPhase, fPhaseInc
Dim As UInteger iPhase
' 4 * pi radians (two periods) spread over the screen width
fPhaseInc = cPI * 4 / iW
ScreenLock()
For x As Integer = 0 To iW-1
fPhase += fPhaseInc
PSet(x, _Sin6th(fPhase)), &h00FF00
Next
ScreenUnLock()
Sleep
ScreenLock()
' Rescale the y axis to the integer output range of _Sin6thI (+-161906688).
Window (0, -161906688 * 1.3) - (iW, 161906688 * 1.3)
' same two periods in integer phase units (2^31 corresponds to pi)
fPhaseInc = 2^31 * 4 / iW
For x As Integer = 0 To iW-1
iPhase += fPhaseInc
' the phase is negated because the integer result has an inverted sign
PSet(x, _Sin6thI(-iPhase)), &h0000FF
Next
ScreenUnLock()
Dim As Double fTimer, fTime1, fTime2
' Timer differences are converted from seconds to milliseconds.
fTimer = Timer
For x As Integer = 0 To 10000000
_Sin6th(fPhase)
Next
fTime1 = (Timer - fTimer) * 1000
fTimer = Timer
For x As Integer = 0 To 10000000
_Sin6thI(iPhase)
Next
fTime2 = (Timer - fTimer) * 1000
Locate 2, 2: Print fTime1
Locate 3, 2: Print fTime2
Sleep |
Wird allerdings nur dann schneller sein, wenn man die Werte nicht eh wieder zurückrechnen muss |
|
Nach oben |
|
|
UEZ
Anmeldungsdatum: 24.06.2016 Beiträge: 130 Wohnort: Opel Stadt
|
Verfasst am: 16.08.2016, 18:18 Titel: |
|
|
Vielen Dank Eukalyptus! _________________ Gruß,
UEZ |
|
Nach oben |
|
|
MarkNEW Gast
|
Verfasst am: 03.10.2021, 22:59 Titel: |
|
|
gute Leute hier |
|
Nach oben |
|
|
MarkNEW Gast
|
Verfasst am: 05.10.2021, 20:10 Titel: |
|
|
was Du aber eigentlich suchst, sind wahrscheinlich Wavelets ;o) |
|
Nach oben |
|
|
|
|
Du kannst keine Beiträge in dieses Forum schreiben. Du kannst auf Beiträge in diesem Forum nicht antworten. Du kannst deine Beiträge in diesem Forum nicht bearbeiten. Du kannst deine Beiträge in diesem Forum nicht löschen. Du kannst an Umfragen in diesem Forum nicht mitmachen.
|
|