BasicOpsX86: regenerated with final 4.4.0 release of GCC

Regenerated the BasicOps assembly files with the final 4.4.0 release of GCC,
which provides some further minor optimizations.
Author: Tobias Doerffel
Date: 2009-04-23 12:09:39 +02:00
Parent: d38d437349
Commit: 780447d751

4 changed files with 197 additions and 246 deletions


@@ -441,7 +441,7 @@ FOREACH(opt_target ${opt_targets})
ELSE(EXISTS "$ENV{SVN_C_COMPILER}")
SET(C_COMPILER ${CMAKE_C_COMPILER})
ENDIF(EXISTS "$ENV{SVN_C_COMPILER}")
ADD_CUSTOM_TARGET(regen-basic-ops-${opt_target} COMMAND ${C_COMPILER} -O2 -ftree-vectorize -ftree-vectorizer-verbose=2 -fomit-frame-pointer -c -S -I${CMAKE_SOURCE_DIR}/include -I${CMAKE_BINARY_DIR} -g0 -DBUILD_${OPT_TARGET} -m${opt_target} ${FPMATH_FLAGS} -o ${BASIC_OPS_X86_TARGET_S} ${BASIC_OPS_X86_C} DEPENDS ${BASIC_OPS_X86_C})
ADD_CUSTOM_TARGET(regen-basic-ops-${opt_target} COMMAND ${C_COMPILER} -O2 -fno-stack-protector -ftree-vectorize -ftree-vectorizer-verbose=2 -fomit-frame-pointer -c -S -I${CMAKE_SOURCE_DIR}/include -I${CMAKE_BINARY_DIR} -g0 -DBUILD_${OPT_TARGET} -m${opt_target} ${FPMATH_FLAGS} -o ${BASIC_OPS_X86_TARGET_S} ${BASIC_OPS_X86_C} DEPENDS ${BASIC_OPS_X86_C})
ADD_CUSTOM_COMMAND(OUTPUT ${BASIC_OPS_X86_TARGET_O} COMMAND ${CMAKE_C_COMPILER} ARGS ${BASIC_OPS_X86_TARGET_S} -c -o ${BASIC_OPS_X86_TARGET_O} DEPENDS ${BASIC_OPS_X86_TARGET_S})
ADD_DEPENDENCIES(regen-basic-ops regen-basic-ops-${opt_target})
SET(opt_target_objects ${opt_target_objects} ${BASIC_OPS_X86_TARGET_O})
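The only functional change in this hunk is the added -fno-stack-protector in the regen-basic-ops-${opt_target} command line. Ubuntu's GCC builds enable -fstack-protector by default, and protected functions embed references to __stack_chk_fail in the emitted assembly; since the generated .S files are checked into the tree and assembled as-is on other systems, the flag presumably keeps that runtime dependency out of them. A minimal illustration of the effect (hypothetical function, not from the BasicOps sources):

#include <string.h>

/* With -fstack-protector in effect, the local char array below makes GCC
 * emit a canary check and a call to __stack_chk_fail in the generated
 * assembly; -fno-stack-protector suppresses the instrumentation. */
void copy_label(char *dst, const char *src)
{
    char tmp[32];                       /* typical SSP trigger */
    strncpy(tmp, src, sizeof(tmp) - 1);
    tmp[sizeof(tmp) - 1] = '\0';
    strcpy(dst, tmp);
}

With the Makefile generator, the aggregate target wired up above can then be invoked as make regen-basic-ops to refresh all variants.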


@@ -103,5 +103,5 @@ movq %mm0, 56(%edx)
emms
ret
.size alignedMemClearMMX, .-alignedMemClearMMX
.ident "GCC: (GNU) 4.4.0 20090304 (experimental)"
.ident "GCC: (Ubuntu 4.4.0-0ubuntu2) 4.4.0"
.section .note.GNU-stack,"",@progbits
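This file picks up only the new .ident string; the generated MMX code itself is unchanged. The visible tail (movq stores followed by emms) is consistent with a simple aligned-clear loop; a plausible C shape, with name and signature assumed rather than taken from the LMMS sources:

#include <stddef.h>

/* Hypothetical sketch of alignedMemClearMMX's source: zero an aligned
 * buffer in 8-byte units.  On 32-bit x86 with MMX enabled, GCC can emit
 * the 64-bit stores as movq %mm0, n(%edx) and finish with emms. */
void alignedMemClear(void *buf, size_t bytes)
{
    unsigned long long *p = (unsigned long long *)buf;
    for (size_t i = 0; i < bytes / 8; ++i)
        p[i] = 0ULL;
}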


@@ -230,48 +230,42 @@ alignedBufWetDryMixSplittedSSE:
pushl %edi
pushl %esi
pushl %ebx
subl $140, %esp
movl 180(%esp), %eax
flds 172(%esp)
movl 160(%esp), %edx
movl 164(%esp), %esi
subl $124, %esp
movl 164(%esp), %eax
movl 144(%esp), %edx
movl 148(%esp), %esi
movl 152(%esp), %ecx
testl %eax, %eax
movl 168(%esp), %ecx
flds 176(%esp)
jle .L43
movl 180(%esp), %eax
jle .L39
movl 164(%esp), %eax
subl $1, %eax
shrl %eax
addl $1, %eax
movl %eax, %ebp
movl %eax, 120(%esp)
movl %eax, 104(%esp)
shrl $2, %ebp
cmpl $3, 120(%esp)
cmpl $3, 104(%esp)
leal 0(,%ebp,4), %eax
movl %eax, 124(%esp)
movl %eax, 108(%esp)
jbe .L40
testl %eax, %eax
jne .L34
.L40:
fxch %st(1)
xorl %edi, %edi
jmp .L36
.p2align 4,,7
.p2align 3
.L34:
fsts 12(%esp)
fxch %st(1)
movss 160(%esp), %xmm0
xorps %xmm7, %xmm7
movss 12(%esp), %xmm0
movl %esi, %ebx
fsts 12(%esp)
xorl %eax, %eax
xorl %edi, %edi
shufps $0, %xmm0, %xmm0
movaps %xmm0, 32(%esp)
movss 12(%esp), %xmm0
shufps $0, %xmm0, %xmm0
movaps %xmm0, 16(%esp)
movss 156(%esp), %xmm0
shufps $0, %xmm0, %xmm0
movaps %xmm0, (%esp)
.p2align 4,,7
.p2align 3
.L37:
@@ -282,28 +276,28 @@ alignedBufWetDryMixSplittedSSE:
shufps $136, %xmm6, %xmm0
movaps 32(%edx,%eax,2), %xmm4
shufps $221, %xmm6, %xmm5
movaps %xmm0, 96(%esp)
movaps %xmm0, 80(%esp)
movaps 48(%edx,%eax,2), %xmm3
movaps %xmm4, %xmm0
shufps $136, %xmm3, %xmm0
movaps 96(%esp), %xmm2
movaps 80(%esp), %xmm2
shufps $221, %xmm3, %xmm4
movaps %xmm7, %xmm6
movlps (%ebx), %xmm6
movaps %xmm5, 80(%esp)
movaps %xmm5, 64(%esp)
movhps 8(%ebx), %xmm6
shufps $136, %xmm0, %xmm2
movaps %xmm0, 64(%esp)
movaps %xmm0, 48(%esp)
movaps %xmm7, %xmm5
movaps %xmm6, %xmm0
movlps 16(%ebx), %xmm5
movhps 24(%ebx), %xmm5
shufps $136, %xmm5, %xmm0
mulps 32(%esp), %xmm2
mulps 16(%esp), %xmm2
shufps $221, %xmm5, %xmm6
movaps %xmm4, 48(%esp)
movaps %xmm4, 32(%esp)
addl $32, %ebx
mulps 16(%esp), %xmm0
mulps (%esp), %xmm0
movaps %xmm7, %xmm4
movlps (%eax,%ecx), %xmm4
movaps %xmm7, %xmm3
@@ -313,21 +307,21 @@ alignedBufWetDryMixSplittedSSE:
movhps 24(%ecx,%eax), %xmm3
shufps $136, %xmm3, %xmm1
addps %xmm0, %xmm2
movaps 80(%esp), %xmm0
movaps 64(%esp), %xmm0
shufps $221, %xmm3, %xmm4
shufps $136, 48(%esp), %xmm0
mulps 16(%esp), %xmm1
shufps $136, 32(%esp), %xmm0
mulps (%esp), %xmm1
movaps %xmm2, %xmm3
movaps 80(%esp), %xmm5
mulps 32(%esp), %xmm0
shufps $221, 48(%esp), %xmm5
mulps 16(%esp), %xmm6
movaps 64(%esp), %xmm5
mulps 16(%esp), %xmm0
shufps $221, 32(%esp), %xmm5
mulps (%esp), %xmm6
addps %xmm1, %xmm0
movaps 96(%esp), %xmm1
shufps $221, 64(%esp), %xmm1
mulps 16(%esp), %xmm4
mulps 32(%esp), %xmm1
mulps 32(%esp), %xmm5
movaps 80(%esp), %xmm1
shufps $221, 48(%esp), %xmm1
mulps (%esp), %xmm4
mulps 16(%esp), %xmm1
mulps 16(%esp), %xmm5
addps %xmm6, %xmm1
addps %xmm4, %xmm5
movaps %xmm0, %xmm4
@@ -348,63 +342,53 @@ alignedBufWetDryMixSplittedSSE:
addl $32, %eax
cmpl %edi, %ebp
ja .L37
movl 124(%esp), %edi
movl 120(%esp), %eax
movl 108(%esp), %edi
movl 104(%esp), %eax
addl %edi, %edi
cmpl %eax, 124(%esp)
je .L44
cmpl %eax, 108(%esp)
je .L39
.L36:
leal (%edx,%edi,8), %ebx
movss 156(%esp), %xmm0
xorl %ebp, %ebp
leal 8(%edx,%edi,8), %edx
movss 160(%esp), %xmm1
movl %edi, %eax
leal (%edx,%edi,8), %ebx
leal 8(%edx,%edi,8), %edx
.p2align 4,,7
.p2align 3
.L38:
flds (%ebx)
movss (%esi,%eax,4), %xmm3
addl $2, %ebp
fmul %st(2), %st
flds (%esi,%eax,4)
fmul %st(2), %st
faddp %st, %st(1)
fstps (%ebx)
flds 4(%ebx)
fmul %st(2), %st
flds (%ecx,%eax,4)
fmul %st(2), %st
faddp %st, %st(1)
fstps 4(%ebx)
movss (%ebx), %xmm2
mulss %xmm0, %xmm3
mulss %xmm1, %xmm2
addss %xmm3, %xmm2
movss %xmm2, (%ebx)
movss 4(%ebx), %xmm2
movss (%ecx,%eax,4), %xmm3
mulss %xmm1, %xmm2
mulss %xmm0, %xmm3
addss %xmm3, %xmm2
movss %xmm2, 4(%ebx)
addl $16, %ebx
flds (%edx)
fmul %st(2), %st
flds 4(%esi,%eax,4)
fmul %st(2), %st
faddp %st, %st(1)
fstps (%edx)
flds 4(%edx)
fmul %st(2), %st
flds 4(%ecx,%eax,4)
movss 4(%esi,%eax,4), %xmm3
movss (%edx), %xmm2
mulss %xmm0, %xmm3
mulss %xmm1, %xmm2
addss %xmm3, %xmm2
movss %xmm2, (%edx)
movss 4(%edx), %xmm2
movss 4(%ecx,%eax,4), %xmm3
mulss %xmm1, %xmm2
leal (%edi,%ebp), %eax
fmul %st(2), %st
faddp %st, %st(1)
fstps 4(%edx)
mulss %xmm0, %xmm3
addss %xmm3, %xmm2
movss %xmm2, 4(%edx)
addl $16, %edx
cmpl %eax, 180(%esp)
cmpl %eax, 164(%esp)
jg .L38
fstp %st(0)
fstp %st(0)
jmp .L39
.L43:
fstp %st(0)
fstp %st(0)
jmp .L39
.L44:
fstp %st(0)
fstp %st(0)
.p2align 4,,7
.p2align 3
.L39:
addl $140, %esp
addl $124, %esp
popl %ebx
popl %esi
popl %edi
@@ -417,39 +401,34 @@ alignedBufWetDryMixSplittedSSE:
unalignedBufMixLRCoeffSSE:
pushl %esi
pushl %ebx
subl $4, %esp
movl 32(%esp), %esi
flds 24(%esp)
movl 16(%esp), %eax
movl 20(%esp), %edx
movl %esi, %ebx
flds 28(%esp)
shrl $31, %ebx
leal (%esi,%ebx), %ecx
movl 28(%esp), %ebx
movl 12(%esp), %eax
movl 16(%esp), %edx
movss 20(%esp), %xmm1
movl %ebx, %esi
shrl $31, %esi
leal (%ebx,%esi), %ecx
andl $1, %ecx
cmpl %ebx, %ecx
jne .L54
.L46:
testl %esi, %esi
jle .L55
leal -1(%esi), %ebx
shrl %ebx
cmpl %esi, %ecx
movss 24(%esp), %xmm3
jne .L52
.L44:
testl %ebx, %ebx
jle .L49
testb $15, %al
jne .L48
fxch %st(1)
fstps (%esp)
jne .L46
movaps %xmm1, %xmm0
subl $1, %ebx
unpcklps %xmm3, %xmm0
shrl %ebx
xorps %xmm2, %xmm2
movss (%esp), %xmm0
addl $1, %ebx
fstps (%esp)
xorl %ecx, %ecx
movss (%esp), %xmm1
unpcklps %xmm1, %xmm0
movaps %xmm0, %xmm3
addl $1, %ebx
movlhps %xmm0, %xmm3
xorl %ecx, %ecx
.p2align 4,,7
.p2align 3
.L49:
.L47:
movaps %xmm2, %xmm1
addl $1, %ecx
movlps (%edx), %xmm1
@@ -463,65 +442,53 @@ unalignedBufMixLRCoeffSSE:
movaps %xmm0, (%eax)
addl $16, %eax
cmpl %ebx, %ecx
jb .L49
jmp .L51
.p2align 4,,7
.p2align 3
.L55:
fstp %st(0)
fstp %st(0)
.p2align 4,,7
.p2align 3
.L51:
addl $4, %esp
jb .L47
.L49:
popl %ebx
popl %esi
ret
.p2align 4,,7
.p2align 3
.L48:
.L46:
xorl %ecx, %ecx
.p2align 4,,7
.p2align 3
.L50:
flds (%edx,%ecx,8)
fmul %st(2), %st
fadds (%eax,%ecx,8)
fstps (%eax,%ecx,8)
flds 4(%edx,%ecx,8)
fmul %st(1), %st
fadds 4(%eax,%ecx,8)
fstps 4(%eax,%ecx,8)
flds 8(%edx,%ecx,8)
fmul %st(2), %st
fadds 8(%eax,%ecx,8)
fstps 8(%eax,%ecx,8)
flds 12(%edx,%ecx,8)
fmul %st(1), %st
fadds 12(%eax,%ecx,8)
fstps 12(%eax,%ecx,8)
.L48:
movss (%edx,%ecx,8), %xmm0
mulss %xmm1, %xmm0
addss (%eax,%ecx,8), %xmm0
movss %xmm0, (%eax,%ecx,8)
movss 4(%edx,%ecx,8), %xmm0
mulss %xmm3, %xmm0
addss 4(%eax,%ecx,8), %xmm0
movss %xmm0, 4(%eax,%ecx,8)
movss 8(%edx,%ecx,8), %xmm0
mulss %xmm1, %xmm0
addss 8(%eax,%ecx,8), %xmm0
movss %xmm0, 8(%eax,%ecx,8)
movss 12(%edx,%ecx,8), %xmm0
mulss %xmm3, %xmm0
addss 12(%eax,%ecx,8), %xmm0
movss %xmm0, 12(%eax,%ecx,8)
addl $2, %ecx
cmpl %ecx, %esi
jg .L50
fstp %st(0)
fstp %st(0)
addl $4, %esp
cmpl %ecx, %ebx
jg .L48
popl %ebx
popl %esi
ret
.L54:
flds (%edx)
subl $1, %esi
fmul %st(2), %st
fadds (%eax)
fstps (%eax)
flds 4(%edx)
.L52:
movss (%edx), %xmm0
subl $1, %ebx
mulss %xmm1, %xmm0
addss (%eax), %xmm0
movss %xmm0, (%eax)
movss 4(%edx), %xmm0
addl $8, %edx
fmul %st(1), %st
fadds 4(%eax)
fstps 4(%eax)
mulss %xmm3, %xmm0
addss 4(%eax), %xmm0
movss %xmm0, 4(%eax)
addl $8, %eax
jmp .L46
jmp .L44
.size unalignedBufMixLRCoeffSSE, .-unalignedBufMixLRCoeffSSE
.ident "GCC: (GNU) 4.4.0 20090304 (experimental)"
.ident "GCC: (Ubuntu 4.4.0-0ubuntu2) 4.4.0"
.section .note.GNU-stack,"",@progbits
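The substantive change in this file is the move from x87 scalar tails (flds/fmul/faddp/fstps) to SSE scalar code (movss/mulss/addss), which also eliminates the fstp stack-cleanup blocks and shrinks both stack frames. Read from the scalar loops, the two routines compute roughly the following; the C below is a reconstruction, with signatures, parameter order, and names guessed from the assembly rather than taken from the LMMS headers:

/* alignedBufWetDryMixSplittedSSE: mix split left/right inputs into an
 * interleaved stereo buffer with dry/wet gains (which gain applies to
 * which operand is an assumption). */
void alignedBufWetDryMixSplitted(float *buf, const float *left,
                                 const float *right, float dry, float wet,
                                 int frames)
{
    for (int f = 0; f < frames; ++f) {
        buf[2 * f]     = buf[2 * f]     * dry + left[f]  * wet;
        buf[2 * f + 1] = buf[2 * f + 1] * dry + right[f] * wet;
    }
}

/* unalignedBufMixLRCoeffSSE: accumulate an interleaved stereo source
 * into dst with per-channel coefficients.  The assembly checks dst's
 * 16-byte alignment (testb $15, %al) and falls back to a scalar loop
 * when it is misaligned. */
void unalignedBufMixLRCoeff(float *dst, const float *src, float left,
                            float right, int frames)
{
    for (int f = 0; f < frames; ++f) {
        dst[2 * f]     += src[2 * f]     * left;
        dst[2 * f + 1] += src[2 * f + 1] * right;
    }
}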


@@ -70,19 +70,19 @@ alignedConvertToS16SSE2:
pushl %ebx
subl $8, %esp
movl 36(%esp), %eax
movss .LC0, %xmm6
cmpb $0, 44(%esp)
flds .LC0
movl 28(%esp), %edx
movl 32(%esp), %ebx
movl %eax, %esi
fmuls 40(%esp)
mulss 40(%esp), %xmm6
jne .L13
testw %ax, %ax
jle .L35
jle .L15
movl %eax, %edi
shrw $2, %di
cmpw $3, %ax
movw %ax, 4(%esp)
movw %ax, 2(%esp)
leal 0(,%edi,4), %ebp
ja .L33
.L28:
@@ -98,19 +98,17 @@ alignedConvertToS16SSE2:
.p2align 4,,7
.p2align 3
.L25:
flds (%edx)
fmul %st(1), %st
fstps 4(%esp)
cvttss2si 4(%esp), %ecx
flds 4(%edx)
fmul %st(1), %st
movss (%edx), %xmm0
mulss %xmm6, %xmm0
cvttss2si %xmm0, %ecx
movss 4(%edx), %xmm0
cmpl $-32768, %ecx
mulss %xmm6, %xmm0
cmovl %edi, %ecx
cmpl $32767, %ecx
cmovg %ebx, %ecx
fstps 4(%esp)
movw %cx, (%eax)
cvttss2si 4(%esp), %ecx
cvttss2si %xmm0, %ecx
cmpl $-32768, %ecx
cmovl %edi, %ecx
cmpl $32767, %ecx
@@ -121,15 +119,6 @@ alignedConvertToS16SSE2:
addl $4, %eax
cmpw %bp, %si
jg .L25
fstp %st(0)
jmp .L15
.L35:
fstp %st(0)
jmp .L15
.L36:
fstp %st(0)
.p2align 4,,7
.p2align 3
.L15:
movswl %si,%esi
addl $8, %esp
@@ -143,11 +132,11 @@ alignedConvertToS16SSE2:
.p2align 3
.L13:
testw %ax, %ax
jle .L36
jle .L15
movl %eax, %ebp
shrw $2, %bp
cmpw $3, %si
movw %ax, 4(%esp)
movw %ax, 2(%esp)
leal 0(,%ebp,4), %eax
ja .L34
.L27:
@@ -162,24 +151,22 @@ alignedConvertToS16SSE2:
.p2align 4,,7
.p2align 3
.L20:
flds (%ecx)
movss (%ecx), %xmm0
movl $32767, %ebp
fmul %st(1), %st
fstps 4(%esp)
cvttss2si 4(%esp), %ebx
flds 4(%ecx)
fmul %st(1), %st
mulss %xmm6, %xmm0
cvttss2si %xmm0, %ebx
movss 4(%ecx), %xmm0
cmpl $-32768, %ebx
cmovl %edi, %ebx
cmpl $32767, %ebx
mulss %xmm6, %xmm0
cmovg %ebp, %ebx
movzbl %bh, %ebp
sall $8, %ebx
orl %ebp, %ebx
movl $32767, %ebp
fstps 4(%esp)
movw %bx, (%edx)
cvttss2si 4(%esp), %ebx
cvttss2si %xmm0, %ebx
cmpl $-32768, %ebx
cmovl %edi, %ebx
cmpl $32767, %ebx
@@ -193,107 +180,105 @@ alignedConvertToS16SSE2:
addl $4, %edx
cmpw %ax, %si
jg .L20
fstp %st(0)
jmp .L15
.p2align 4,,7
.p2align 3
.L34:
testw %ax, %ax
je .L27
fsts (%esp)
movaps %xmm6, %xmm0
xorl %ecx, %ecx
movdqa .LC1, %xmm3
movss (%esp), %xmm0
xorl %edi, %edi
movdqa .LC2, %xmm2
shufps $0, %xmm0, %xmm0
movdqa .LC2, %xmm2
movss %xmm6, 4(%esp)
xorl %edi, %edi
movaps %xmm0, %xmm7
.p2align 4,,7
.p2align 3
.L19:
movaps (%edx,%ecx,2), %xmm0
movdqa %xmm2, %xmm4
movdqa %xmm2, %xmm5
movdqa %xmm2, %xmm6
addl $1, %edi
movaps 16(%edx,%ecx,2), %xmm5
movaps 16(%edx,%ecx,2), %xmm4
mulps %xmm7, %xmm0
mulps %xmm7, %xmm5
mulps %xmm7, %xmm4
cvttps2dq %xmm0, %xmm0
movdqa %xmm0, %xmm1
pcmpgtd %xmm3, %xmm1
pand %xmm1, %xmm0
pandn %xmm3, %xmm1
por %xmm0, %xmm1
cvttps2dq %xmm5, %xmm5
cvttps2dq %xmm4, %xmm4
movdqa %xmm1, %xmm0
pcmpgtd %xmm2, %xmm0
pand %xmm0, %xmm4
pand %xmm0, %xmm5
pandn %xmm1, %xmm0
movdqa %xmm0, %xmm1
movdqa %xmm5, %xmm0
por %xmm4, %xmm1
movdqa %xmm4, %xmm0
por %xmm5, %xmm1
pcmpgtd %xmm3, %xmm0
movdqa .LC3, %xmm4
pand %xmm0, %xmm5
pand %xmm1, %xmm4
pandn %xmm3, %xmm0
psrad $8, %xmm4
por %xmm5, %xmm0
pslld $8, %xmm1
movdqa %xmm0, %xmm5
pcmpgtd %xmm2, %xmm5
pand %xmm5, %xmm6
pandn %xmm0, %xmm5
movdqa %xmm5, %xmm0
movdqa .LC3, %xmm5
por %xmm6, %xmm0
pand %xmm0, %xmm5
pslld $8, %xmm0
pand %xmm0, %xmm4
pand %xmm1, %xmm5
pandn %xmm3, %xmm0
psrad $8, %xmm5
movdqa %xmm4, %xmm6
punpcklwd %xmm5, %xmm4
punpckhwd %xmm5, %xmm6
movdqa %xmm4, %xmm5
punpcklwd %xmm6, %xmm4
punpckhwd %xmm6, %xmm5
punpcklwd %xmm5, %xmm4
movdqa %xmm1, %xmm5
por %xmm4, %xmm0
pslld $8, %xmm1
movdqa %xmm0, %xmm4
pcmpgtd %xmm2, %xmm4
pand %xmm4, %xmm6
pandn %xmm0, %xmm4
movdqa %xmm4, %xmm0
movdqa .LC3, %xmm4
por %xmm6, %xmm0
pand %xmm0, %xmm4
pslld $8, %xmm0
psrad $8, %xmm4
movdqa %xmm5, %xmm6
punpcklwd %xmm4, %xmm5
punpckhwd %xmm4, %xmm6
movdqa %xmm5, %xmm4
punpcklwd %xmm6, %xmm5
punpckhwd %xmm6, %xmm4
punpcklwd %xmm4, %xmm5
movdqa %xmm1, %xmm4
punpcklwd %xmm0, %xmm1
punpckhwd %xmm0, %xmm5
movdqa %xmm1, %xmm0
punpcklwd %xmm5, %xmm1
punpckhwd %xmm5, %xmm0
punpcklwd %xmm0, %xmm1
por %xmm1, %xmm4
movdqa %xmm4, (%ebx,%ecx)
punpckhwd %xmm0, %xmm4
movdqa %xmm1, %xmm6
punpcklwd %xmm4, %xmm1
punpckhwd %xmm4, %xmm6
punpcklwd %xmm6, %xmm1
por %xmm1, %xmm5
movdqa %xmm5, (%ebx,%ecx)
addl $16, %ecx
cmpw %di, %bp
ja .L19
cmpw 4(%esp), %ax
cmpw 2(%esp), %ax
movss 4(%esp), %xmm6
jne .L18
fstp %st(0)
jmp .L15
.p2align 4,,7
.p2align 3
.L33:
testw %bp, %bp
.p2align 4,,4
.p2align 4,,3
.p2align 3
je .L28
fsts (%esp)
movaps %xmm6, %xmm0
xorl %eax, %eax
movdqa .LC1, %xmm3
movss (%esp), %xmm0
xorl %ecx, %ecx
movdqa .LC2, %xmm2
shufps $0, %xmm0, %xmm0
movdqa .LC2, %xmm2
xorl %ecx, %ecx
movaps %xmm0, %xmm5
.p2align 4,,7
.p2align 3
.L24:
movaps (%edx,%eax,2), %xmm0
addl $1, %ecx
movdqa %xmm2, %xmm6
movdqa %xmm2, %xmm7
movaps 16(%edx,%eax,2), %xmm4
mulps %xmm5, %xmm0
mulps %xmm5, %xmm4
@@ -306,23 +291,23 @@ alignedConvertToS16SSE2:
cvttps2dq %xmm4, %xmm4
movdqa %xmm1, %xmm0
pcmpgtd %xmm2, %xmm0
pand %xmm0, %xmm6
pand %xmm0, %xmm7
pandn %xmm1, %xmm0
movdqa %xmm0, %xmm1
movdqa %xmm4, %xmm0
por %xmm6, %xmm1
por %xmm7, %xmm1
pcmpgtd %xmm3, %xmm0
movdqa %xmm2, %xmm6
movdqa %xmm2, %xmm7
pand %xmm0, %xmm4
pandn %xmm3, %xmm0
por %xmm4, %xmm0
movdqa %xmm0, %xmm4
pcmpgtd %xmm2, %xmm4
pand %xmm4, %xmm6
pand %xmm4, %xmm7
pandn %xmm0, %xmm4
movdqa %xmm4, %xmm0
movdqa %xmm1, %xmm4
por %xmm6, %xmm0
por %xmm7, %xmm0
punpckhwd %xmm0, %xmm4
punpcklwd %xmm0, %xmm1
movdqa %xmm1, %xmm0
@@ -333,9 +318,8 @@ alignedConvertToS16SSE2:
addl $16, %eax
cmpw %cx, %di
ja .L24
cmpw %bp, 4(%esp)
cmpw %bp, 2(%esp)
jne .L23
fstp %st(0)
jmp .L15
.size alignedConvertToS16SSE2, .-alignedConvertToS16SSE2
.section .rodata.cst4,"aM",@progbits,4
@@ -361,5 +345,5 @@ alignedConvertToS16SSE2:
.long 65280
.long 65280
.long 65280
.ident "GCC: (GNU) 4.4.0 20090304 (experimental)"
.ident "GCC: (Ubuntu 4.4.0-0ubuntu2) 4.4.0"
.section .note.GNU-stack,"",@progbits
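As in the previous file, the scalar conversion paths drop the x87 fmul/fstps round-trips in favour of mulss/cvttss2si, keeping the precomputed scale factor live in %xmm6 instead of on the x87 stack. The pattern matches code of roughly this shape (hypothetical reconstruction; the swap flag and names are inferred from the cmpb test and the movzbl/sall/orl byte-swap sequence):

#include <stdint.h>

/* Hypothetical sketch of alignedConvertToS16SSE2's scalar path: scale,
 * truncate, clamp to the int16 range, and optionally byte-swap. */
void alignedConvertToS16(int16_t *dst, const float *src, int samples,
                         float scale, int swap_bytes)
{
    for (int i = 0; i < samples; ++i) {
        int v = (int)(src[i] * scale);          /* mulss + cvttss2si */
        if (v < -32768) v = -32768;             /* cmpl / cmovl */
        if (v > 32767)  v = 32767;              /* cmpl / cmovg */
        if (swap_bytes)                         /* movzbl %bh; sall $8; orl */
            v = (uint16_t)((v << 8) | ((v >> 8) & 0xff));
        dst[i] = (int16_t)v;
    }
}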