toge's diary

コンピュータ関連の趣味をつらつらと。

gccはそんなに悪い子じゃないよっ

gcc vs MSVC

こういう記事があると脊髄反射で検証してしまうダメな私です。

さてVisualC++は全然興味が無いし、おそらく最適化オプション調整すればもっと速いコードが出てくるはずなので「Visual C++より速いとか遅いとか」で一喜一憂する気はありません。

なによりgccがそれほどダメでないことだけを実証しておきます。

まずはC言語のソース。

#include <stdio.h>
#include <stdlib.h>

/* Naive doubly-recursive Fibonacci used as the benchmark kernel.
 * Any n below 2 (including negative values) is returned unchanged;
 * otherwise the result is fib(n-1) + fib(n-2). */
int fib(int n) {
  return (n < 2) ? n : fib(n - 1) + fib(n - 2);
}

/* Prints fib(n) followed by a newline. n is taken from the first
 * command-line argument via atoi() when one is given, else it is 40. */
int main(int argc, char* argv[]) {
  int n = 40;

  if (argc > 1)
    n = atoi(argv[1]);

  printf("%d\n", fib(n));
  return 0;
}

gcc 3.3以降のめぼしいバージョンと、おまけでリリース前のgcc-4.4.0もつけて、Core2Duo 6300上での実行結果。
どのバージョンも5回やって一番いい結果を載せてます。

% gcc-3.3.6 -O2 fibb.c; time ./a.out
102334155
./a.out  2.54s user 0.00s system 99% cpu 2.553 total

% gcc-3.3.6 -O3 fibb.c; time ./a.out
102334155
./a.out  2.56s user 0.00s system 99% cpu 2.575 total

% gcc-3.4.6 -O2 fibb.c; time ./a.out
102334155
./a.out  2.54s user 0.01s system 98% cpu 2.586 total

% gcc-3.4.6 -O3 fibb.c; time ./a.out
102334155
./a.out  2.55s user 0.00s system 99% cpu 2.571 total

% gcc-4.0.3 -O2 fibb.c; time ./a.out
102334155
./a.out  2.30s user 0.00s system 99% cpu 2.306 total

% gcc-4.0.3 -O3 fibb.c; time ./a.out
102334155
./a.out  1.03s user 0.00s system 100% cpu 1.031 total

% gcc-4.1.2 -O2 fibb.c; time ./a.out
102334155
./a.out  2.34s user 0.00s system 99% cpu 2.347 total

% gcc-4.1.2 -O3 fibb.c; time ./a.out
102334155
./a.out  0.94s user 0.00s system 99% cpu 0.953 total

% gcc-4.2.4 -O2 fibb.c; time ./a.out
102334155
./a.out  2.45s user 0.00s system 99% cpu 2.468 total

% gcc-4.2.4 -O3 fibb.c; time ./a.out
102334155
./a.out  0.90s user 0.00s system 98% cpu 0.908 total

% gcc-4.3.2 -O2 fibb.c; time ./a.out
102334155
./a.out  2.50s user 0.01s system 99% cpu 2.530 total

% gcc-4.3.3 -O3 fibb.c; time ./a.out
102334155
./a.out  0.82s user 0.00s system 99% cpu 0.830 total

% gcc-4.4.0 -O2 fibb.c; time ./a.out
102334155
./a.out  2.41s user 0.00s system 99% cpu 2.429 total

% gcc-4.4.0 -O3 fibb.c; time ./a.out
102334155
./a.out  0.84s user 0.00s system 97% cpu 0.868 total

こうやってみると3.4→4.0, 4.2→4.3に大きな変化を感じるね。2.95の頃から使っている身で考えるとなかなか感慨深い。

一方で-O2の結果は案外改善しないもんだなぁ。

まあ、何にせよ3.4系なんて間違っても使うなよってことか。

多分mokeheheさんのgccのコードはgcc-4.3.2 -O2でコンパイルしたんだろうなぁ。ちなみにうちでこのバージョンでやるとこんな感じ。

# fib as emitted by gcc at -O2 (per the surrounding text); 32-bit x86,
# GAS directives, Intel operand order (op dst, src).
# In:  int n at [ebp+8]        Out: eax
# Only one recursive call is left. It sits in loop .L6, which steps ebx
# down by 2 per pass and sums fib(ebx-1) into esi. The value that remains
# when the loop exits (n & 1) is added at the end.
fib:
        push    ebp
        mov     ebp, esp
        push    edi
        push    esi
        xor     esi, esi                # esi = running sum, starts at 0
        push    ebx
        sub     esp, 12                 # scratch area; [esp] is the outgoing-argument slot
        mov     edi, DWORD PTR [ebp+8]  # edi = n
        cmp     edi, 1
        mov     ebx, edi                # ebx = loop counter (mov leaves the flags from cmp intact)
        jle     .L3                     # n <= 1: skip the loop, result is 0 + n
        .p2align 4,,7
        .p2align 3
.L6:
        lea     eax, [ebx-1]            # argument for the call: ebx - 1
        sub     ebx, 2                  # counter -= 2
        mov     DWORD PTR [esp], eax
        call    fib
        add     esi, eax                # sum += fib(counter_before - 1)
        cmp     ebx, 1
        jg      .L6                     # repeat while counter > 1
        and     edi, 1                  # counter ends at 0 or 1, i.e. n & 1
.L3:
        add     esp, 12
        lea     eax, [esi+edi]          # return sum + edi (n itself when n <= 1)
        pop     ebx                     # restore saved registers in reverse push order
        pop     esi
        pop     edi
        pop     ebp
        ret

確かに前準備とか後始末もいまいちだし、ちょっとね・・・って感じ。

で、gcc-4.3.3 -O3にすると・・・。

# fib as emitted by gcc at -O3 (per the surrounding text); 32-bit x86,
# GAS directives, Intel operand order (op dst, src).
# In:  int n at [ebp+8]        Out: eax
# Shape of the listing:
#   * nine nested loops (.L28 outermost ... .L21, then .L30 innermost);
#   * a single `call fib`, inside .L30;
#   * each loop keeps its own running sum: stack slots [ebp-116], -112,
#     -108, -104, -100, -96, -92, -88 for .L28 ... .L21, and esi for .L30.
#     A loop zeroes the next-inner sum at the top of each pass and adds the
#     finished inner result to its own sum just before its back edge.
# NOTE(review): presumably gcc inlined fib into itself several levels deep
# to get this; the listing alone does not show which pass did it.
fib:
	push	ebp
	mov	ebp, esp
	push	edi
	push	esi
	push	ebx
	sub	esp, 156		# 156-byte frame holding the nested loops' spilled state
	cmp	DWORD PTR [ebp+8], 1
	mov	DWORD PTR [ebp-116], 0	# outermost sum = 0 (mov keeps the flags from cmp)
	jle	.L3			# n <= 1: nothing to sum, .L3 returns 0 + n
	mov	eax, DWORD PTR [ebp+8]
	mov	ecx, DWORD PTR [ebp+8]
	mov	edx, DWORD PTR [ebp+8]
	mov	DWORD PTR [ebp-116], 0
	sub	eax, 1
	sub	ecx, 3
	mov	DWORD PTR [ebp-32], eax
	mov	DWORD PTR [ebp-28], ecx
.L28:					# loop 1 (outermost): sums into [ebp-116]
	mov	eax, DWORD PTR [ebp-32]
	lea	ecx, [edx-2]
	mov	DWORD PTR [ebp-112], 0
	mov	DWORD PTR [ebp-132], ecx
	cmp	eax, 1
	jle	.L5
	mov	ecx, DWORD PTR [ebp-28]
	lea	eax, [edx-2]
	sub	edx, 4
	mov	DWORD PTR [ebp-132], eax
	mov	DWORD PTR [ebp-40], eax
	mov	DWORD PTR [ebp-16], edx
	and	ecx, -2
	mov	DWORD PTR [ebp-36], edx
	mov	DWORD PTR [ebp-112], 0
	mov	DWORD PTR [ebp-152], ecx
.L27:					# loop 2: sums into [ebp-112]
	mov	edx, DWORD PTR [ebp-40]
	mov	DWORD PTR [ebp-108], 0
	cmp	edx, 1
	jle	.L7
	mov	eax, DWORD PTR [ebp-40]
	mov	ecx, DWORD PTR [ebp-40]
	mov	DWORD PTR [ebp-108], 0
	sub	eax, 1
	sub	ecx, 3
	mov	DWORD PTR [ebp-48], eax
	mov	DWORD PTR [ebp-44], ecx
.L26:					# loop 3: sums into [ebp-108]
	mov	eax, DWORD PTR [ebp-48]
	lea	ecx, [edx-2]
	mov	DWORD PTR [ebp-104], 0
	mov	DWORD PTR [ebp-128], ecx
	cmp	eax, 1
	jle	.L9
	mov	ecx, DWORD PTR [ebp-44]
	lea	eax, [edx-2]
	sub	edx, 4
	mov	DWORD PTR [ebp-128], eax
	mov	DWORD PTR [ebp-56], eax
	mov	DWORD PTR [ebp-20], edx
	and	ecx, -2
	mov	DWORD PTR [ebp-52], edx
	mov	DWORD PTR [ebp-104], 0
	mov	DWORD PTR [ebp-148], ecx
.L25:					# loop 4: sums into [ebp-104]
	mov	edx, DWORD PTR [ebp-56]
	mov	DWORD PTR [ebp-100], 0
	cmp	edx, 1
	jle	.L11
	mov	eax, DWORD PTR [ebp-56]
	mov	ecx, DWORD PTR [ebp-56]
	mov	DWORD PTR [ebp-100], 0
	sub	eax, 1
	sub	ecx, 3
	mov	DWORD PTR [ebp-64], eax
	mov	DWORD PTR [ebp-60], ecx
.L24:					# loop 5: sums into [ebp-100]
	mov	eax, DWORD PTR [ebp-64]
	lea	ecx, [edx-2]
	mov	DWORD PTR [ebp-96], 0
	mov	DWORD PTR [ebp-124], ecx
	cmp	eax, 1
	jle	.L13
	mov	ecx, DWORD PTR [ebp-60]
	lea	eax, [edx-2]
	sub	edx, 4
	mov	DWORD PTR [ebp-124], eax
	mov	DWORD PTR [ebp-72], eax
	mov	DWORD PTR [ebp-24], edx
	and	ecx, -2
	mov	DWORD PTR [ebp-68], edx
	mov	DWORD PTR [ebp-96], 0
	mov	DWORD PTR [ebp-144], ecx
.L23:					# loop 6: sums into [ebp-96]
	mov	edx, DWORD PTR [ebp-72]
	mov	DWORD PTR [ebp-92], 0
	cmp	edx, 1
	jle	.L15
	mov	eax, DWORD PTR [ebp-72]
	mov	ecx, DWORD PTR [ebp-72]
	mov	DWORD PTR [ebp-92], 0
	sub	eax, 1
	sub	ecx, 3
	mov	DWORD PTR [ebp-80], eax
	mov	DWORD PTR [ebp-76], ecx
.L22:					# loop 7: sums into [ebp-92]
	mov	eax, DWORD PTR [ebp-80]
	lea	ecx, [edx-2]
	mov	DWORD PTR [ebp-88], 0
	mov	DWORD PTR [ebp-120], ecx
	cmp	eax, 1
	jle	.L17
	mov	ecx, DWORD PTR [ebp-76]
	lea	eax, [edx-2]
	sub	edx, 4
	mov	DWORD PTR [ebp-120], eax
	mov	edi, eax
	mov	eax, edx
	mov	DWORD PTR [ebp-84], edx
	mov	DWORD PTR [ebp-88], 0
	and	ecx, -2
	sub	eax, ecx
	mov	DWORD PTR [ebp-136], ecx
	mov	DWORD PTR [ebp-140], eax	# value edi is compared against to end loop 8
	.p2align 4,,7
	.p2align 3
.L21:					# loop 8: sums into [ebp-88]; edi is its counter
	xor	esi, esi		# innermost sum = 0
	cmp	edi, 1
	mov	ebx, edi
	jle	.L19
	.p2align 4,,7
	.p2align 3
.L30:					# loop 9 (innermost): same shape as a call-and-add loop, sums into esi
	lea	eax, [ebx-1]
	sub	ebx, 2
	mov	DWORD PTR [esp], eax	# outgoing argument: ebx_before - 1
	call	fib			# the only call instruction in this listing
	add	esi, eax
	cmp	ebx, 1
	jg	.L30
	mov	ebx, DWORD PTR [ebp-84]
	and	ebx, 1
.L19:
	lea	eax, [esi+ebx]
	sub	edi, 2
	add	DWORD PTR [ebp-88], eax	# fold the loop-9 result into the loop-8 sum
	sub	DWORD PTR [ebp-84], 2
	cmp	edi, DWORD PTR [ebp-140]
	jne	.L21
	mov	eax, DWORD PTR [ebp-76]
	sub	eax, DWORD PTR [ebp-136]
.L17:
	mov	edx, DWORD PTR [ebp-120]
	add	eax, DWORD PTR [ebp-88]
	sub	DWORD PTR [ebp-80], 2
	add	DWORD PTR [ebp-92], eax	# fold into the loop-7 sum
	sub	DWORD PTR [ebp-76], 2
	cmp	edx, 1
	jg	.L22
	mov	edx, DWORD PTR [ebp-68]
	and	edx, 1
.L15:
	mov	eax, DWORD PTR [ebp-92]
	sub	DWORD PTR [ebp-72], 2
	sub	DWORD PTR [ebp-68], 2
	add	eax, edx
	add	DWORD PTR [ebp-96], eax	# fold into the loop-6 sum
	mov	eax, DWORD PTR [ebp-24]
	sub	eax, DWORD PTR [ebp-144]
	cmp	DWORD PTR [ebp-72], eax
	jne	.L23
	mov	eax, DWORD PTR [ebp-60]
	sub	eax, DWORD PTR [ebp-144]
.L13:
	mov	edx, DWORD PTR [ebp-124]
	add	eax, DWORD PTR [ebp-96]
	sub	DWORD PTR [ebp-64], 2
	add	DWORD PTR [ebp-100], eax	# fold into the loop-5 sum
	sub	DWORD PTR [ebp-60], 2
	cmp	edx, 1
	jg	.L24
	mov	edx, DWORD PTR [ebp-52]
	and	edx, 1
.L11:
	mov	eax, DWORD PTR [ebp-100]
	sub	DWORD PTR [ebp-56], 2
	sub	DWORD PTR [ebp-52], 2
	add	eax, edx
	add	DWORD PTR [ebp-104], eax	# fold into the loop-4 sum
	mov	eax, DWORD PTR [ebp-20]
	sub	eax, DWORD PTR [ebp-148]
	cmp	DWORD PTR [ebp-56], eax
	jne	.L25
	mov	eax, DWORD PTR [ebp-44]
	sub	eax, DWORD PTR [ebp-148]
.L9:
	mov	edx, DWORD PTR [ebp-128]
	add	eax, DWORD PTR [ebp-104]
	sub	DWORD PTR [ebp-48], 2
	add	DWORD PTR [ebp-108], eax	# fold into the loop-3 sum
	sub	DWORD PTR [ebp-44], 2
	cmp	edx, 1
	jg	.L26
	mov	edx, DWORD PTR [ebp-36]
	and	edx, 1
.L7:
	mov	ecx, DWORD PTR [ebp-108]
	sub	DWORD PTR [ebp-40], 2
	sub	DWORD PTR [ebp-36], 2
	lea	eax, [edx+ecx]
	add	DWORD PTR [ebp-112], eax	# fold into the loop-2 sum
	mov	eax, DWORD PTR [ebp-16]
	sub	eax, DWORD PTR [ebp-152]
	cmp	DWORD PTR [ebp-40], eax
	jne	.L27
	mov	eax, DWORD PTR [ebp-28]
	sub	eax, DWORD PTR [ebp-152]
.L5:
	mov	edx, DWORD PTR [ebp-132]
	add	eax, DWORD PTR [ebp-112]
	sub	DWORD PTR [ebp-32], 2
	add	DWORD PTR [ebp-116], eax	# fold into the outermost sum
	sub	DWORD PTR [ebp-28], 2
	cmp	edx, 1
	jg	.L28
	and	DWORD PTR [ebp+8], 1	# loops ran: reduce the argument slot in place to n & 1
.L3:
	mov	eax, DWORD PTR [ebp-116]
	add	eax, DWORD PTR [ebp+8]	# return outermost sum + (n when n <= 1, else n & 1)
	add	esp, 156
	pop	ebx			# restore saved registers in reverse push order
	pop	esi
	pop	edi
	pop	ebp
	ret

はい、もう何がなんだか分からないっ。
でもきっちり速くなります。

まあ、Visual C++とかIntel Compilerと比べると色々ダメな部分もあるけど、gccだって普通には困らない程度に力業でいい結果を吐くよっていう結論かな?