コンパイラ作成(70) ポインタ型とint型の演算

今回の目標

引き続きポインタ型のサポートを頑張るよ。

// int* + int: print the address produced by adding an int to an int*,
// once with the pointer on the left (p+i) and once with it on the right (i+p).
// Note: printf is called below without an extern declaration in this listing.
extern void *malloc(size_t size);
extern void free(void *ptr);
extern int puts(char *s);

int main()
{
    // Room for the 10 elements addressed by the loops below (10*4 bytes).
    int *p = malloc(10*4);
    // Pointer on the left-hand side of '+'.
    for(int i = 0; i < 10; i = i + 1)
        printf("p + %d = %016lx\n", i, p+i);
    // Blank line between the two listings.
    puts("");
    // Pointer on the right-hand side of '+'.
    for(int i = 0; i < 10; i = i + 1)
        printf("%d + p = %016lx\n", i, i+p);
    free(p);
}

前に作ったchar*型とint型の演算を一般化するよ。

sizeof

voidとcharを追加。

  # Size in bytes of a value of the given type name.
  #
  # type - type name string, e.g. "int" or "char*"
  #
  # Every pointer type is 8 bytes. "void" is given size 1 so that arithmetic
  # on void* advances one byte at a time. Any other name is reported through
  # perror.
  def sizeof(type)
    return 8 if is_pointer_type?(type)

    byte_sizes = {
      "void"   => 1,
      "char"   => 1,
      "int"    => 4,
      "size_t" => 8
    }
    byte_sizes.fetch(type) { perror "unknown type '#{type}'" }
  end

これ追加したんでchar型やvoid型の変数が宣言できるようになっちゃうよ。void型の変数は拙いよね。それとvoid*型への加減算って、標準のC言語では許されてないんだよね(void型にはサイズがないので)。sizeof(void)を1として扱うのはgccの独自拡張。clang先輩もサポートしてるんでmycにも深く考えずに入れちゃったよ。うーむ。

コード生成部

今回の修正のメイン。

  # Generates code for the right-hand operand of a binary operation and for
  # the operation itself.
  #
  # The emitted code treats rax/eax as holding the left operand's value and
  # leaves the result there. r10 (and r11 for some paths) is used as scratch.
  #
  # op      - operator token; op.str is one of + - * / % == != < > <= >=
  # operand - right operand: either an Array (evaluated through codegen_func
  #           or codegen_el) or a single token of kind TK::ID, TK::NUMBER or
  #           TK::STRING
  # type_l  - type name of the left operand
  #
  # Returns the type name of the result: type_l, except for int + pointer
  # where the pointer type of the right operand is returned.
  # Unknown operators, undeclared variables and invalid operand type
  # combinations are reported through perror.
  def codegen_els(op, operand, type_l)
    mnemonics = {
      "+"  => "add ", "-"  => "sub ",
      "*"  => "imul", "/"  => "idiv", "%"  => "idiv",
      "==" => "cmp ", "!=" => "cmp ",
      "<"  => "cmp ", ">"  => "cmp ", "<=" => "cmp ", ">=" => "cmp "
    }
    ostr = mnemonics[op.str]
    if ostr == nil then
      perror "unknown operator \"" + op.str + "\""
    end

    # Evaluate the right operand.
    type_r = "int"
    literal = nil   # integer value of the operand when it is a number literal
    if operand.kind_of?(Array) then
      # NOTE(review): operand[0].size == 2 is kept exactly as found; confirm
      # whether operand.size == 2 was intended.
      is_call = operand[0].size == 2 && operand[0].kind == TK::ID && operand[1].str == "()"
      # Preserve the left operand across the nested evaluation; the extra
      # 8 bytes keep the push from changing rsp's 16-byte alignment.
      codegen "  sub  rsp, 8"
      codegen "  push rax"
      if is_call then
        type_r = codegen_func operand
      else
        type_r = codegen_el operand
      end
      # NOTE(review): when type_r is a pointer type only the low 32 bits are
      # carried over in r10d.
      codegen "  mov  r10d, eax"
      codegen "  pop  rax"
      codegen "  add  rsp, 8"
      str = "r10d"
    elsif operand.kind == TK::ID then
      v = get_var operand.str
      if v == nil then
        perror "undeclared variable \"" + operand.str + "\""
      end
      type_r = v[0]
      if is_pointer_type? type_r then
        str = "qword ptr [rbp - " + v[1].to_s + "]"
      else
        str = "dword ptr [rbp - " + v[1].to_s + "]"
      end
    elsif operand.kind == TK::NUMBER then
      str = operand.str
      # Integer() raises on text it cannot parse; nil then selects the
      # register-based path further down.
      literal = (Integer(str) rescue nil)
    elsif operand.kind == TK::STRING then
      type_r = "char*"
      label = addliteral operand.str
      codegen "  lea  r10, "+label
      str = "r10"
    else
      perror
    end

    # Type check.
    if type_l != type_r then
      if is_pointer_type?(type_l) && type_r == "int" then
        # pointer + int and pointer - int are the only mixed forms allowed.
        if op.str != "+" && op.str != "-" then
          perror "mismatched types to binary operation"
        end
      elsif type_l == "int" && is_pointer_type?(type_r) then
        # int + pointer is valid C, int - pointer is not.
        if op.str != "+" then
          perror "mismatched types to binary operation"
        end
      else
        perror "mismatched types to binary operation"
      end
    elsif type_l == "char*" then
      perror "mismatched types to binary operation"
    end

    # Combine the left and right operands.
    setcc = {
      "==" => "sete", "!=" => "setne",
      "<"  => "setl", ">"  => "setg", "<=" => "setle", ">=" => "setge"
    }
    if setcc.key?(op.str) then
      codegen "  " + ostr + " eax, " + str
      codegen "  " + setcc[op.str] + " al"
      codegen "  and  eax, 1"
    elsif op.str == "*" || op.str == "/" || op.str == "%" then
      if str != "r10d" then codegen "  mov  r10d, " + str end
      # cdq, idiv and one-operand imul all write edx, so rdx is parked in r11.
      codegen "  mov  r11, rdx"
      if op.str == "/" || op.str == "%" then
        codegen "  cdq"
      end
      codegen "  " + ostr + " r10d"
      if op.str == "%" then
        # idiv leaves the remainder in edx.
        codegen "  mov  eax, edx"
      end
      codegen "  mov  rdx, r11"
    else
      if is_pointer_type?(type_l) && type_r == "int" then
        # Element size of the pointed-to type (type name minus trailing '*').
        size = sizeof type_l[0,type_l.length-1]
        offset = literal == nil ? nil : literal * size
        if offset != nil && offset.between?(-0x80000000, 0x7fffffff) then
          # Literal index: fold the scaling into a 32-bit immediate.
          codegen "  " + ostr + " rax, " + offset.to_s
        else
          if str != "r10d" then codegen "  mov  r10d, " + str end
          codegen "  movsx r10, r10d"
          if size == 4 then
            codegen "  shl  r10, 2"
            codegen "  " + ostr + " rax, r10"
          elsif size == 8 then
            codegen "  shl  r10, 3"
            codegen "  " + ostr + " rax, r10"
          else
            # Other sizes: apply the unscaled index once per byte of size.
            (0...size).each do
              codegen "  " + ostr + " rax, r10"
            end
          end
        end
      elsif type_l == "int" && is_pointer_type?(type_r) then
        # The int on the left is the index: widen and scale it in rax, then
        # combine it with the pointer operand.
        size = sizeof type_r[0,type_r.length-1]
        codegen "  movsx rax, eax"
        if size == 1 then
        elsif size == 4 then
          codegen "  shl  rax, 2"
        elsif size == 8 then
          codegen "  shl  rax, 3"
        else
          codegen "  mov  r11, rax"
          (0...size-1).each do
            codegen "  add  rax, r11"
          end
        end
        codegen "  " + ostr + " rax, " + str
        type_l = type_r
      else
        codegen "  " + ostr + " eax, " + str
      end
    end
    return type_l
  end

型チェックのとこと実際に加減算してるとこを修正。型を見て正しいサイズで加減算してる。サイズが4、8の時はshl使ってるけど、それ以外の時はaddをサイズ分繰り返すってアホなコードになってるよ。将来構造体をサポートしたとき見直そう。

動作テスト

どうかな。

~/myc$ myc o23.myc
~/myc$ ./o23
p + 0 = 0000000000c49260
p + 1 = 0000000000c49264
p + 2 = 0000000000c49268
p + 3 = 0000000000c4926c
p + 4 = 0000000000c49270
p + 5 = 0000000000c49274
p + 6 = 0000000000c49278
p + 7 = 0000000000c4927c
p + 8 = 0000000000c49280
p + 9 = 0000000000c49284

0 + p = 0000000000c49260
1 + p = 0000000000c49264
2 + p = 0000000000c49268
3 + p = 0000000000c4926c
4 + p = 0000000000c49270
5 + p = 0000000000c49274
6 + p = 0000000000c49278
7 + p = 0000000000c4927c
8 + p = 0000000000c49280
9 + p = 0000000000c49284
~/myc$

4つずつ増えてる。大丈夫そうだね。調子に乗ってもういっちょ行ってみるよ。

// Dereferencing the result of int* + int: store through *(p+i), then read
// the values back through the same expression.
// Note: printf is called below without an extern declaration in this listing,
// and the declared puts is not used.
extern void *malloc(size_t size);
extern void free(void *ptr);
extern int puts(char *s);

int main()
{
    // Room for the 10 elements addressed by the loops below (10*4 bytes).
    int *p = malloc(10*4);
    // Write i into the i-th element.
    for(int i = 0; i < 10; i = i + 1)
        *(p+i) = i;
    // Read each element back and print it.
    for(int i = 0; i < 10; i = i + 1)
        printf("*(p + %d) = %d\n", i, *(p+i));
    free(p);
}

前回の間接参照演算子との組み合わせだよ。

~/myc$ myc o24.myc
~/myc$ ./o24
*(p + 0) = 0
*(p + 1) = 1
*(p + 2) = 2
*(p + 3) = 3
*(p + 4) = 4
*(p + 5) = 5
*(p + 6) = 6
*(p + 7) = 7
*(p + 8) = 8
*(p + 9) = 9
~/myc$

おお、動いた。次回は変数宣言の見直しだな。void型変数を弾くようにしないといけないよ。