コンパイラ作成(58) char*型の値を返す関数

今回の目標

引き続きchar*型のサポートを頑張るよ。

// Calling a function that returns char*
// answer() takes no arguments and returns a pointer to the string literal "forty-two".
char *answer()
{
    //  Answer to the Ultimate Question of Life, the Universe, and Everything
    return "forty-two";
}

// Entry point of the sample: passes the char* returned by answer() directly to puts().
// NOTE(review): puts is not declared and main has no return statement; this is
// input for the myc compiler, which presumably does not require either - confirm.
int main()
{
    puts(answer());
}

今回はchar*型を返す関数だよ。

function

int型しか受け付けなかったのを変更。

  # Parses one function definition from the token stream and emits its code.
  #
  # Accepted shape:
  #   [type ["*"]] name "(" [type ["*"] name {"," type ["*"] name}] ")" "{" body
  # The return type defaults to "int" when no type token is present, and only
  # a single "*" after a type is recognised.
  #
  # Resets the per-function state (@labelcnt, @lvars, @lvarsize), records the
  # function in @functions as [return type, []] and calls codeflush at the end.
  # The function is recorded *before* its body is compiled so that a recursive
  # call inside the body can already look up the return type.
  #
  # Returns false if EOF is found where the function name is expected,
  # true once a function has been processed.
  def function()
    @labelcnt = 0
    @lvars = Hash.new
    @lvarsize = 0
    kind, str = @lex.gettoken
    rettype = "int"
    if kind == TK::TYPE then
      rettype = str
      kind, str = @lex.gettoken
    end
    if kind == TK::SYMBOL && str == "*" then
      rettype += str
      kind, str = @lex.gettoken
    end
    if kind == TK::EOF then return false end
    if kind != TK::ID then perror "expected identifier" end
    @funcname = str
    parametersize = []
    kind, str = @lex.gettoken
    if kind != TK::SYMBOL || str != "(" then perror end
    # Parameter list
    kind, str = @lex.gettoken
    loop do
      if kind == TK::SYMBOL && str == ")" then break end
      if kind == TK::TYPE then
        type = str
        kind, str = @lex.gettoken
        if kind == TK::SYMBOL && str == "*" then
          type += str
          kind, str = @lex.gettoken
        end
        if kind != TK::ID then perror "wrong parameter name" end
        print "para "+str+"\n" if $opt_d
        size = sizeof type
        # Each parameter gets a stack slot; @lvarsize is the running offset from rbp.
        @lvarsize += size
        parametersize << size
        if @lvars[str] then perror "redefinition parameter \"" + str +"\"" end
        @lvars[str] = [type,@lvarsize]
      else
        perror
      end
      kind, str = @lex.gettoken
      if kind == TK::SYMBOL && str == "," then
        kind, str = @lex.gettoken
      elsif kind != TK::SYMBOL || str != ")" then
        # Without this check "int a int b" would be accepted silently.
        perror "expected ',' or ')'"
      end
    end
    # Register the function now so the body can refer to it (recursion).
    if @functions[@funcname] != nil then perror "redefinition of \"" + @funcname + "\"" end
    @functions[@funcname] = [rettype,[]]
    codegen ".global "+@funcname
    codegen @funcname+":"
    codegen "  push rbp"
    codegen "  mov  rbp, rsp"
    # Emit a placeholder; the real frame size is only known after the body.
    idx = codegen "  sub  rsp, xx"
    # Move the arguments passed in registers into their stack slots.
    offset = 0
    parametersize.each_with_index do |psize, i|
      offset += psize
      if psize == 4 then
        codegen "  mov  dword ptr [rbp - #{offset}], #{@regs32[i]}"
      else
        codegen "  mov  qword ptr [rbp - #{offset}], #{@regs64[i]}"
      end
    end
    kind, str = @lex.gettoken
    # Function body
    if kind != TK::SYMBOL || str != "{" then perror end
    kind, str = block
    codegen ".RET_" + @funcname + ":"
    # Round the frame size up to a multiple of 16.
    size = (@lvarsize+15) / 16 * 16
    # Replace the placeholder with the real size (or drop it if no frame is needed).
    if size != 0 then
      codechange idx,"  sub  rsp, #{size}"
      codegen "  add  rsp, #{size}"
    else
      codechange idx,nil
    end
    codegen "  pop  rbp"
    codegen "  ret"
    p @lvars if $opt_d   # debug output
    optimize if $opt_O != 0
    codeflush
    @funcname = nil
    return true
  end

char**型とかには対応してないよ。いつかサポートしたいけどずっと先の話かも。

codegen_func

ここも返値で型を返すように変更。

  # Emits the code for a function call and returns the call's result type.
  #
  # operand is [name token, "()" token, arg expr, ...]: operand[0].str is the
  # callee name and every element from index 2 on is one argument expression.
  #
  # Argument registers currently in use (the first @numuseregs of @regs64) are
  # pushed around the call; an extra 8 bytes is reserved when that count is odd.
  #
  # The result type is taken from @functions. When the callee is not recorded
  # there, "int" is returned regardless of the argument types.
  def codegen_func operand
    if @numuseregs != 0 then
      if @numuseregs % 2 == 1 then codegen "  sub  rsp, 8" end
      (0...@numuseregs).each do |i| codegen "  push #{@regs64[i]}" end
    end
    (0...operand.size-2).each do |i|
      # While argument i is evaluated, registers 0...i already hold earlier
      # arguments, so expose exactly those as "in use" to nested calls.
      save = @numuseregs
      @numuseregs = i
      argtype = codegen_el operand[i+2]
      @numuseregs = save
      if argtype == "int"
        codegen "  mov  #{@regs32[i]}, eax"
      elsif argtype == "char*"
        codegen "  mov  #{@regs64[i]}, rax"
      else
        perror
      end
    end
    codegen "  call " + operand[0].str
    # Kept separate from argtype: an unknown callee must not inherit the type
    # of its last argument.
    type = "int"
    f = @functions[operand[0].str]
    if f != nil then
      type = f[0]
    end
    if @numuseregs != 0 then
      (0...@numuseregs).reverse_each do |i| codegen "  pop  #{@regs64[i]}" end
      if @numuseregs % 2 == 1 then codegen "  add  rsp, 8" end
    end
    return type
  end

Hashのfunctionsを参照するようにしたよ。このHashで管理してる関数のデータは今まで全然参照してなかったよ。これ参照するんで関数を前方参照すると問題が生じるよ。この辺はC言語と同じなんだけど、mycはまだ関数宣言できないからちょっと問題だな。関数の情報が無かったときはデフォルトのint型として処理してるよ。こういう場合モダンなコンパイラはワーニングを出してるよね。mycもやった方が良いんだろうな。

コード生成部

codegen_funcが型情報を返すようになったんで、それに対応。

  # Emits code that evaluates the left-hand operand of a binary expression
  # and returns its type ("int" or "char*").
  #
  # operand is either an Array (a call shaped [name, "()", args...], handled by
  # codegen_func, or any other nested expression, handled by codegen_el) or a
  # single token: number, identifier or string literal.
  #
  # int values are loaded into eax; char* values into rax, matching how
  # codegen_els and codegen_func treat the two types.
  def codegen_elf(operand)
    type = "int"
    if operand.kind_of?(Array) then
      if !operand[0].kind_of?(Array) && operand[0].kind == TK::ID && operand[1].str == "()" then
        type = codegen_func operand
      else
        type = codegen_el operand
      end
    elsif operand.kind == TK::NUMBER then
      codegen "  mov  eax, " + operand.str
    elsif operand.kind == TK::ID then
      v = @lvars[operand.str]   # [type, offset from rbp]
      if v == nil then
        perror "undeclared variable \"" + operand.str + "\""
      end
      if v[0] == "char*" then
        # Load the full 64-bit slot; a dword load would truncate the pointer.
        type = "char*"
        codegen "  mov  rax, qword ptr [rbp - " + v[1].to_s + "]"
      else
        codegen "  mov  eax, dword ptr [rbp - " + v[1].to_s + "]"
      end
    elsif operand.kind == TK::STRING then
      type = "char*"
      label = addliteral operand.str
      codegen "  lea  rax, "+label
    else
      perror
    end
    return type
  end

  # Emits code for the right-hand operand of a binary expression and for the
  # operation itself. Returns the type of the result ("int" or "char*").
  #
  # op      - operator token; op.str is one of + - * / == != < > <= >=
  # operand - right-hand operand: an Array (function call or nested
  #           expression) or a single number / identifier / string token
  # type_l  - type of the left-hand value, which is expected in eax (int)
  #           or rax (char*) when this method is entered
  #
  # Type rules enforced here: int op int for every operator, char* +/- int,
  # and int + char*. Everything else is reported through perror.
  def codegen_els(op, operand, type_l)
    ostr =
      case op.str
      when "+" then "add "
      when "-" then "sub "
      when "*" then "imul"
      when "/" then "idiv"
      when "==", "!=", "<", ">", "<=", ">=" then "cmp "
      else perror "unknown operator \"" + op.str + "\""
      end

    # Evaluate the right-hand operand.
    type_r = "int"
    if operand.kind_of?(Array) then
      # The left value lives in rax; keep it on the stack (plus 8 bytes of
      # padding) while the right side is computed.
      codegen "  sub  rsp, 8"
      codegen "  push rax"
      # Same call test as codegen_elf: a nested Array in operand[0] is an
      # expression, not a callee name.
      if !operand[0].kind_of?(Array) && operand[0].kind == TK::ID && operand[1].str == "()" then
        type_r = codegen_func operand
      else
        type_r = codegen_el operand
      end
      if type_r == "char*" then
        # A pointer result needs the full 64-bit register.
        codegen "  mov  r10, rax"
        str = "r10"
      else
        codegen "  mov  r10d, eax"
        str = "r10d"
      end
      codegen "  pop  rax"
      codegen "  add  rsp, 8"
    elsif operand.kind == TK::ID then
      v = @lvars[operand.str]   # [type, offset from rbp]
      if v == nil then
        perror "undeclared variable \"" + operand.str + "\""
      end
      type_r = v[0]
      if type_r == "char*"
        str = "qword ptr [rbp - " + v[1].to_s + "]"
      else
        str = "dword ptr [rbp - " + v[1].to_s + "]"
      end
    elsif operand.kind == TK::NUMBER then
      str = operand.str
    elsif operand.kind == TK::STRING then
      type_r = "char*"
      label = addliteral operand.str
      codegen "  lea  r10, "+label
      str = "r10"
    else
      perror
    end

    # Type check.
    if type_l != type_r then
      if type_l == "char*" && type_r == "int" then
        if op.str != "+" && op.str != "-" then
          perror "mismatched types to binary operation"
        end
      elsif type_l == "int" && type_r == "char*" then
        # Only int + char* is meaningful; int - char* is not valid C.
        if op.str != "+" then
          perror "mismatched types to binary operation"
        end
      else
        perror "mismatched types to binary operation"
      end
    elsif type_l == "char*" then
      perror "mismatched types to binary operation"
    end

    # Combine the left and right operands.
    setcc = { "==" => "sete", "!=" => "setne", "<" => "setl",
              ">" => "setg", "<=" => "setle", ">=" => "setge" }[op.str]
    if setcc then
      codegen "  " + ostr + " eax, " + str
      codegen "  " + setcc + " al"
      codegen "  and  eax, 1"
    elsif op.str == "*" || op.str == "/" then
      if str != "r10d" then codegen "  mov  r10d, " + str end
      # One-operand imul/idiv write edx, so rdx is parked in r11 meanwhile.
      codegen "  mov  r11, rdx"
      if op.str == "/" then
        codegen "  cdq"
      end
      codegen "  " + ostr + " r10d"
      codegen "  mov  rdx, r11"
    else
      if type_l == "char*" && type_r == "int" then
        if !operand.kind_of?(Array) && operand.kind == TK::NUMBER then
          # A literal can be used directly as an immediate.
          codegen "  " + ostr + " rax, " + str
        elsif str == "r10d" then
          codegen "  movsx r10, r10d"
          codegen "  " + ostr + " rax, r10"
        else
          codegen "  mov  r10d, " + str
          codegen "  movsx r10, r10d"
          codegen "  " + ostr + " rax, r10"
        end
      elsif type_l == "int" && type_r == "char*" then
        codegen "  movsx rax, eax"
        codegen "  " + ostr + " rax, " + str
        type_l = "char*"
      else
        codegen "  " + ostr + " eax, " + str
      end
    end
    return type_l
  end

ちゃんと返値を受け取るようにしただけ。

動作テスト

それじゃ行くよ。

~/myc$ myc -d o15.myc
[forty-two]
[forty-two]
{}
[[puts, (), [[answer, ()]]]]
[[puts, (), [[answer, ()]]]]
{}
{"answer"=>["char*", []], "main"=>["int", []]}
~/myc$ ./o15
forty-two
~/myc$ 

動いたよ。さて次は何やろうかな。char*型のサポートはちょっと飽きてきたなあ。何か違うことしたい。