コンパイラ作成(100) 配列の参照
今回の目標
今回は配列の参照だよ。
// 配列 int main() { int a[10], *p; p = a; *p = 42; printf("a[0] = %d\n", a[0]); printf("0[a] = %d\n", 0[a]); printf("10 + a[0] = %d\n", 10 + a[0]); printf("10 + 0[a] = %d\n", 10 + 0[a]); }
配列への代入は今回はやらないんでポインタを通して値を設定してる。
read_el
今回は修正箇所多いよ。まずは式リストの読み込み処理。
# Reads the tokens of one expression.
#
# Parameters
#   fkind, fstr : kind and text of the current token
#   skind, sstr : kind and text of the look-ahead token
# Returns
#   el   : expression's token list (nested arrays for calls, parenthesised
#          sub-expressions and multi-token subscripts)
#   kind : kind of the token that was not consumed
#   str  : text of the token that was not consumed
# Reading stops, without consuming the token, at EOF or at one of
# ";", ")", "]", ",", "}".
def read_el(fkind,fstr,skind,sstr)
  el = []

  # Leading operand: call arguments are read with read_modify_el.
  skind, sstr = read_el_operand el, fkind, fstr, skind, sstr, true

  loop do
    break if skind == TK::EOF
    break if sstr == ";" || sstr == ")" || sstr == "]" || sstr == "," || sstr == "}"

    fkind, fstr = skind, sstr
    skind, sstr = @lex.gettoken

    # NOTE(review): for operands after the first one, call arguments are read
    # with read_el (no precedence rewriting), unlike the leading operand.
    # This asymmetry is preserved from the original code; confirm whether
    # read_modify_el was intended here as well.
    skind, sstr = read_el_operand el, fkind, fstr, skind, sstr, false
  end

  return el, skind, sstr
end

# Internal helper of read_el: appends one operand (function call,
# parenthesised sub-expression, array subscript, or a single token) to el.
#
# Parameters
#   el          : token list being built (mutated in place)
#   fkind, fstr : kind and text of the current token
#   skind, sstr : kind and text of the look-ahead token
#   modify_args : true  -> call arguments are read with read_modify_el
#                 false -> call arguments are read with read_el
# Returns
#   the kind and text of the new look-ahead token
def read_el_operand(el, fkind, fstr, skind, sstr, modify_args)
  if fkind == TK::ID && sstr == "(" then
    # Function call: [name, "()", arg1, arg2, ...]
    call = []
    call << Token.new(fkind,fstr)
    call << Token.new(skind,"()")
    skind, sstr = @lex.gettoken
    loop do
      break if skind == TK::SYMBOL && sstr == ")"
      fkind, fstr = skind, sstr
      skind, sstr = @lex.gettoken
      arg, skind, sstr = if modify_args
                           read_modify_el fkind, fstr, skind, sstr
                         else
                           read_el fkind, fstr, skind, sstr
                         end
      call << arg
      if skind == TK::SYMBOL && sstr == "," then
        skind, sstr = @lex.gettoken
      elsif skind != TK::SYMBOL || sstr != ")" then
        perror
      end
    end
    el << call
    if skind != TK::SYMBOL || sstr != ")" then
      perror
    end
    skind, sstr = @lex.gettoken
  elsif fstr == "(" then
    # Parenthesised sub-expression: read the inside recursively, then step
    # over the token that ended it (not verified to be ")").
    fkind, fstr = skind, sstr
    skind, sstr = @lex.gettoken
    inner, skind, sstr = read_el fkind, fstr, skind, sstr
    el << inner
    skind, sstr = @lex.gettoken
  elsif sstr == "[" then
    # Array subscript: a[i] is emitted as  a "[]" i  so that "[]" can later be
    # handled like a binary operator.
    el << Token.new(fkind,fstr)
    el << Token.new(skind,"[]")
    fkind, fstr = @lex.gettoken
    skind, sstr = @lex.gettoken
    index, skind, sstr = read_el fkind, fstr, skind, sstr
    if index.size > 1 then
      el << index
    else
      # A single-token index is stored flat rather than as a nested list.
      el << index[0]
    end
    # Step over the token that ended the index (not verified to be "]").
    skind, sstr = @lex.gettoken
  else
    el << Token.new(fkind,fstr)
  end
  return skind, sstr
end
配列をa[0]から(a[]0)へと変形してる。こうすることで二項演算子と同じになるんで後の処理が楽になるよ。今回の修正ではエラー処理とかすっ飛ばしちゃってるよ。とりあえず正常なソースをコンパイルできるところまで何とかするけど、それ以外はあちこち手抜きだよ。
read_modify_el
次はここ。
# Reads the tokens of one expression and rewrites the token list one
# operator group at a time.
# Returns the rewritten list plus the kind and text of the token that
# read_el left unconsumed.
def read_modify_el(fkind,fstr,skind,sstr)
  tokens, rest_kind, rest_str = read_el(fkind, fstr, skind, sstr)
  puts to_str(tokens) if $opt_d # debug output

  # Unary operators are rewritten first.
  tokens = modify_el_unaryop(tokens, ["+","-","&","*"])

  # Binary operator groups, applied in this order with the default direction.
  binary_groups = [
    ["[]"],
    ["*","/","%"],
    ["+","-"],
    ["<",">","<=",">="],
    ["==","!="]
  ]
  binary_groups.each do |group|
    tokens = modify_el(tokens, group)
  end

  # Assignment is processed last, with the :r_to_l option.
  tokens = modify_el(tokens, ["="], :r_to_l)

  puts to_str(tokens) if $opt_d # debug output
  return tokens, rest_kind, rest_str
end
[]を追加したんだけどこれ問題ありだよ。C言語の仕様では配列は単項演算子より優先順位が上なんで、本当はel = modify_el el, ["[]"]をel = modify_el_unaryop el, ["+","-","&","*"]より前に持っていかなきゃならないんだけど、modify_elは単項演算子が混じってるelに対応できるようになってないんだよね。それで今回はこういう順番にしちゃった。配列のサポートがある程度出来上がったら修正しようと思ってるよ。
新しいメソッド
codegen_elsから処理を切り出したよ。
# Code generation for "pointer op int".
# The pointer value is in rax; the int operand is scaled by the size of the
# pointed-to type and then combined with rax.
#
# Parameters
#   type_l : pointer type string; the element type is this string without
#            its last character
#   op     : operator token
#   ostr   : mnemonic to emit (as returned by mnemonic)
#   str    : text of the int operand (register name, memory operand, literal)
def codegen_pointer_int(type_l,op,ostr,str)
  size = sizeof type_l[0,type_l.length-1]
  if str == op.str then
    # NOTE(review): codegen_els passes the operator token as op, so this
    # compares the operand text with the operator text. Presumably it was
    # meant to detect a numeric literal operand and fold the scaling at
    # compile time - confirm against the caller.
    codegen " " + ostr + " rax, " + (str.to_i*size).to_s
  else
    # Bring the operand into r10d unless it is already there, then
    # sign-extend it to 64 bits.
    codegen " mov r10d, " + str if str != "r10d"
    codegen " movsx r10, r10d"
    if size == 4 then
      codegen " shl r10, 2"
      codegen " " + ostr + " rax, r10"
    elsif size == 8 then
      # Fix: the "r10d" path used to shift by 2 here, scaling 8-byte
      # elements by 4 instead of 8.
      codegen " shl r10, 3"
      codegen " " + ostr + " rax, r10"
    else
      # Other sizes: apply the operand size times.
      (0...size).each do
        codegen " " + ostr + " rax, r10"
      end
    end
  end
end

# Code generation for "int op pointer".
# The int value is in eax; it is sign-extended, scaled by the size of the
# pointed-to type, and then combined with the pointer operand.
#
# Parameters
#   type_r : pointer type string; the element type is this string without
#            its last character
#   op     : operator token (not used here)
#   ostr   : mnemonic to emit (as returned by mnemonic)
#   str    : text of the pointer operand
def codegen_int_pointer(type_r,op,ostr,str)
  size = sizeof type_r[0,type_r.length-1]
  codegen " movsx rax, eax"
  case size
  when 1
    # no scaling needed
  when 4
    codegen " shl rax, 2"
  when 8
    codegen " shl rax, 3"
  else
    # Other sizes: rax = rax * size by repeated addition.
    codegen " mov r11, rax"
    (0...size-1).each do
      codegen " add rax, r11"
    end
  end
  codegen " " + ostr + " rax, " + str
end
配列の参照処理はポインタとint型の加算と同じような処理なんで切り出したメソッドを活用してくよ。
mnemonic
# Chooses the instruction mnemonic for a binary operator.
#
# Parameters
#   op   : operator token (only op.str is used)
#   type : operand type; "double" selects the floating-point mnemonic, any
#          other value selects the integer one
# Returns the mnemonic string; an unknown operator is reported via perror.
def mnemonic(op, type)
  floating = (type == "double")
  case op.str
  when "+", "[]"
    # Subscripting uses the same instruction as addition.
    floating ? "addsd " : "add "
  when "-"
    floating ? "subsd " : "sub "
  when "*"
    floating ? "mulsd " : "imul"
  when "/", "%"
    floating ? "divsd " : "idiv"
  when "==", "!=", "<", ">", "<=", ">="
    # Every comparison uses the same compare instruction.
    floating ? "ucomisd" : "cmp "
  else
    perror "unknown operator \"" + op.str + "\""
  end
end
ここは[]を追加しただけ。
codegen_els
コード生成部。あちこちに手を入れたよ。
# Code generation for the right-hand operand of a binary operation, followed
# by the operation itself.
#
# Parameters
#   op      : operator token (op.str is the operator text)
#   operand : right operand; either a single token or a nested token list
#   type_l  : type of the already-evaluated left operand
# Returns
#   the type of the result
# The emitted code works on eax/rax (al for char) for integer and pointer
# values and on xmm8 for double; the right operand is placed in r10/r10d,
# xmm9, or referenced directly as a memory operand or literal.
def codegen_els(op, operand, type_l)
  # Evaluate the right operand
  type_r = "int"
  if operand.kind_of?(Array) then
    if operand[0].size == 2 && operand[0].kind == TK::ID && operand[1].str == "()" then
      # Nested list of the form [name, "()", ...]: function call
      if type_l == "double" then
        # Keep the left operand (xmm8) on the stack across the call
        codegen " sub rsp, 16"
        codegen " movsd [rsp], xmm8"
        type_r = codegen_func operand
        if type_r == "double" then
          codegen " movsd xmm9, xmm8"
          str = "xmm9"
        else
          codegen " mov r10d, eax"
          str = "r10d"
        end
        codegen " movsd xmm8, [rsp]"
        codegen " add rsp, 16"
      else
        # Keep the left operand (rax) on the stack across the call.
        # NOTE(review): the extra 8 bytes presumably keep the stack 16-byte
        # aligned - confirm.
        codegen " sub rsp, 8"
        codegen " push rax"
        type_r = codegen_func operand
        if type_r == "double" then
          codegen " movsd xmm9, xmm8"
          str = "xmm9"
        else
          codegen " mov r10d, eax"
          str = "r10d"
        end
        codegen " pop rax"
        codegen " add rsp, 8"
      end
    else
      # Any other nested list: sub-expression, same save/restore as above
      if type_l == "double" then
        codegen " sub rsp, 16"
        codegen " movsd [rsp], xmm8"
        type_r = codegen_el operand
        if type_r == "double" then
          codegen " movsd xmm9, xmm8"
          str = "xmm9"
        else
          codegen " mov r10d, eax"
          str = "r10d"
        end
        codegen " movsd xmm8, [rsp]"
        codegen " add rsp, 16"
      else
        codegen " sub rsp, 8"
        codegen " push rax"
        type_r = codegen_el operand
        if type_r == "double" then
          codegen " movsd xmm9, xmm8"
          str = "xmm9"
        else
          codegen " mov r10d, eax"
          str = "r10d"
        end
        codegen " pop rax"
        codegen " add rsp, 8"
      end
    end
  elsif operand.kind == TK::ID then
    # Variable: v[0] is used as its type and v[1] as its offset below rbp
    v = get_var operand.str
    if v == nil then
      perror "undeclared variable \"" + operand.str + "\""
    end
    type_r = v[0]
    if is_array_type? type_r then
      # An array operand contributes its address, not its contents
      codegen " lea r10, [rbp - " + v[1].to_s + "]"
      str = "r10"
    elsif is_pointer_type? type_r then
      str = "qword ptr [rbp - " + v[1].to_s + "]"
    elsif type_r == "double" then
      str = "qword ptr [rbp - " + v[1].to_s + "]"
    elsif type_r == "char" then
      str = "byte ptr [rbp - " + v[1].to_s + "]"
    else
      str = "dword ptr [rbp - " + v[1].to_s + "]"
    end
  elsif operand.kind == TK::NUMBER then
    # Integer literal: used directly as the operand text
    str = operand.str
  elsif operand.kind == TK::STRING then
    # String literal: registered with addliteral, address loaded into r10
    type_r = "char*"
    label = addliteral operand.str
    codegen " lea r10, "+label
    str = "r10"
  elsif operand.kind == TK::FLOAT then
    # Floating-point literal
    type_r = "double"
    label = addliteral operand.str, :double
    str = "qword ptr [#{label}]"
  else
    perror
  end

  # Type check
  # Mixed int/double: convert the int side to double
  if type_l == "double" && type_r == "int" then
    if str != "r10d" then
      codegen " mov r10d, #{str}"
    end
    codegen " cvtsi2sd xmm9, r10d"
    str = "xmm9"
    type_r = type_l
  elsif type_l == "int" && type_r == "double" then
    codegen " cvtsi2sd xmm8, eax"
    type_l = type_r
  end
  if type_l != type_r then
    # Differing types are accepted only for pointer/int with "+" or "-"
    # and for array/int with "[]"
    if is_pointer_type?(type_l) && type_r == "int" then
      if op.str != "+" && op.str != "-" then
        perror "mismatched types to binary operation"
      end
    elsif type_l == "int" && is_pointer_type?(type_r) then
      if op.str != "+" && op.str != "-" then
        perror "mismatched types to binary operation"
      end
    elsif is_array_type?(type_l) && type_r == "int" then
      if op.str != "[]" then
        perror "mismatched types to binary operation"
      end
    elsif type_l == "int" && is_array_type?(type_r) then
      if op.str != "[]" then
        perror "mismatched types to binary operation"
      end
    else
      perror "mismatched types to binary operation"
    end
  elsif is_pointer_type? type_l then
    # Both operands have the same pointer type: rejected for every operator
    perror "mismatched types to binary operation"
  elsif type_l == "double" && op.str == "%" then
    perror "mismatched types to binary operation"
  end

  # Register that holds the left operand
  reg = "eax"
  if type_l == "double" then
    reg = "xmm8"
  elsif type_l == "char" then
    reg = "al"
  end

  # Compute with the left and right operands
  ostr = mnemonic op, type_l
  if op.str == "==" then
    codegen " " + ostr + " #{reg}, " + str
    if type_l == "double" then
      # Equal only if ZF is set and PF is clear
      codegen " sete al"
      codegen " setnp r10b"
      codegen " and al, r10b"
      codegen " and eax, 1"
      type_l = "int"
    else
      codegen " sete al"
      codegen " and eax, 1"
    end
  elsif op.str == "!=" then
    codegen " " + ostr + " #{reg}, " + str
    if type_l == "double" then
      # Not equal if ZF is clear or PF is set
      codegen " setne al"
      codegen " setp r10b"
      codegen " or al, r10b"
      codegen " and eax, 1"
      type_l = "int"
    else
      codegen " setne al"
      codegen " and eax, 1"
    end
  elsif op.str == "<" then
    codegen " " + ostr + " #{reg}, " + str
    if type_l == "double" then
      codegen " setb al"
      codegen " and eax, 1"
      type_l = "int"
    else
      codegen " setl al"
      codegen " and eax, 1"
    end
  elsif op.str == ">" then
    codegen " " + ostr + " #{reg}, " + str
    if type_l == "double" then
      codegen " seta al"
      codegen " and eax, 1"
      type_l = "int"
    else
      codegen " setg al"
      codegen " and eax, 1"
    end
  elsif op.str == "<=" then
    codegen " " + ostr + " #{reg}, " + str
    if type_l == "double" then
      codegen " setbe al"
      codegen " and eax, 1"
      type_l = "int"
    else
      codegen " setle al"
      codegen " and eax, 1"
    end
  elsif op.str == ">=" then
    codegen " " + ostr + " #{reg}, " + str
    if type_l == "double" then
      codegen " setae al"
      codegen " and eax, 1"
      type_l = "int"
    else
      codegen " setge al"
      codegen " and eax, 1"
    end
  elsif type_l != "double" && (op.str == "*" || op.str == "/" || op.str == "%") then
    # Integer multiply / divide / remainder with the right operand in r10d;
    # rdx is saved in r11 and restored afterwards
    mov = "mov"
    if type_r == "char" then
      mov = "movsx"
    end
    if type_l == "char" then
      codegen " movsx eax, al"
    end
    if str != "r10d" then
      codegen " #{mov} r10d, " + str
    end
    codegen " mov r11, rdx"
    if op.str == "/" || op.str == "%" then
      codegen " cdq"
    end
    codegen " " + ostr + " r10d"
    if op.str == "%" then
      # The remainder is taken from edx
      codegen " mov eax, edx"
    end
    codegen " mov rdx, r11"
  else
    if is_pointer_type?(type_l) && type_r == "int" then
      # pointer op int
      codegen_pointer_int type_l, op, ostr, str
    elsif type_l == "int" && is_pointer_type?(type_r) then
      # int op pointer
      codegen_int_pointer type_r, op, ostr, str
      type_l = type_r
    elsif is_array_type?(type_l) && type_r == "int" then
      # array[int]: compute the element address like pointer + int, then load
      # the element; the result type is the pointer type minus its last
      # character. The load is always a 32-bit "mov eax, [rax]".
      type_l = array_to_pointer type_l
      codegen_pointer_int type_l, op, ostr, str
      codegen " mov eax, [rax]"
      type_l = type_l[0,type_l.length-1]
    elsif type_l == "int" && is_array_type?(type_r) then
      # int[array]: same as above with the operands swapped
      type_r = array_to_pointer type_r
      codegen_int_pointer type_r, op, ostr, str
      codegen " mov eax, [rax]"
      type_l = type_r[0,type_r.length-1]
    else
      # Remaining operations: one instruction on the left operand's register
      codegen " " + ostr + " #{reg}, " + str
    end
  end
  return type_l
end
これで全部修正できたかな。
動作テスト
~/myc$ myc q4.myc ~/myc$ ./q4 a[0] = 42 0[a] = 42 10 + a[0] = 52 10 + 0[a] = 52 ~/myc$
おお、動いたよ。今回は修正箇所多くて大変だったけどなんとか上手く行った。さて次は何やろう。配列への代入かな。