1 #include "llvm/Analysis/Passes.h"
2 #include "llvm/ExecutionEngine/ExecutionEngine.h"
3 #include "llvm/ExecutionEngine/MCJIT.h"
4 #include "llvm/ExecutionEngine/SectionMemoryManager.h"
5 #include "llvm/IR/DataLayout.h"
6 #include "llvm/IR/DerivedTypes.h"
7 #include "llvm/IR/IRBuilder.h"
8 #include "llvm/IR/LLVMContext.h"
9 #include "llvm/IR/Module.h"
10 #include "llvm/IR/Verifier.h"
11 #include "llvm/PassManager.h"
12 #include "llvm/Support/TargetSelect.h"
13 #include "llvm/Transforms/Scalar.h"
21 //===----------------------------------------------------------------------===//
23 //===----------------------------------------------------------------------===//
25 // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
26 // of these for known things.
// Every named code below is negative, so it cannot collide with a raw
// character value.
// NOTE(review): the enum header and the -1 entry are absent from this listing;
// MainLoop tests `tok_eof`, so that is presumably the missing -1 entry.
// Keywords that introduce a top-level item.
31 tok_def = -2, tok_extern = -3,
// Primary-expression tokens; gettok leaves their payload in IdentifierStr / NumVal.
34 tok_identifier = -4, tok_number = -5,
// Control-flow keywords.
37 tok_if = -6, tok_then = -7, tok_else = -8,
38 tok_for = -9, tok_in = -10
// Payload of the most recently lexed token. gettok writes these; the parser
// reads them right after seeing the matching token code.
41 static std::string IdentifierStr; // Filled in if tok_identifier
42 static double NumVal; // Filled in if tok_number
44 /// gettok - Return the next token from standard input.
/// Keywords and identifiers yield negative tok_* codes; the text of an
/// identifier is left in IdentifierStr and the value of a number in NumVal.
// NOTE(review): several lines of this function (including its signature) are
// absent from this listing; the comments describe only what is visible.
// One character of lookahead. It is static, so it survives between calls.
46 static int LastChar = ' ';
48 // Skip any whitespace.
49 while (isspace(LastChar))
52 if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
53 IdentifierStr = LastChar;
// The read happens inside the condition, so on exit LastChar holds the first
// character that is not part of the identifier.
54 while (isalnum((LastChar = getchar())))
55 IdentifierStr += LastChar;
// Reserved words are matched exactly; anything else is a plain identifier.
57 if (IdentifierStr == "def") return tok_def;
58 if (IdentifierStr == "extern") return tok_extern;
59 if (IdentifierStr == "if") return tok_if;
60 if (IdentifierStr == "then") return tok_then;
61 if (IdentifierStr == "else") return tok_else;
62 if (IdentifierStr == "for") return tok_for;
63 if (IdentifierStr == "in") return tok_in;
64 return tok_identifier;
67 if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
// NOTE(review): the loop accepts any mix of digits and '.', so input such as
// "1.2.3" is consumed as one token. strtod converts only the leading valid
// prefix and, with a null end pointer, the remainder is silently ignored.
72 } while (isdigit(LastChar) || LastChar == '.');
74 NumVal = strtod(NumStr.c_str(), 0);
78 if (LastChar == '#') {
79 // Comment until end of line.
80 do LastChar = getchar();
81 while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
87 // Check for end of file. Don't eat the EOF.
91 // Otherwise, just return the character as its ascii value.
92 int ThisChar = LastChar;
97 //===----------------------------------------------------------------------===//
98 // Abstract Syntax Tree (aka Parse Tree)
99 //===----------------------------------------------------------------------===//
101 /// ExprAST - Base class for all expression nodes.
// Virtual destructor so a derived node can be destroyed through an ExprAST*.
104 virtual ~ExprAST() {}
// Generates code for this node and returns the resulting Value. The overrides
// in this file return 0 when code generation fails.
105 virtual Value *Codegen() = 0;
108 /// NumberExprAST - Expression class for numeric literals like "1.0".
109 class NumberExprAST : public ExprAST {
// Stores the literal's value in Val.
112 NumberExprAST(double val) : Val(val) {}
// Overrides ExprAST::Codegen; defined out of line.
113 virtual Value *Codegen();
116 /// VariableExprAST - Expression class for referencing a variable, like "a".
117 class VariableExprAST : public ExprAST {
// Keeps its own copy of the variable name.
120 VariableExprAST(const std::string &name) : Name(name) {}
// Overrides ExprAST::Codegen; defined out of line.
121 virtual Value *Codegen();
124 /// BinaryExprAST - Expression class for a binary operator.
125 class BinaryExprAST : public ExprAST {
// op is the operator character; lhs/rhs are the operand subtrees, stored as
// raw pointers.
// NOTE(review): no destructor is visible in this listing, so it is unclear
// who frees the operand nodes - confirm ownership.
129 BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
130 : Op(op), LHS(lhs), RHS(rhs) {}
// Overrides ExprAST::Codegen; defined out of line.
131 virtual Value *Codegen();
134 /// CallExprAST - Expression class for function calls.
135 class CallExprAST : public ExprAST {
// Argument expressions, in call order.
137 std::vector<ExprAST*> Args;
// callee is the name of the function being called. args is taken by non-const
// reference but is only copied into Args; the caller's vector is not modified.
139 CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
140 : Callee(callee), Args(args) {}
// Overrides ExprAST::Codegen; defined out of line.
141 virtual Value *Codegen();
144 /// IfExprAST - Expression class for if/then/else.
145 class IfExprAST : public ExprAST {
// Condition and the two branch expressions, held as raw pointers.
// NOTE(review): no destructor is visible in this listing - confirm who owns
// and frees these nodes.
146 ExprAST *Cond, *Then, *Else;
// The third parameter is spelled _else because `else` is a keyword.
148 IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
149 : Cond(cond), Then(then), Else(_else) {}
// Overrides ExprAST::Codegen; defined out of line.
150 virtual Value *Codegen();
153 /// ForExprAST - Expression class for for/in.
154 class ForExprAST : public ExprAST {
// Start: initial value of the loop variable; End: loop condition expression;
// Step: increment (the code generator treats a null Step as 1.0 - see the
// "If not specified, use 1.0" path in ForExprAST::Codegen); Body: loop body.
156 ExprAST *Start, *End, *Step, *Body;
// varname is the name of the loop variable.
158 ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
159 ExprAST *step, ExprAST *body)
160 : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
// Overrides ExprAST::Codegen; defined out of line.
161 virtual Value *Codegen();
164 /// PrototypeAST - This class represents the "prototype" for a function,
165 /// which captures its name, and its argument names (thus implicitly the number
166 /// of arguments the function takes).
// NOTE(review): the class header is absent from this listing. PrototypeAST is
// not used as an ExprAST anywhere in the visible code, and its Codegen returns
// Function* rather than Value*.
// Argument names, in declaration order.
169 std::vector<std::string> Args;
// Copies both the name and the argument-name list.
171 PrototypeAST(const std::string &name, const std::vector<std::string> &args)
172 : Name(name), Args(args) {}
177 /// FunctionAST - This class represents a function definition itself.
// Pairs a prototype with the expression that forms the function body. Both are
// stored as raw pointers.
// NOTE(review): no destructor is visible in this listing - confirm ownership.
182 FunctionAST(PrototypeAST *proto, ExprAST *body)
183 : Proto(proto), Body(body) {}
187 } // end anonymous namespace
189 //===----------------------------------------------------------------------===//
191 //===----------------------------------------------------------------------===//
193 /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
194 /// token the parser is looking at. getNextToken reads another token from the
195 /// lexer and updates CurTok with its results.
/// The new token is also returned, so callers can advance and test in one
/// expression (ParsePrototype does this in its argument loop).
// NOTE(review): the declaration of CurTok is absent from this listing.
197 static int getNextToken() {
198 return CurTok = gettok();
201 /// BinopPrecedence - This holds the precedence for each binary operator that is
/// defined. Keys are operator characters; larger values bind more tightly.
/// The driver's startup code installs '<', '+', '-' and '*'.
203 static std::map<char, int> BinopPrecedence;
205 /// GetTokPrecedence - Get the precedence of the pending binary operator token.
/// Returns -1 when CurTok is not a declared binary operator.
206 static int GetTokPrecedence() {
// The named tok_* codes are negative and so fail this test.
// NOTE(review): the statement guarded by this check is absent from the listing.
207 if (!isascii(CurTok))
210 // Make sure it's a declared binop.
// NOTE(review): std::map::operator[] default-inserts a 0 entry for every
// character that is not already a key, so the map grows on each non-operator
// lookup. A find() would avoid that.
211 int TokPrec = BinopPrecedence[CurTok];
212 if (TokPrec <= 0) return -1;
216 /// Error* - These are little helper functions for error handling.
/// Error prints "Error: <Str>" and a newline to stderr and returns a null
/// ExprAST*, so a parser can write `return Error("...")`.
217 ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
/// ErrorP / ErrorF report through Error and return a null pointer of the type
/// their callers need.
218 PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
219 FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
// Forward declaration: the primary-expression parsers below call
// ParseExpression, which is defined after them.
221 static ExprAST *ParseExpression();
/// identifierexpr - a bare variable reference, or a call:
225 /// ::= identifier '(' expression* ')'
/// Returns a VariableExprAST or a CallExprAST, or null (after reporting through
/// Error) when the argument list is malformed.
// NOTE(review): parts of the argument loop are absent from this listing.
226 static ExprAST *ParseIdentifierExpr() {
// Copy the name now: IdentifierStr is overwritten when the lexer reads the
// next identifier.
227 std::string IdName = IdentifierStr;
229 getNextToken(); // eat identifier.
231 if (CurTok != '(') // Simple variable ref.
232 return new VariableExprAST(IdName);
235 getNextToken(); // eat (
236 std::vector<ExprAST*> Args;
239 ExprAST *Arg = ParseExpression();
243 if (CurTok == ')') break;
246 return Error("Expected ')' or ',' in argument list");
254 return new CallExprAST(IdName, Args);
257 /// numberexpr ::= number
258 static ExprAST *ParseNumberExpr() {
// Build the node before advancing: lexing the next token may overwrite NumVal.
259 ExprAST *Result = new NumberExprAST(NumVal);
260 getNextToken(); // consume the number
264 /// parenexpr ::= '(' expression ')'
/// Parses the inner expression and requires a closing ')'; a missing ')' is
/// reported through Error.
// NOTE(review): the checks between these statements and the final return are
// absent from this listing.
265 static ExprAST *ParseParenExpr() {
266 getNextToken(); // eat (.
267 ExprAST *V = ParseExpression();
271 return Error("expected ')'");
272 getNextToken(); // eat ).
276 /// ifexpr ::= 'if' expression 'then' expression 'else' expression
/// Both the 'then' and the 'else' keyword are required. Returns an IfExprAST,
/// or null when a keyword is missing or a sub-expression fails to parse.
// NOTE(review): on the early error returns the sub-expressions parsed so far
// are not freed in the visible lines.
277 static ExprAST *ParseIfExpr() {
278 getNextToken(); // eat the if.
281 ExprAST *Cond = ParseExpression();
284 if (CurTok != tok_then)
285 return Error("expected then");
286 getNextToken(); // eat the then
288 ExprAST *Then = ParseExpression();
289 if (Then == 0) return 0;
291 if (CurTok != tok_else)
292 return Error("expected else");
296 ExprAST *Else = ParseExpression();
299 return new IfExprAST(Cond, Then, Else);
302 /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
/// The three expressions are, in order, the start value, the end condition and
/// the optional step. Returns a ForExprAST, or null on any syntax error or
/// failed sub-expression.
// NOTE(review): the '=' / ',' token checks and the declaration of Step are
// absent from this listing; only their error messages are visible.
303 static ExprAST *ParseForExpr() {
304 getNextToken(); // eat the for.
306 if (CurTok != tok_identifier)
307 return Error("expected identifier after for");
// Copy the loop-variable name before the lexer overwrites IdentifierStr.
309 std::string IdName = IdentifierStr;
310 getNextToken(); // eat identifier.
313 return Error("expected '=' after for");
314 getNextToken(); // eat '='.
317 ExprAST *Start = ParseExpression();
318 if (Start == 0) return 0;
320 return Error("expected ',' after for start value");
323 ExprAST *End = ParseExpression();
324 if (End == 0) return 0;
326 // The step value is optional.
330 Step = ParseExpression();
331 if (Step == 0) return 0;
334 if (CurTok != tok_in)
335 return Error("expected 'in' after for");
336 getNextToken(); // eat 'in'.
338 ExprAST *Body = ParseExpression();
339 if (Body == 0) return 0;
341 return new ForExprAST(IdName, Start, End, Step, Body);
/// primary - dispatches to the sub-parser that matches the current token.
345 /// ::= identifierexpr
/// Alternatives handled below: identifier, number, parenthesised expression,
/// if-expression and for-expression. Any other token is reported as an error
/// and yields null.
// NOTE(review): the switch header is absent from this listing; the case labels
// are token values, so it presumably switches on CurTok.
350 static ExprAST *ParsePrimary() {
352 default: return Error("unknown token when expecting an expression");
353 case tok_identifier: return ParseIdentifierExpr();
354 case tok_number: return ParseNumberExpr();
355 case '(': return ParseParenExpr();
356 case tok_if: return ParseIfExpr();
357 case tok_for: return ParseForExpr();
/// binoprhs - parses a sequence of (operator, primary) pairs following LHS.
362 /// ::= ('+' primary)*
/// ExprPrec is the minimum operator precedence this call may consume; an
/// operator with lower precedence is left for the caller. LHS is the
/// expression parsed so far.
// NOTE(review): the enclosing loop, the BinOp declaration and some null checks
// are absent from this listing.
363 static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
364 // If this is a binop, find its precedence.
366 int TokPrec = GetTokPrecedence();
368 // If this is a binop that binds at least as tightly as the current binop,
369 // consume it, otherwise we are done.
// GetTokPrecedence returns -1 for a non-operator, which is below every
// ExprPrec passed in this file (0 or TokPrec+1), so this also ends the
// sequence.
370 if (TokPrec < ExprPrec)
373 // Okay, we know this is a binop.
375 getNextToken(); // eat binop
377 // Parse the primary expression after the binary operator.
378 ExprAST *RHS = ParsePrimary();
381 // If BinOp binds less tightly with RHS than the operator after RHS, let
382 // the pending operator take RHS as its LHS.
383 int NextPrec = GetTokPrecedence();
384 if (TokPrec < NextPrec) {
// TokPrec+1 makes the recursive call stop at the next operator whose
// precedence is not higher than the current one, which keeps equal-precedence
// operators left-associative.
385 RHS = ParseBinOpRHS(TokPrec+1, RHS);
386 if (RHS == 0) return 0;
// Fold the pair into LHS so the next operator sees the combined expression.
390 LHS = new BinaryExprAST(BinOp, LHS, RHS);
/// expression
395 /// ::= primary binoprhs
397 static ExprAST *ParseExpression() {
398 ExprAST *LHS = ParsePrimary();
// Minimum precedence 0 lets the RHS parser consume every declared operator
// (the precedences installed at startup are all positive).
401 return ParseBinOpRHS(0, LHS);
/// prototype - a function name followed by a parenthesised argument-name list.
405 /// ::= id '(' id* ')'
/// Returns a PrototypeAST, or null (after reporting through ErrorP) on a
/// syntax error.
// NOTE(review): the '(' and ')' token checks are absent from this listing;
// only their error messages are visible.
406 static PrototypeAST *ParsePrototype() {
407 if (CurTok != tok_identifier)
408 return ErrorP("Expected function name in prototype");
410 std::string FnName = IdentifierStr;
414 return ErrorP("Expected '(' in prototype");
416 std::vector<std::string> ArgNames;
// Argument names are consecutive identifiers. The loop ends at the first token
// that is not an identifier; no separator token is consumed here.
417 while (getNextToken() == tok_identifier)
418 ArgNames.push_back(IdentifierStr);
420 return ErrorP("Expected ')' in prototype");
423 getNextToken(); // eat ')'.
425 return new PrototypeAST(FnName, ArgNames);
428 /// definition ::= 'def' prototype expression
/// Returns a FunctionAST combining the prototype and the body expression;
/// returns null when the prototype fails to parse.
// NOTE(review): the path taken when the body fails to parse is absent from
// this listing; Proto is not freed in the visible lines.
429 static FunctionAST *ParseDefinition() {
430 getNextToken(); // eat def.
431 PrototypeAST *Proto = ParsePrototype();
432 if (Proto == 0) return 0;
434 if (ExprAST *E = ParseExpression())
435 return new FunctionAST(Proto, E);
439 /// toplevelexpr ::= expression
/// Wraps a bare expression in a function with an empty name and no arguments,
/// so it can go through the same code-generation path as a definition.
440 static FunctionAST *ParseTopLevelExpr() {
441 if (ExprAST *E = ParseExpression()) {
442 // Make an anonymous proto.
443 PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
444 return new FunctionAST(Proto, E);
449 /// external ::= 'extern' prototype
/// Returns whatever ParsePrototype returns (null on a syntax error).
450 static PrototypeAST *ParseExtern() {
451 getNextToken(); // eat extern.
452 return ParsePrototype();
455 //===----------------------------------------------------------------------===//
457 //===----------------------------------------------------------------------===//
// Code-generation state shared by all Codegen methods.
// Module that receives every generated function; assigned in the startup code.
459 static Module *TheModule;
// Instruction builder bound to the global context. The Codegen methods move
// its insertion point with SetInsertPoint.
460 static IRBuilder<> Builder(getGlobalContext());
// Maps a variable name to the Value currently bound to it (function arguments
// and for-loop variables).
461 static std::map<std::string, Value*> NamedValues;
// Per-function optimisation pipeline, run by FunctionAST::Codegen.
// NOTE(review): the assignment to TheFPM is absent from this listing.
462 static FunctionPassManager *TheFPM;
/// ErrorV - reports Str through Error and returns a null Value*, so Codegen
/// methods can write `return ErrorV("...")`.
464 Value *ErrorV(const char *Str) { Error(Str); return 0; }
/// Produces the literal as a floating-point constant built from Val.
466 Value *NumberExprAST::Codegen() {
467 return ConstantFP::get(getGlobalContext(), APFloat(Val));
/// Returns the Value currently bound to Name, or null (after reporting
/// "Unknown variable name") when there is no binding.
470 Value *VariableExprAST::Codegen() {
471 // Look this variable up in the function.
// NOTE(review): operator[] default-inserts a null entry for an unknown name,
// so a failed lookup leaves a stale key in NamedValues. find() would avoid it.
472 Value *V = NamedValues[Name];
473 return V ? V : ErrorV("Unknown variable name");
/// Emits both operands, then the instruction for Op. Returns null if either
/// operand fails or Op is not one of the handled operators.
// NOTE(review): the switch header and the '<' case label are absent from this
// listing.
476 Value *BinaryExprAST::Codegen() {
// Both operands are emitted before the null check, so RHS code is generated
// even when LHS has already failed.
477 Value *L = LHS->Codegen();
478 Value *R = RHS->Codegen();
479 if (L == 0 || R == 0) return 0;
482 case '+': return Builder.CreateFAdd(L, R, "addtmp");
483 case '-': return Builder.CreateFSub(L, R, "subtmp");
484 case '*': return Builder.CreateFMul(L, R, "multmp");
// ULT is the "unordered or less than" predicate: it is also true when either
// operand is NaN.
486 L = Builder.CreateFCmpULT(L, R, "cmptmp");
487 // Convert bool 0/1 to double 0.0 or 1.0
488 return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
490 default: return ErrorV("invalid binary operator");
/// Emits a call to the module function named Callee. Returns null when the
/// function is unknown, the argument count differs, or an argument fails to
/// generate.
494 Value *CallExprAST::Codegen() {
495 // Look up the name in the global module table.
496 Function *CalleeF = TheModule->getFunction(Callee);
498 return ErrorV("Unknown function referenced");
500 // If argument mismatch error.
501 if (CalleeF->arg_size() != Args.size())
502 return ErrorV("Incorrect # arguments passed");
// Arguments are emitted in order; the first failure aborts the whole call.
504 std::vector<Value*> ArgsV;
505 for (unsigned i = 0, e = Args.size(); i != e; ++i) {
506 ArgsV.push_back(Args[i]->Codegen());
507 if (ArgsV.back() == 0) return 0;
510 return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
/// Emits a conditional branch to 'then' and 'else' blocks that both jump to a
/// merge block, where a PHI node selects the value of the branch that ran.
/// Returns null if any of the three sub-expressions fails.
// NOTE(review): the final return statement is absent from this listing.
513 Value *IfExprAST::Codegen() {
514 Value *CondV = Cond->Codegen();
515 if (CondV == 0) return 0;
517 // Convert condition to a bool by comparing non-equal to 0.0.
// (FCmpONE is the "ordered and not equal" predicate.)
518 CondV = Builder.CreateFCmpONE(CondV,
519 ConstantFP::get(getGlobalContext(), APFloat(0.0)),
522 Function *TheFunction = Builder.GetInsertBlock()->getParent();
524 // Create blocks for the then and else cases. Insert the 'then' block at the
525 // end of the function.
// ElseBB and MergeBB are created without a parent and appended later, so the
// blocks end up in then / else / merge order.
// NOTE(review): if Then or Else codegen fails, the early returns below leave
// the still-unparented blocks neither inserted nor deleted - likely a leak.
526 BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
527 BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
528 BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
530 Builder.CreateCondBr(CondV, ThenBB, ElseBB);
533 Builder.SetInsertPoint(ThenBB);
535 Value *ThenV = Then->Codegen();
536 if (ThenV == 0) return 0;
538 Builder.CreateBr(MergeBB);
539 // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
540 ThenBB = Builder.GetInsertBlock();
543 TheFunction->getBasicBlockList().push_back(ElseBB);
544 Builder.SetInsertPoint(ElseBB);
546 Value *ElseV = Else->Codegen();
547 if (ElseV == 0) return 0;
549 Builder.CreateBr(MergeBB);
550 // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
551 ElseBB = Builder.GetInsertBlock();
554 TheFunction->getBasicBlockList().push_back(MergeBB);
555 Builder.SetInsertPoint(MergeBB);
// Two incoming edges are reserved: one from each branch.
556 PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
559 PN->addIncoming(ThenV, ThenBB);
560 PN->addIncoming(ElseV, ElseBB);
/// Emits the loop as: start value in the current block, then a "loop" block
/// holding a PHI for the loop variable, the body, the step and the end
/// condition, then an "afterloop" block. The body is emitted before the end
/// condition, so it runs at least once. The expression's own value is 0.0.
/// Returns null if the start, step or end expression fails to generate.
// NOTE(review): parts of the explanatory diagram and a few statements (the
// StepVal declaration and the if/else around the restore) are absent from this
// listing.
564 Value *ForExprAST::Codegen() {
570 // variable = phi [start, loopheader], [nextvariable, loopend]
576 // nextvariable = variable + step
578 // br endcond, loop, endloop
581 // Emit the start code first, without 'variable' in scope.
582 Value *StartVal = Start->Codegen();
583 if (StartVal == 0) return 0;
585 // Make the new basic block for the loop header, inserting after current block.
587 Function *TheFunction = Builder.GetInsertBlock()->getParent();
588 BasicBlock *PreheaderBB = Builder.GetInsertBlock();
589 BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
591 // Insert an explicit fall through from the current block to the LoopBB.
592 Builder.CreateBr(LoopBB);
594 // Start insertion in LoopBB.
595 Builder.SetInsertPoint(LoopBB);
597 // Start the PHI node with an entry for Start.
598 PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str());
599 Variable->addIncoming(StartVal, PreheaderBB);
601 // Within the loop, the variable is defined equal to the PHI node. If it
602 // shadows an existing variable, we have to restore it, so save it now.
// NOTE(review): the early returns below exit without restoring OldVal, so
// after a failed body/step/end the name stays bound to this PHI unless the
// caller resets NamedValues.
603 Value *OldVal = NamedValues[VarName];
604 NamedValues[VarName] = Variable;
606 // Emit the body of the loop. This, like any other expr, can change the
607 // current BB. Note that we ignore the value computed by the body, but don't ignore a failure.
609 if (Body->Codegen() == 0)
612 // Emit the step value.
615 StepVal = Step->Codegen();
616 if (StepVal == 0) return 0;
618 // If not specified, use 1.0.
619 StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
622 Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
624 // Compute the end condition.
// VarName is still bound to the PHI here, so the condition sees the value
// from before the increment, not NextVar.
625 Value *EndCond = End->Codegen();
626 if (EndCond == 0) return EndCond;
628 // Convert condition to a bool by comparing non-equal to 0.0.
629 EndCond = Builder.CreateFCmpONE(EndCond,
630 ConstantFP::get(getGlobalContext(), APFloat(0.0)),
633 // Create the "after loop" block and insert it.
// The body may have created new blocks, so the back edge originates from the
// builder's current block rather than from LoopBB.
634 BasicBlock *LoopEndBB = Builder.GetInsertBlock();
635 BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
637 // Insert the conditional branch into the end of LoopEndBB.
638 Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
640 // Any new code will be inserted in AfterBB.
641 Builder.SetInsertPoint(AfterBB);
643 // Add a new entry to the PHI node for the backedge.
644 Variable->addIncoming(NextVar, LoopEndBB);
646 // Restore the unshadowed variable.
648 NamedValues[VarName] = OldVal;
650 NamedValues.erase(VarName);
653 // for expr always returns 0.0.
654 return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
/// Creates (or re-uses) the module function described by this prototype: all
/// parameters and the result are doubles and linkage is external. Also names
/// the arguments and binds each name in NamedValues.
// NOTE(review): the return statements and a few conditions are absent from
// this listing; only the error messages of the rejection paths are visible.
657 Function *PrototypeAST::Codegen() {
658 // Make the function type: double(double,double) etc.
659 std::vector<Type*> Doubles(Args.size(),
660 Type::getDoubleTy(getGlobalContext()));
661 FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
664 Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
666 // If F conflicted, there was already something named 'Name'. If it has a
667 // body, don't allow redefinition or reextern.
// A name clash shows up as the new function having been given a different
// name than the one requested.
668 if (F->getName() != Name) {
669 // Delete the one we just made and get the existing one.
670 F->eraseFromParent();
671 F = TheModule->getFunction(Name);
673 // If F already has a body, reject this.
675 ErrorF("redefinition of function");
679 // If F took a different number of args, reject.
680 if (F->arg_size() != Args.size()) {
681 ErrorF("redefinition of function with different # args");
686 // Set names for all arguments.
688 for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
690 AI->setName(Args[Idx]);
692 // Add arguments to variable symbol table.
693 NamedValues[Args[Idx]] = AI;
/// Generates the function: emits the prototype, then the body into a fresh
/// "entry" block, adds the return, verifies and optimises. If the body fails,
/// the function is erased from the module.
// NOTE(review): the return statements and some lines are absent from this
// listing.
699 Function *FunctionAST::Codegen() {
702 Function *TheFunction = Proto->Codegen();
703 if (TheFunction == 0)
706 // Create a new basic block to start insertion into.
707 BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
708 Builder.SetInsertPoint(BB);
710 if (Value *RetVal = Body->Codegen()) {
711 // Finish off the function.
712 Builder.CreateRet(RetVal);
714 // Validate the generated code, checking for consistency.
// NOTE(review): the result of verifyFunction is ignored here, so a function
// that fails verification is still optimised and returned.
715 verifyFunction(*TheFunction);
717 // Optimize the function.
718 TheFPM->run(*TheFunction);
723 // Error reading body, remove function.
// NOTE(review): Proto->Codegen() can hand back a function that already existed
// in the module (e.g. from an earlier extern); erasing it here would remove
// that earlier declaration too - confirm whether that is intended.
724 TheFunction->eraseFromParent();
728 //===----------------------------------------------------------------------===//
729 // Top-Level parsing and JIT Driver
730 //===----------------------------------------------------------------------===//
// JIT engine. Created in the startup code and used by
// HandleTopLevelExpression to obtain a callable pointer.
732 static ExecutionEngine *TheExecutionEngine;
/// Parses a 'def' and, if both parsing and code generation succeed, reports it
/// on stderr.
// NOTE(review): the lines after the message and the recovery statement are
// absent from this listing; the FunctionAST is not freed in the visible lines.
734 static void HandleDefinition() {
735 if (FunctionAST *F = ParseDefinition()) {
736 if (Function *LF = F->Codegen()) {
737 fprintf(stderr, "Read function definition:");
741 // Skip token for error recovery.
/// Parses an 'extern' and, if both parsing and code generation succeed,
/// reports it on stderr.
// NOTE(review): the lines after the message and the recovery statement are
// absent from this listing; the PrototypeAST is not freed in the visible lines.
746 static void HandleExtern() {
747 if (PrototypeAST *P = ParseExtern()) {
748 if (Function *F = P->Codegen()) {
749 fprintf(stderr, "Read extern: ");
753 // Skip token for error recovery.
/// Parses a bare expression, compiles it as an anonymous zero-argument
/// function, runs it through the JIT and prints the result to stderr.
758 static void HandleTopLevelExpression() {
759 // Evaluate a top-level expression into an anonymous function.
760 if (FunctionAST *F = ParseTopLevelExpr()) {
761 if (Function *LF = F->Codegen()) {
// Finalise the engine's generated object before asking for the address.
// NOTE(review): presumably required by MCJIT before the code is executable -
// confirm against the ExecutionEngine documentation.
762 TheExecutionEngine->finalizeObject();
763 // JIT the function, returning a function pointer.
764 void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
766 // Cast it to the right type (takes no arguments, returns a double) so we
767 // can call it as a native function.
768 double (*FP)() = (double (*)())(intptr_t)FPtr;
769 fprintf(stderr, "Evaluated to %f\n", FP());
772 // Skip token for error recovery.
777 /// top ::= definition | external | expression | ';'
/// Prompts with "ready> " on stderr and dispatches on the current token until
/// end of input.
// NOTE(review): the loop and switch headers are absent from this listing.
778 static void MainLoop() {
780 fprintf(stderr, "ready> ");
782 case tok_eof: return;
783 case ';': getNextToken(); break; // ignore top-level semicolons.
784 case tok_def: HandleDefinition(); break;
785 case tok_extern: HandleExtern(); break;
// Anything else is treated as a top-level expression to evaluate.
786 default: HandleTopLevelExpression(); break;
791 //===----------------------------------------------------------------------===//
792 // "Library" functions that can be "extern'd" from user code.
793 //===----------------------------------------------------------------------===//
795 /// putchard - putchar that takes a double and returns 0.
// NOTE(review): the line before the signature (embedded line 796) and the
// function body are absent from this listing, so linkage and the exact output
// call cannot be confirmed from here.
797 double putchard(double X) {
802 //===----------------------------------------------------------------------===//
804 //===----------------------------------------------------------------------===//
// Driver start-up: initialises the native target, installs operator
// precedences, creates the module and JIT engine, builds the per-function
// optimisation pipeline, then runs the interpreter loop.
// NOTE(review): the enclosing function's signature (presumably main) and
// several statements are absent from this listing.
// Register the host target plus its assembly printer and parser.
807 InitializeNativeTarget();
808 InitializeNativeTargetAsmPrinter();
809 InitializeNativeTargetAsmParser();
810 LLVMContext &Context = getGlobalContext();
812 // Install standard binary operators.
813 // 1 is lowest precedence.
814 BinopPrecedence['<'] = 10;
815 BinopPrecedence['+'] = 20;
816 BinopPrecedence['-'] = 20;
817 BinopPrecedence['*'] = 40; // highest.
819 // Prime the first token.
820 fprintf(stderr, "ready> ");
823 // Make the module, which holds all the code.
824 std::unique_ptr<Module> Owner = make_unique<Module>("my cool jit", Context);
// Keep a raw pointer for the code generator; ownership moves to the engine
// below.
825 TheModule = Owner.get();
827 // Create the JIT. This takes ownership of the module.
829 TheExecutionEngine = EngineBuilder(std::move(Owner))
// ErrStr receives the reason if engine creation fails; it is printed below.
830 .setErrorStr(&ErrStr)
831 .setMCJITMemoryManager(new SectionMemoryManager())
833 if (!TheExecutionEngine) {
834 fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
838 FunctionPassManager OurFPM(TheModule);
840 // Set up the optimizer pipeline. Start with registering info about how the
841 // target lays out data structures.
842 TheModule->setDataLayout(TheExecutionEngine->getDataLayout());
843 OurFPM.add(new DataLayoutPass());
844 // Provide basic AliasAnalysis support for GVN.
845 OurFPM.add(createBasicAliasAnalysisPass());
846 // Do simple "peephole" optimizations and bit-twiddling optzns.
847 OurFPM.add(createInstructionCombiningPass());
848 // Reassociate expressions.
849 OurFPM.add(createReassociatePass());
850 // Eliminate Common SubExpressions.
851 OurFPM.add(createGVNPass());
852 // Simplify the control flow graph (deleting unreachable blocks, etc).
853 OurFPM.add(createCFGSimplificationPass());
855 OurFPM.doInitialization();
857 // Set the global so the code gen can use this.
860 // Run the main "interpreter loop" now.
865 // Print out all of the generated code.