From a95a02a16aaf37c2a8c4b445bdb52687fac01073 Mon Sep 17 00:00:00 2001 From: "Gerasimos (Makis) Maropoulos" Date: Mon, 27 Mar 2017 04:09:44 +0300 Subject: [PATCH] Nothing special here (yet) Former-commit-id: 826d7c370481b78afd9ba92f4ae8bef1fb85a567 --- _future/ipel/README.md | 106 ++++++++++++++++++++++ _future/ipel/ast/ast.go | 1 + _future/ipel/ast/param.go | 40 +++++++++ _future/ipel/ast/param_func.go | 74 ++++++++++++++++ _future/ipel/lexer/lexer.go | 136 +++++++++++++++++++++++++++++ _future/ipel/lexer/lexer_test.go | 54 ++++++++++++ _future/ipel/main.go | 20 +++++ _future/ipel/parser/parser.go | 61 +++++++++++++ _future/ipel/parser/parser_test.go | 48 ++++++++++ _future/ipel/repl/repl.go | 31 +++++++ _future/ipel/token/token.go | 62 +++++++++++++ _future/macros_test.go | 27 ++++++ _future/path_parser_test.go | 7 +- 13 files changed, 664 insertions(+), 3 deletions(-) create mode 100644 _future/ipel/README.md create mode 100644 _future/ipel/ast/ast.go create mode 100644 _future/ipel/ast/param.go create mode 100644 _future/ipel/ast/param_func.go create mode 100644 _future/ipel/lexer/lexer.go create mode 100644 _future/ipel/lexer/lexer_test.go create mode 100644 _future/ipel/main.go create mode 100644 _future/ipel/parser/parser.go create mode 100644 _future/ipel/parser/parser_test.go create mode 100644 _future/ipel/repl/repl.go create mode 100644 _future/ipel/token/token.go diff --git a/_future/ipel/README.md b/_future/ipel/README.md new file mode 100644 index 00000000..dceed675 --- /dev/null +++ b/_future/ipel/README.md @@ -0,0 +1,106 @@ +# Iris Path Expression Language (_future) + + +## Ideas & Goals + +- Optional. +- No Breaking Changes. +- No performance cost if not used. +- Can convert a path for the existing routers, if no router is being used, then it will use its own, new, router. +- 4+1 basic parameter types: `int`, `string`, `alphabetical`, `path`, (wildcard), `any` based on regexp. 
+- Each type has unlimited functions of its own, they should be able to be overridden.
+- Give devs the ability to parse their function's arguments before using them and return a func which is the validator.
+- Function will be a generic type(`interface{}`) so that devs are able to use any type without boilerplate code for conversions,
+can be done using reflection and reflect.Call, on .Boot time to parse the function automatically, and keep the returning validator function (already tested and working).
+- The `any` will be the default if a dev uses functions on a named path parameter without specifying a type.
+- If a type doesn't contain a function of its own, then it will use the `any`'s, so `any` will contain global-use functions too.
+
+## Preview
+
+`/api/users/{id:int min(1)}/posts`
+
+```go
+minValidator := func(min int) func(string) bool {
+	return func(paramValue string) bool {
+		paramValueInt, err := strconv.Atoi(paramValue)
+		if err != nil {
+			return false
+		}
+		if paramValueInt < min {
+			return false
+		}
+		return true
+	}
+}
+
+app := iris.New()
+app.Int.Set("min", minValidator)
+```
+
+`/api/{version:string len(2) isVersion()}`
+
+```go
+isVersionStrValidator := func() func(string) bool {
+	versions := []string{"v1", "v2"}
+	return func(paramValue string) bool {
+		for _, s := range versions {
+			if s == paramValue {
+				return true
+			}
+		}
+		return false
+	}
+}
+
+lenStrValidator := func(i int) func(string) bool {
+	if i <= 0 {
+		i = 1
+	}
+	return func(paramValue string) bool {
+		return len(paramValue) == i
+	}
+}
+
+
+app := iris.New()
+app.String.Set("isVersion", isVersionStrValidator)
+app.String.Set("len", lenStrValidator)
+```
+
+`/uploads/{filepath:tail contains(.) else 403}`
+
+```go
+[...]
+
+[...]
+```
+
+`/api/validate/year/{year:int range(1970,2017) else 500}`
+
+```go
+[...]
+
+[...]
+``` + +## Resources +- [Lexical analysis](https://en.wikipedia.org/wiki/Lexical_analysis) **necessary** +- [Top-down parsing](https://en.wikipedia.org/wiki/Top-down_parsing) **necessary** +- [Recursive descent parser](https://en.wikipedia.org/wiki/Recursive_descent_parser) **basic, continue to the rest after** +- [Handwritten Parsers & Lexers in Go](https://blog.gopheracademy.com/advent-2014/parsers-lexers/) **very good** +- [Creating a VM / Compiler Episode 1: Bytecode VM](https://www.youtube.com/watch?v=DUNkdl0Jhgs) **nice to watch** +- [So you want to write an interpreter?](https://www.youtube.com/watch?v=LCslqgM48D4) **watch it, continue to the rest later on** +- [Writing a Lexer and Parser in Go - Part 1](http://adampresley.github.io/2015/04/12/writing-a-lexer-and-parser-in-go-part-1.html) **a different approach using the strategy pattern, not for production use in my opinion** +- [Writing a Lexer and Parser in Go - Part 2](http://adampresley.github.io/2015/05/12/writing-a-lexer-and-parser-in-go-part-2.html) +- [Writing a Lexer and Parser in Go - Part 3](http://adampresley.github.io/2015/06/01/writing-a-lexer-and-parser-in-go-part-3.html) +- [Writing An Interpreter In Go](https://www.amazon.com/Writing-Interpreter-Go-Thorsten-Ball/dp/300055808X) **I recommend this book: suitable for both experienced and novice developers** + + + diff --git a/_future/ipel/ast/ast.go b/_future/ipel/ast/ast.go new file mode 100644 index 00000000..bd412963 --- /dev/null +++ b/_future/ipel/ast/ast.go @@ -0,0 +1 @@ +package ast diff --git a/_future/ipel/ast/param.go b/_future/ipel/ast/param.go new file mode 100644 index 00000000..db51c216 --- /dev/null +++ b/_future/ipel/ast/param.go @@ -0,0 +1,40 @@ +package ast + +type ParamType uint8 + +const ( + ParamTypeUnExpected ParamType = iota + // /42 + ParamTypeInt + // /myparam1 + ParamTypeString + // /myparam + ParamTypeAlphabetical + // /myparam1/myparam2 + ParamPath +) + +var paramTypes = map[string]ParamType{ + "int": ParamTypeInt, 
+	"string":       ParamTypeString,
+	"alphabetical": ParamTypeAlphabetical,
+	"path":         ParamPath,
+	// could be named also:
+	// "tail":
+	// "wild"
+	// "wildcard"
+}
+
+func LookupParamType(ident string) ParamType {
+	if typ, ok := paramTypes[ident]; ok {
+		return typ
+	}
+	return ParamTypeUnExpected
+}
+
+type ParamStatement struct {
+	Name      string      // id
+	Type      ParamType   // int
+	Funcs     []ParamFunc // range
+	ErrorCode int         // 404
+}
diff --git a/_future/ipel/ast/param_func.go b/_future/ipel/ast/param_func.go
new file mode 100644
index 00000000..f1db45f8
--- /dev/null
+++ b/_future/ipel/ast/param_func.go
@@ -0,0 +1,74 @@
+package ast
+
+import (
+	"fmt"
+	"strconv"
+)
+
+type ParamFuncArg interface{}
+
+func ParamFuncArgInt64(a ParamFuncArg) (int64, bool) {
+	if v, ok := a.(int64); ok {
+		return v, true
+	}
+	return -1, false
+}
+
+func ParamFuncArgToInt64(a ParamFuncArg) (int64, error) {
+	switch a.(type) {
+	case int64:
+		return a.(int64), nil
+	case string:
+		return strconv.ParseInt(a.(string), 10, 64)
+	case int:
+		return int64(a.(int)), nil
+	default:
+		return -1, fmt.Errorf("unexpected function argument type: %q", a)
+	}
+}
+
+func ParamFuncArgInt(a ParamFuncArg) (int, bool) {
+	if v, ok := a.(int); ok {
+		return v, true
+	}
+	return -1, false
+}
+
+func ParamFuncArgToInt(a ParamFuncArg) (int, error) {
+	switch a.(type) {
+	case int:
+		return a.(int), nil
+	case string:
+		return strconv.Atoi(a.(string))
+	case int64:
+		return int(a.(int64)), nil
+	default:
+		return -1, fmt.Errorf("unexpected function argument type: %q", a)
+	}
+}
+
+func ParamFuncArgString(a ParamFuncArg) (string, bool) {
+	if v, ok := a.(string); ok {
+		return v, true
+	}
+	return "", false
+}
+
+func ParamFuncArgToString(a ParamFuncArg) (string, error) {
+	switch a.(type) {
+	case string:
+		return a.(string), nil
+	case int:
+		return strconv.Itoa(a.(int)), nil
+	case int64:
+		return strconv.FormatInt(a.(int64), 10), nil
+	default:
+		return "", fmt.Errorf("unexpected function argument type: %q",
a) + } +} + +// range(1,5) +type ParamFunc struct { + Name string // range + Args []ParamFuncArg // [1,5] +} diff --git a/_future/ipel/lexer/lexer.go b/_future/ipel/lexer/lexer.go new file mode 100644 index 00000000..0d11eea5 --- /dev/null +++ b/_future/ipel/lexer/lexer.go @@ -0,0 +1,136 @@ +package lexer + +import ( + "gopkg.in/kataras/iris.v6/_future/ipel/token" +) + +type Lexer struct { + input string + pos int // current pos in input, current char + readPos int // current reading pos in input, after current char + ch byte // current char under examination +} + +func New(input string) *Lexer { + l := &Lexer{ + input: input, + } + // step to the first character in order to be ready + l.readChar() + return l +} + +func (l *Lexer) readChar() { + if l.readPos >= len(l.input) { + l.ch = 0 + } else { + l.ch = l.input[l.readPos] + } + l.pos = l.readPos + l.readPos += 1 +} + +func (l *Lexer) NextToken() (t token.Token) { + l.skipWhitespace() + + switch l.ch { + case '{': + t = l.newTokenRune(token.LBRACE, l.ch) + case '}': + t = l.newTokenRune(token.RBRACE, l.ch) + // Let's keep it simple, no evaluation for logical operators, we are not making a new programming language, keep it simple makis. 
+ // || + // case '|': + // if l.peekChar() == '|' { + // ch := l.ch + // l.readChar() + // t = token.Token{Type: token.OR, Literal: string(ch) + string(l.ch)} + // } + // == + case ':': + t = l.newTokenRune(token.COLON, l.ch) + case '(': + t = l.newTokenRune(token.LPAREN, l.ch) + case ')': + t = l.newTokenRune(token.RPAREN, l.ch) + case ',': + t = l.newTokenRune(token.COMMA, l.ch) + // literals + case 0: + t.Literal = "" + t.Type = token.EOF + default: + // letters + if isLetter(l.ch) { + lit := l.readIdentifier() + typ := token.LookupIdent(lit) + t = l.newToken(typ, lit) + return + // numbers + } else if isDigit(l.ch) { + lit := l.readNumber() + t = l.newToken(token.INT, lit) + return + } else { + t = l.newTokenRune(token.ILLEGAL, l.ch) + } + } + l.readChar() // set the pos to the next + return +} + +func (l *Lexer) newToken(tokenType token.TokenType, lit string) token.Token { + t := token.Token{ + Type: tokenType, + Literal: lit, + Start: l.pos, + End: l.pos, + } + + if l.pos > 1 && len(lit) > 1 { + t.End = t.Start + len(lit) - 1 + } + + return t +} + +func (l *Lexer) newTokenRune(tokenType token.TokenType, ch byte) token.Token { + return l.newToken(tokenType, string(ch)) +} + +func (l *Lexer) skipWhitespace() { + for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' { + l.readChar() + } +} + +func (l *Lexer) readIdentifier() string { + pos := l.pos + for isLetter(l.ch) { + l.readChar() + } + return l.input[pos:l.pos] +} + +func (l *Lexer) peekChar() byte { + if l.readPos >= len(l.input) { + return 0 + } + return l.input[l.readPos] +} + +func isLetter(ch byte) bool { + return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' +} + +func (l *Lexer) readNumber() string { + pos := l.pos + for isDigit(l.ch) { + l.readChar() + } + return l.input[pos:l.pos] +} + +func isDigit(ch byte) bool { + return '0' <= ch && ch <= '9' +} diff --git a/_future/ipel/lexer/lexer_test.go b/_future/ipel/lexer/lexer_test.go new file mode 100644 index 
00000000..d959f73b --- /dev/null +++ b/_future/ipel/lexer/lexer_test.go @@ -0,0 +1,54 @@ +package lexer + +import ( + "testing" + + "gopkg.in/kataras/iris.v6/_future/ipel/token" +) + +func TestNextToken(t *testing.T) { + input := `{id:int min(1) max(5) else 404}` + + tests := []struct { + expectedType token.TokenType + expectedLiteral string + }{ + {token.LBRACE, "{"}, // 0 + {token.IDENT, "id"}, // 1 + {token.COLON, ":"}, // 2 + {token.IDENT, "int"}, // 3 + {token.IDENT, "min"}, // 4 + {token.LPAREN, "("}, // 5 + {token.INT, "1"}, // 6 + {token.RPAREN, ")"}, // 7 + {token.IDENT, "max"}, // 8 + {token.LPAREN, "("}, // 9 + {token.INT, "5"}, // 10 + {token.RPAREN, ")"}, // 11 + {token.ELSE, "else"}, // 12 + {token.INT, "404"}, // 13 + {token.RBRACE, "}"}, // 14 + } + + l := New(input) + + for i, tt := range tests { + tok := l.NextToken() + + if tok.Type != tt.expectedType { + t.Fatalf("tests[%d] - tokentype wrong. expected=%q, got=%q", + i, tt.expectedType, tok.Type) + } + + if tok.Literal != tt.expectedLiteral { + t.Fatalf("tests[%d] - literal wrong. expected=%q, got=%q", + i, tt.expectedLiteral, tok.Literal) + } + + } +} + +// EMEINA STO: +// 30/232 selida apto making a interpeter in Go. +// den ekana to skipWhitespaces giati skeftomai +// an borei na to xreiastw 9a dw aurio. diff --git a/_future/ipel/main.go b/_future/ipel/main.go new file mode 100644 index 00000000..87e0fdf1 --- /dev/null +++ b/_future/ipel/main.go @@ -0,0 +1,20 @@ +package main + +import ( + "fmt" + "os" + "os/user" + + "gopkg.in/kataras/iris.v6/_future/ipel/repl" +) + +func main() { + user, err := user.Current() + if err != nil { + panic(err) + } + fmt.Printf("Hello %s! 
This is the Iris Path Expression Language!\n", + user.Username) + fmt.Printf("Feel free to type in commands\n") + repl.Start(os.Stdin, os.Stdout) +} diff --git a/_future/ipel/parser/parser.go b/_future/ipel/parser/parser.go new file mode 100644 index 00000000..188b2220 --- /dev/null +++ b/_future/ipel/parser/parser.go @@ -0,0 +1,61 @@ +package parser + +import ( + "fmt" + "strings" + + "gopkg.in/kataras/iris.v6/_future/ipel/ast" + "gopkg.in/kataras/iris.v6/_future/ipel/lexer" + "gopkg.in/kataras/iris.v6/_future/ipel/token" +) + +type Parser struct { + l *lexer.Lexer + errors []string +} + +func New(lexer *lexer.Lexer) *Parser { + p := &Parser{ + l: lexer, + } + + return p +} + +func (p *Parser) appendErr(format string, a ...interface{}) { + p.errors = append(p.errors, fmt.Sprintf(format, a...)) +} + +func (p *Parser) Parse() (*ast.ParamStatement, error) { + stmt := new(ast.ParamStatement) + for { + t := p.l.NextToken() + if t.Type == token.EOF { + break + } + + switch t.Type { + case token.LBRACE: + // name + nextTok := p.l.NextToken() + stmt.Name = nextTok.Literal + case token.COLON: + // type + nextTok := p.l.NextToken() + paramType := ast.LookupParamType(nextTok.Literal) + if paramType == ast.ParamTypeUnExpected { + p.appendErr("[%d:%d] unexpected parameter type: %s", t.Start, t.End, nextTok.Literal) + } + case token.ILLEGAL: + p.appendErr("[%d:%d] illegal token: %s", t.Start, t.End, t.Literal) + default: + p.appendErr("[%d:%d] unexpected token type: %q", t.Start, t.End, t.Type) + + } + } + + if len(p.errors) > 0 { + return nil, fmt.Errorf(strings.Join(p.errors, "\n")) + } + return stmt, nil +} diff --git a/_future/ipel/parser/parser_test.go b/_future/ipel/parser/parser_test.go new file mode 100644 index 00000000..8c328779 --- /dev/null +++ b/_future/ipel/parser/parser_test.go @@ -0,0 +1,48 @@ +package parser + +import ( + "fmt" + "strings" + "testing" + + "gopkg.in/kataras/iris.v6/_future/ipel/lexer" +) + +// Test is failing because we are not finished with the 
Parser yet +// 27/03 +func TestParseError(t *testing.T) { + // fail + illegalChar := '$' + + input := "{id" + string(illegalChar) + "int range(1,5) else 404}" + l := lexer.New(input) + p := New(l) + + _, err := p.Parse() + + if err == nil { + t.Fatalf("expecting not empty error on input '%s'", input) + } + + // println(input[8:9]) + // println(input[13:17]) + + illIdx := strings.IndexRune(input, illegalChar) + expectedErr := fmt.Sprintf("[%d:%d] illegal token: %s", illIdx, illIdx, "$") + if got := err.Error(); got != expectedErr { + t.Fatalf("expecting error to be '%s' but got: %s", expectedErr, got) + } + // + + // success + input2 := "{id:int range(1,5) else 404}" + l2 := lexer.New(input2) + p2 := New(l2) + + _, err = p2.Parse() + + if err != nil { + t.Fatalf("expecting empty error on input '%s', but got: %s", input2, err.Error()) + } + // +} diff --git a/_future/ipel/repl/repl.go b/_future/ipel/repl/repl.go new file mode 100644 index 00000000..58eb0b8a --- /dev/null +++ b/_future/ipel/repl/repl.go @@ -0,0 +1,31 @@ +package repl + +import ( + "bufio" + "fmt" + "io" + + "gopkg.in/kataras/iris.v6/_future/ipel/lexer" + "gopkg.in/kataras/iris.v6/_future/ipel/token" +) + +const PROMPT = ">> " + +func Start(in io.Reader, out io.Writer) { + scanner := bufio.NewScanner(in) + for { + fmt.Printf(PROMPT) + scanned := scanner.Scan() + if !scanned { + return + } + line := scanner.Text() + if line == "exit" { + break + } + l := lexer.New(line) + for tok := l.NextToken(); tok.Type != token.EOF; tok = l.NextToken() { + fmt.Printf("%+v\n", tok) + } + } +} diff --git a/_future/ipel/token/token.go b/_future/ipel/token/token.go new file mode 100644 index 00000000..fed7c365 --- /dev/null +++ b/_future/ipel/token/token.go @@ -0,0 +1,62 @@ +package token + +type TokenType int + +type Token struct { + Type TokenType + Literal string + Start int // excluding, useful for user + End int // excluding, useful for user and index +} + +func (t Token) StartIndex() int { + if t.Start > 0 { + 
return t.Start + 1 + } + return t.Start +} + +func (t Token) EndIndex() int { + return t.End +} + +// {id:int range(1,5) else 404} +// /admin/{id:int eq(1) else 402} +// /file/{filepath:tail else 405} +const ( + EOF = iota // 0 + ILLEGAL + + // Identifiers + literals + LBRACE // { + RBRACE // } + // PARAM_IDENTIFIER // id + COLON // : + // let's take them in parser + // PARAM_TYPE // int, string, alphabetic, tail + // PARAM_FUNC // range + LPAREN // ( + RPAREN // ) + // PARAM_FUNC_ARG // 1 + COMMA + IDENT // string or keyword + // Keywords + keywords_start + ELSE // else + keywords_end + INT // 42 + +) + +const eof rune = 0 + +var keywords = map[string]TokenType{ + "else": ELSE, +} + +func LookupIdent(ident string) TokenType { + if tok, ok := keywords[ident]; ok { + return tok + } + return IDENT +} diff --git a/_future/macros_test.go b/_future/macros_test.go index 31c90df9..3a831692 100644 --- a/_future/macros_test.go +++ b/_future/macros_test.go @@ -20,6 +20,33 @@ import ( "gopkg.in/kataras/iris.v6/httptest" ) +// No, better to have simple functions, it will be easier for users to understand +// type ParamEvaluator interface { +// Eval() func(string) bool +// Literal() string +// } + +// type IntParam struct { +// } + +// func (i IntParam) Literal() string { +// return "int" +// } + +// func (i IntParam) Eval() func(string) bool { +// r, err := regexp.Compile("[1-9]+$") +// if err != nil { +// panic(err) +// } +// return r.MatchString +// } + +// func (i IntParam) Eq(eqToNumber int) func(int) bool { +// return func(param int) bool { +// return eqToNumber == param +// } +// } + // a helper to return a macro from a simple regexp // it compiles the regexp and after returns the macro, for obviously performance reasons. 
func fromRegexp(expr string) _macrofn { diff --git a/_future/path_parser_test.go b/_future/path_parser_test.go index 39c160f7..b66649d1 100644 --- a/_future/path_parser_test.go +++ b/_future/path_parser_test.go @@ -35,9 +35,10 @@ func TestPathParser(t *testing.T) { PathParamTmpl{ SegmentIndex: 1, Param: ParamTmpl{ - Name: "id", - Expression: "int", - Macro: MacroTmpl{Name: "int"}, + Name: "id", + Expression: "int", + FailStatusCode: 404, + Macro: MacroTmpl{Name: "int"}, }, }, },