diff --git a/changelogs/unreleased/luajit-grammar-fuzzer.md b/changelogs/unreleased/luajit-grammar-fuzzer.md
new file mode 100644
index 0000000000000000000000000000000000000000..59e119c8562ae393e399115cb6f1beeec380d552
--- /dev/null
+++ b/changelogs/unreleased/luajit-grammar-fuzzer.md
@@ -0,0 +1,3 @@
+## feature/test/fuzz
+
+* LuaJIT can now be fuzzed using a grammar-based fuzzer (gh-4823).
diff --git a/test/fuzz/CMakeLists.txt b/test/fuzz/CMakeLists.txt
index 94b4987b99fde7bb29333d09ed08874180e94425..a082b735af674c090fb6a35a5a0d8012ed144d2e 100644
--- a/test/fuzz/CMakeLists.txt
+++ b/test/fuzz/CMakeLists.txt
@@ -93,6 +93,14 @@ create_fuzz_test(PREFIX mp_datetime
                  LIBRARIES core fuzzer_config
 )
 
+include(ProtobufMutator)
+
+# UndefinedBehaviorSanitizer is not supported in LuaJIT.
+# See https://github.com/tarantool/tarantool/issues/8473
+if (NOT ENABLE_UB_SANITIZER)
+  add_subdirectory(luaL_loadbuffer)
+endif()
+
 add_custom_target(fuzzers
                   DEPENDS ${FUZZ_TEST_TARGETS}
                   COMMENT "Build fuzzers")
diff --git a/test/fuzz/luaL_loadbuffer/CMakeLists.txt b/test/fuzz/luaL_loadbuffer/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..23271186ec4b430fd4a03c6d848c99a253adb8c9
--- /dev/null
+++ b/test/fuzz/luaL_loadbuffer/CMakeLists.txt
@@ -0,0 +1,34 @@
+add_executable(luaL_loadbuffer_fuzzer
+               luaL_loadbuffer_fuzzer.cc
+               serializer.cc)
+
+add_library(lua_grammar-proto)
+
+# NOTE(review): find_library() expects "<VAR> name..."; as written this
+# looks up a library literally named REQUIRED_FILES - confirm the intent.
+foreach(lib ${LPM_LIBRARIES})
+    find_library(${lib} REQUIRED_FILES)
+endforeach(lib)
+
+protobuf_generate(LANGUAGE cpp
+                  TARGET lua_grammar-proto
+                  PROTOS lua_grammar.proto)
+
+target_link_libraries(lua_grammar-proto
+                      ${PROTOBUF_LIBRARIES})
+
+target_include_directories(luaL_loadbuffer_fuzzer PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
+
+target_link_libraries(luaL_loadbuffer_fuzzer
+                      PUBLIC
+                      lua_grammar-proto
+                      ${LPM_LIBRARIES}
+                      libluajit_static
+                      fuzzer_config)
+
+add_dependencies(luaL_loadbuffer_fuzzer
+                 libluajit_static
+                 ${LPM_LIBRARIES}
+                 lua_grammar-proto)
+
+set(FUZZ_TEST_TARGETS "${FUZZ_TEST_TARGETS};luaL_loadbuffer_fuzzer" PARENT_SCOPE)
diff --git a/test/fuzz/luaL_loadbuffer/luaL_loadbuffer_fuzzer.cc b/test/fuzz/luaL_loadbuffer/luaL_loadbuffer_fuzzer.cc
new file mode 100644
index 0000000000000000000000000000000000000000..94ebd5bad94970a49fe104ece11b41e34cba6f13
--- /dev/null
+++ b/test/fuzz/luaL_loadbuffer/luaL_loadbuffer_fuzzer.cc
@@ -0,0 +1,96 @@
+extern "C"
+{
+#include <lua.h>
+#include <lualib.h>
+#include <lauxlib.h>
+}
+
+#include "lua_grammar.pb.h"
+#include "serializer.h"
+
+#include <cstdlib>
+#include <iostream>
+#include <string>
+
+#include <libprotobuf-mutator/port/protobuf.h>
+#include <libprotobuf-mutator/src/libfuzzer/libfuzzer_macro.h>
+
+/**
+ * Report the error message on the top of the Lua stack to std::cerr
+ * (only when LUA_FUZZER_VERBOSE is set in the environment) and
+ * remove the message from the stack.
+ *
+ * @param L      Lua state with an error object on the stack top.
+ * @param prefix Name of the failed API call, printed before the message.
+ */
+static inline void
+report_error(lua_State *L, const std::string &prefix)
+{
+	if (::getenv("LUA_FUZZER_VERBOSE") != nullptr) {
+		/*
+		 * The error object is on the stack top, not at index 1.
+		 * lua_tostring() returns NULL for objects that are
+		 * neither strings nor numbers, and std::string must
+		 * not be built from a null pointer.
+		 */
+		const char *msg = lua_tostring(L, -1);
+		std::string err_str = msg != nullptr ? msg :
+				      "(error object is not a string)";
+		std::cerr << prefix << " error: " << err_str << std::endl;
+	}
+	/* Pop error message from stack, reported or not. */
+	lua_pop(L, 1);
+}
+
+/**
+ * Fuzzer entry point: serialize the generated protobuf message into
+ * Lua source code, then load and run it in a fresh Lua state.
+ */
+DEFINE_PROTO_FUZZER(const lua_grammar::Block &message)
+{
+	lua_State *L = luaL_newstate();
+	if (!L)
+		return;
+
+	std::string code = BlockToString(message);
+
+	if (::getenv("LPM_DUMP_NATIVE_INPUT") && code.size() != 0) {
+		std::cout << "-------------------------" << std::endl;
+		std::cout << code << std::endl;
+	}
+
+	luaL_openlibs(L);
+
+	/*
+	 * See https://luajit.org/running.html.
+	 */
+	luaL_dostring(L, "jit.opt.start('hotloop=1')");
+	luaL_dostring(L, "jit.opt.start('hotexit=1')");
+	luaL_dostring(L, "jit.opt.start('recunroll=1')");
+	luaL_dostring(L, "jit.opt.start('callunroll=1')");
+	/*
+	 * A failed luaL_dostring() leaves its error message on the
+	 * stack. Drop it so that it cannot be mixed up with an error
+	 * raised by the generated chunk.
+	 */
+	lua_settop(L, 0);
+
+	if (luaL_loadbuffer(L, code.c_str(), code.size(), "fuzz") != LUA_OK) {
+		report_error(L, "luaL_loadbuffer()");
+		goto end;
+	}
+
+	/*
+	 * Using lua_pcall (protected call) to catch errors due to
+	 * wrong semantics of some generated code chunks.
+	 * Mostly, generated code is not semantically correct, so it is
+	 * needed to describe Lua semantics for more interesting
+	 * results and fuzzer tests.
+	 */
+	if (lua_pcall(L, 0, 0, 0) != LUA_OK)
+		report_error(L, "lua_pcall()");
+
+end:
+	lua_settop(L, 0);
+	lua_close(L);
+}
diff --git a/test/fuzz/luaL_loadbuffer/lua_grammar.proto b/test/fuzz/luaL_loadbuffer/lua_grammar.proto
new file mode 100644
index 0000000000000000000000000000000000000000..6a4fbfad262b4d70376d277d7a73165ac754beb6
--- /dev/null
+++ b/test/fuzz/luaL_loadbuffer/lua_grammar.proto
@@ -0,0 +1,402 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright 2022, Tarantool AUTHORS, please see AUTHORS file.
+ */
+
+/*
+ * Grammar is for Lua 5.1.
+ * Comments around message definitions are in eBNF notation.
+ * Long-bracket string literals ([[...]]) are not implemented yet.
+ */
+syntax = "proto2";
+package lua_grammar;
+
+/* block ::= chunk */
+message Block {
+	required Chunk chunk = 1;
+}
+
+/*
+ * chunk ::= {stat [`;`]} [laststat [`;`]]
+ * Semicolon encapsulated in Statement and LastStatement.
+ */
+message Chunk {
+	repeated Statement stat = 1;
+	optional LastStatement laststat = 2;
+}
+
+/*
+ * stat ::= assignmentlist |
+ *          functioncall |
+ *          doblock |
+ *          whilecycle |
+ *          repeatcycle |
+ *          ifstat |
+ *          forcyclename |
+ *          forcyclelist |
+ *          function |
+ *          localfunc |
+ *          localnames
+ */
+message Statement {
+	oneof stat_oneof {
+		AssignmentList list = 1;
+		FunctionCall call = 2;
+		DoBlock block = 3;
+		WhileCycle whilecycle = 4; /* Not serialized yet, see serializer.cc. */
+		RepeatCycle repeatcycle = 5; /* Not serialized yet, see serializer.cc. */
+		IfStatement ifstat = 6;
+		ForCycleName forcyclename = 7;
+		ForCycleList forcyclelist = 8;
+		Function func = 9;
+		LocalFunc localfunc = 10;
+		LocalNames localnames = 11;
+	}
+	optional bool semicolon = 12; /* Only presence is checked, not the value. */
+}
+
+/* assignmentlist ::= varlist `=` explist */
+message AssignmentList {
+	/* varlist ::= var {`,` var} */
+	message VariableList {
+		required Variable var = 1;
+		repeated Variable vars = 2;
+	}
+
+	required VariableList varlist = 1;
+	required ExpressionList explist = 2;
+}
+
+/* functioncall ::= prefixexp args | prefixexp `:` Name args */
+message FunctionCall {
+	/* args ::= `(` [explist] `)` | tableconstructor | String */
+	message Args {
+		oneof args_oneof {
+			OptionalExpressionList explist = 1;
+			TableConstructor tableconstructor = 2;
+			string str = 3; /* Reduced to identifier characters by the serializer. */
+		}
+	}
+
+	/* prefixexp args */
+	message PrefixArgs {
+		required PrefixExpression prefixexp = 1;
+		required Args args = 2;
+	}
+
+	/* prefixexp `:` Name args */
+	message PrefixNamedArgs {
+		required PrefixExpression prefixexp = 1;
+		required Name name = 2;
+		required Args args = 3;
+	}
+
+	oneof call_oneof {
+		PrefixArgs prefArgs = 1;
+		PrefixNamedArgs namedArgs = 2;
+	}
+}
+
+/* doblock ::= `do` block `end` */
+message DoBlock {
+	required Block block = 1;
+}
+
+/* whilecycle ::= `while` exp `do` block `end` */
+message WhileCycle {
+	required Expression condition = 1;
+	required DoBlock doblock = 2;
+}
+
+/* repeatcycle ::= `repeat` block `until` exp */
+message RepeatCycle {
+	required Block block = 1;
+	required Expression condition = 2;
+}
+
+/*
+ * ifstat ::=
`if` exp `then` block {`elseif` exp `then` block}
+ *            [`else` block] `end`
+ */
+message IfStatement {
+	/* `elseif` exp `then` block */
+	message ElseIfBlock {
+		required Expression condition = 1;
+		required Block block = 2;
+	}
+
+	required Expression condition = 1;
+	required Block first = 2; /* Block of the leading `if` branch. */
+	repeated ElseIfBlock clauses = 3; /* Zero or more `elseif` branches. */
+	optional Block last = 4; /* Block of the optional `else` branch. */
+}
+
+/* forcyclename ::= `for` Name `=` exp `,` exp [`,` exp] doblock */
+message ForCycleName {
+	required Name name = 1;
+	required Expression startexp = 2;
+	required Expression stopexp = 3;
+	optional Expression stepexp = 4;
+	required DoBlock doblock = 5;
+}
+
+/* forcyclelist ::= `for` namelist `in` explist doblock */
+message ForCycleList {
+	required NameList names = 1;
+	required ExpressionList expressions = 2;
+	required DoBlock doblock = 3;
+}
+
+/* function ::= `function` funcname funcbody */
+message Function {
+	/* funcname ::= Name {`.` Name} [`:` Name] */
+	message FuncName {
+		required Name firstname = 1;
+		repeated Name names = 2; /* Joined with `.`. */
+		optional Name lastname = 3; /* Appended after `:`. */
+	}
+
+	required FuncName name = 1;
+	required FuncBody body = 2;
+}
+
+/* funcbody ::= `(` [parlist] `)` block `end` */
+message FuncBody {
+	/* namelistwithellipsis ::= namelist [`,` `...`] */
+	message NameListWithEllipsis {
+		required NameList namelist = 1;
+		optional string ellipsis = 2; /* Only presence is checked. */
+	}
+
+	/* parlist ::= namelistwithellipsis | `...` */
+	message ParList {
+		oneof parlist_oneof {
+			NameListWithEllipsis namelist = 1;
+			string ellipsis = 2; /* Value is ignored, `...` is emitted. */
+		}
+	}
+
+	optional ParList parlist = 1;
+	required Block block = 2;
+}
+
+/* namelist ::= Name {`,` Name} */
+message NameList {
+	required Name firstname = 1;
+	repeated Name names = 2;
+}
+
+/* localfunc ::= `local` `function` Name funcbody */
+message LocalFunc {
+	required Name name = 1;
+	required FuncBody funcbody = 2;
+}
+
+/* localnames ::= `local` namelist [`=` explist] */
+message LocalNames {
+	required NameList namelist = 1;
+	optional ExpressionList explist = 2;
+}
+
+/* laststat ::= `return` [explist] | `break`
*/
+message LastStatement {
+	/* `return` [explist] */
+	message ReturnOptionalExpressionList {
+		optional ExpressionList explist = 1;
+	}
+
+	oneof last_oneof {
+		ReturnOptionalExpressionList explist = 1;
+		uint32 break = 2; /* Value is ignored, only the chosen case matters. */
+	}
+	optional bool semicolon = 3; /* Only presence is checked, not the value. */
+}
+
+/* explist ::= {exp `,`} exp */
+message ExpressionList {
+	repeated Expression expressions = 1;
+	required Expression explast = 2;
+}
+
+message OptionalExpressionList { /* [explist] */
+	optional ExpressionList explist = 1;
+}
+
+/* var ::= Name | prefixexp `[` exp `]` | prefixexp `.` Name */
+message Variable {
+	/* prefixexp `[` exp `]` */
+	message IndexWithExpression {
+		required PrefixExpression prefixexp = 1;
+		required Expression exp = 2;
+	}
+
+	/* prefixexp `.` Name */
+	message IndexWithName {
+		required PrefixExpression prefixexp = 1;
+		required string Name = 2; /* Plain string, no numeric suffix is added. */
+	}
+
+	oneof var_oneof {
+		Name name = 1;
+		IndexWithExpression indexexpr = 2;
+		IndexWithName indexname = 3;
+	}
+}
+
+/* prefixexp ::= var | functioncall | `(` exp `)` */
+message PrefixExpression {
+	oneof prefix_oneof {
+		Variable var = 1;
+		FunctionCall functioncall = 2;
+		Expression exp = 3;
+	}
+}
+
+/*
+ * exp ::= nil |
+ *         false |
+ *         true |
+ *         Number |
+ *         String |
+ *         `...` |
+ *         anonfunction |
+ *         prefixexp |
+ *         tableconstructor |
+ *         exp binop exp |
+ *         unop exp
+ */
+message Expression {
+	/* anonfunction ::= `function` funcbody */
+	message AnonFunc {
+		required FuncBody body = 1;
+	}
+
+	/* exp binop exp */
+	message ExpBinaryOpExp {
+		required Expression leftexp = 1;
+		required BinaryOperator binop = 2;
+		required Expression rightexp = 3;
+	}
+
+	/* unop exp */
+	message UnaryOpExp {
+		required UnaryOperator unop = 1;
+		required Expression exp = 2;
+	}
+
+	oneof expr_oneof {
+		uint32 nil = 1; /* Value is ignored for nil/false/true. */
+		uint32 false = 2;
+		uint32 true = 3;
+		double number = 4; /* Clamped to [kMinNumber, kMaxNumber] on output. */
+		string str = 5; /* Reduced to identifier characters on output. */
+		string ellipsis = 6; /* Value is ignored, `...` is emitted. */
+		AnonFunc function = 7;
+		PrefixExpression prefixexp = 8;
+		TableConstructor tableconstructor = 9;
+		ExpBinaryOpExp binary = 10;
+		UnaryOpExp unary = 11;
+	}
+}
+
+/*
tableconstructor ::= `{` [fieldlist] `}` */
+message TableConstructor {
+	optional FieldList fieldlist = 1;
+}
+
+/* fieldlist ::= field {fieldsep field} [fieldsep] */
+message FieldList {
+	/* fieldsep field */
+	message FieldWithFieldSep {
+		required Field field = 1;
+		required FieldSep sep = 2;
+	}
+
+	required Field firstField = 1;
+	repeated FieldWithFieldSep fields = 2;
+	optional FieldSep lastSep = 3;
+}
+
+/* field ::= `[` exp `]` `=` exp | Name `=` exp | exp */
+message Field {
+	/* `[` exp `]` `=` exp */
+	message ExpressionAssignment {
+		required Expression key = 1;
+		required Expression value = 2;
+	}
+
+	/* Name `=` exp */
+	message NameAssignment {
+		required Name name = 1;
+		required Expression value = 2;
+	}
+
+	oneof field_oneof {
+		ExpressionAssignment exprassign = 1;
+		NameAssignment namedassign = 2;
+		Expression expression = 3;
+	}
+}
+
+/* fieldsep ::= `,` | `;` */
+message FieldSep {
+	oneof sep_oneof {
+		uint32 comma = 1;
+		uint32 semicolon = 2;
+	}
+}
+
+/*
+ * binop ::= `+` |
+ *           `-` |
+ *           `*` |
+ *           `/` |
+ *           `^` |
+ *           `%` |
+ *           `..` |
+ *           `<` |
+ *           `<=` |
+ *           `>` |
+ *           `>=` |
+ *           `==` |
+ *           `~=` |
+ *           `and` |
+ *           `or`
+ */
+message BinaryOperator {
+	oneof binary_oneof {
+		/* Arithmetic operators. */
+		uint32 add = 1;
+		uint32 sub = 2;
+		uint32 mult = 3;
+		uint32 div = 4;
+		uint32 exp = 5;
+		uint32 mod = 6;
+
+		/* Concatenation. */
+		uint32 concat = 7;
+
+		/* Relational and logical operators. */
+		uint32 less = 8;
+		uint32 lessEqual = 9;
+		uint32 greater = 10;
+		uint32 greaterEqual = 11;
+		uint32 equal = 12;
+		uint32 notEqual = 13;
+		uint32 and = 14;
+		uint32 or = 15;
+	}
+}
+
+/* unop ::= `-` | `not` | `#` */
+message UnaryOperator {
+	oneof unary_oneof {
+		uint32 negate = 1;
+		uint32 not = 2;
+		uint32 length = 3;
+	}
+}
+
+/* Name ::= <correct Lua Identifier> */
+message Name {
+	required string name = 1;
+	required uint32 num = 2;
+}
diff --git a/test/fuzz/luaL_loadbuffer/serializer.cc b/test/fuzz/luaL_loadbuffer/serializer.cc
new file mode 100644
index 0000000000000000000000000000000000000000..8a54077b9aefb065088c9c3beb836fc6d162229f
--- /dev/null
+++ b/test/fuzz/luaL_loadbuffer/serializer.cc
@@ -0,0 +1,700 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright 2022, Tarantool AUTHORS, please see AUTHORS file.
+ */
+#include "serializer.h"
+
+#include <cctype>
+#include <string>
+
+/**
+ * Return a copy of `s` without its leading decimal digits, or an
+ * empty string when `s` consists of digits only.
+ */
+static inline std::string
+RemoveLeadingNumbers(const std::string &s)
+{
+	for (size_t i = 0; i < s.length(); ++i)
+		/* <cctype> functions require an unsigned char value. */
+		if (!std::isdigit(static_cast<unsigned char>(s[i])))
+			return s.substr(i);
+	return "";
+}
+
+/**
+ * Keep only the characters of `s` that may form an identifier:
+ * letters and '_' anywhere, digits only after the first kept
+ * character. The result is either empty or starts with a letter
+ * or '_'.
+ */
+static inline std::string
+ClearNonIdentifierSymbols(const std::string &s)
+{
+	std::string cleared;
+	cleared.reserve(s.size());
+
+	for (char c : s) {
+		const unsigned char uc = static_cast<unsigned char>(c);
+		if (std::isalpha(uc) || c == '_')
+			cleared += c;
+		/*
+		 * A digit must not become the first character, even
+		 * when the symbols before it were dropped.
+		 */
+		else if (std::isdigit(uc) && !cleared.empty())
+			cleared += c;
+	}
+
+	return cleared;
+}
+
+/** Truncate `s` to at most `maxSize` characters. */
+static inline std::string
+clamp(std::string s, size_t maxSize = kMaxStrLength)
+{
+	if (s.size() > maxSize)
+		s.resize(maxSize);
+	return s;
+}
+
+/**
+ * Limit `number` to the range [lower, upper].
+ * NaN compares false with both bounds and is returned unchanged.
+ */
+static inline double
+clamp(double number, double upper, double lower)
+{
+	return number <= lower ? lower :
+	       number >= upper ? upper : number;
+}
+
+/**
+ * Turn an arbitrary string into a non-empty identifier-like string
+ * of at most kMaxStrLength characters; kDefaultIdent is used when
+ * nothing is left after cleaning.
+ */
+static inline std::string
+ConvertToStringDefault(const std::string &s)
+{
+	std::string ident = RemoveLeadingNumbers(s);
+	ident = clamp(ClearNonIdentifierSymbols(ident));
+	if (ident.empty())
+		return std::string(kDefaultIdent);
+	return ident;
+}
+
+PROTO_TOSTRING(Block, block)
+{
+	return ChunkToString(block.chunk());
+}
+
+/** Every statement is emitted on its own line. */
+PROTO_TOSTRING(Chunk, chunk)
+{
+	std::string chunk_str;
+	for (int i = 0; i < chunk.stat_size(); ++i)
+		chunk_str += StatementToString(chunk.stat(i)) + "\n";
+
+	if (chunk.has_laststat())
+		chunk_str += LastStatementToString(chunk.laststat()) + "\n";
+
+	return chunk_str;
+}
+
+/**
+ * LastStatement and nested types.
+ */
+PROTO_TOSTRING(LastStatement, laststat)
+{
+	std::string laststat_str;
+	using LastStatType = LastStatement::LastOneofCase;
+	/*
+	 * Every case ends with `break`: without it all cases fall
+	 * through to the default and `break` is never generated.
+	 */
+	switch (laststat.last_oneof_case()) {
+	case LastStatType::kExplist:
+		laststat_str = ReturnOptionalExpressionListToString(
+			laststat.explist());
+		break;
+	case LastStatType::kBreak:
+		laststat_str = "break";
+		break;
+	default:
+		/* Chosen as default in order to decrease number of 'break's. */
+		laststat_str = ReturnOptionalExpressionListToString(
+			laststat.explist());
+		break;
+	}
+
+	if (laststat.has_semicolon())
+		laststat_str += "; ";
+
+	return laststat_str;
+}
+
+NESTED_PROTO_TOSTRING(ReturnOptionalExpressionList, explist, LastStatement)
+{
+	std::string explist_str = "return";
+	if (explist.has_explist()) {
+		explist_str += " " + ExpressionListToString(explist.explist());
+		explist_str += " ";
+	}
+	return explist_str;
+}
+
+/**
+ * Statement and statement options.
+ */
+PROTO_TOSTRING(Statement, stat)
+{
+	std::string stat_str;
+	using StatType = Statement::StatOneofCase;
+	/*
+	 * Every case ends with `break`: without it all cases fall
+	 * through to the default and only assignments are generated.
+	 */
+	switch (stat.stat_oneof_case()) {
+	case StatType::kList:
+		stat_str = AssignmentListToString(stat.list());
+		break;
+	case StatType::kCall:
+		stat_str = FunctionCallToString(stat.call());
+		break;
+	case StatType::kBlock:
+		stat_str = DoBlockToString(stat.block());
+		break;
+	/**
+	 * TODO:
+	 * Commented due to possible generation of infinite loops.
+	 * In that case, fuzzer will drop only by timeout.
+	 * Example: 'while true do end'.
+	 */
+	/*
+	 * case StatType::kWhilecycle:
+	 *	stat_str = WhileCycleToString(stat.whilecycle());
+	 *	break;
+	 * case StatType::kRepeatcycle:
+	 *	stat_str = RepeatCycleToString(stat.repeatcycle());
+	 *	break;
+	 */
+	case StatType::kIfstat:
+		stat_str = IfStatementToString(stat.ifstat());
+		break;
+	case StatType::kForcyclename:
+		stat_str = ForCycleNameToString(stat.forcyclename());
+		break;
+	case StatType::kForcyclelist:
+		stat_str = ForCycleListToString(stat.forcyclelist());
+		break;
+	case StatType::kFunc:
+		stat_str = FunctionToString(stat.func());
+		break;
+	case StatType::kLocalfunc:
+		stat_str = LocalFuncToString(stat.localfunc());
+		break;
+	case StatType::kLocalnames:
+		stat_str = LocalNamesToString(stat.localnames());
+		break;
+	default:
+		/**
+		 * Chosen arbitrarily more for simplicity.
+		 * TODO: Choose "more interesting" defaults.
+		 */
+		stat_str = AssignmentListToString(stat.list());
+		break;
+	}
+
+	if (stat.has_semicolon())
+		stat_str += "; ";
+
+	return stat_str;
+}
+
+/**
+ * AssignmentList and nested types.
+ */
+PROTO_TOSTRING(AssignmentList, assignmentlist)
+{
+	/* varlist `=` explist */
+	const std::string targets =
+		VariableListToString(assignmentlist.varlist());
+	const std::string values =
+		ExpressionListToString(assignmentlist.explist());
+	return targets + " = " + values;
+}
+
+NESTED_PROTO_TOSTRING(VariableList, varlist, AssignmentList)
+{
+	/* The first variable is mandatory, the rest are comma-separated. */
+	std::string out = VariableToString(varlist.var());
+	for (const auto &extra : varlist.vars())
+		out += ", " + VariableToString(extra) + " ";
+	return out;
+}
+
+/**
+ * FunctionCall and nested types.
+ */
+PROTO_TOSTRING(FunctionCall, call)
+{
+	using FuncCallType = FunctionCall::CallOneofCase;
+	if (call.call_oneof_case() == FuncCallType::kPrefArgs)
+		return PrefixArgsToString(call.prefargs());
+	/*
+	 * The method-call form serves both kNamedArgs and an unset
+	 * oneof: chosen for more variability of generated programs.
+	 */
+	return PrefixNamedArgsToString(call.namedargs());
+}
+
+NESTED_PROTO_TOSTRING(Args, args, FunctionCall)
+{
+	using ArgsType = FunctionCall::Args::ArgsOneofCase;
+	const ArgsType kind = args.args_oneof_case();
+
+	if (kind == ArgsType::kExplist) {
+		std::string out = "(";
+		out += OptionalExpressionListToString(args.explist());
+		out += ")";
+		return out;
+	}
+	if (kind == ArgsType::kStr) {
+		std::string out = "'";
+		out += ConvertToStringDefault(args.str());
+		out += "'";
+		return out;
+	}
+	/*
+	 * A table constructor serves both kTableconstructor and an
+	 * unset oneof (for more variability).
+	 */
+	return TableConstructorToString(args.tableconstructor());
+}
+
+NESTED_PROTO_TOSTRING(PrefixArgs, prefixargs, FunctionCall)
+{
+	/* prefixexp args */
+	std::string out = PrefixExpressionToString(prefixargs.prefixexp());
+	out += " ";
+	out += ArgsToString(prefixargs.args());
+	return out;
+}
+
+NESTED_PROTO_TOSTRING(PrefixNamedArgs, prefixnamedargs, FunctionCall)
+{
+	/* prefixexp `:` Name args */
+	std::string out = PrefixExpressionToString(
+		prefixnamedargs.prefixexp());
+	out += ":";
+	out += NameToString(prefixnamedargs.name());
+	out += " ";
+	out += ArgsToString(prefixnamedargs.args());
+	return out;
+}
+
+/**
+ * DoBlock clause.
+ */
+PROTO_TOSTRING(DoBlock, block)
+{
+	std::string out = "do\n";
+	out += BlockToString(block.block());
+	out += "end\n";
+	return out;
+}
+
+/**
+ * WhileCycle clause.
+ */
+PROTO_TOSTRING(WhileCycle, whilecycle)
+{
+	std::string out = "while ";
+	out += ExpressionToString(whilecycle.condition());
+	out += " ";
+	out += DoBlockToString(whilecycle.doblock());
+	return out;
+}
+
+/**
+ * RepeatCycle clause.
+ */
+PROTO_TOSTRING(RepeatCycle, repeatcycle)
+{
+	std::string out = "repeat ";
+	out += BlockToString(repeatcycle.block());
+	out += "until ";
+	out += ExpressionToString(repeatcycle.condition());
+	return out;
+}
+
+/**
+ * IfStatement and nested types.
+ */
+PROTO_TOSTRING(IfStatement, statement)
+{
+	std::string statement_str = "if " +
+		ExpressionToString(statement.condition());
+	statement_str += " then\n\t" + BlockToString(statement.first());
+
+	for (int i = 0; i < statement.clauses_size(); ++i)
+		statement_str += ElseIfBlockToString(statement.clauses(i));
+
+	if (statement.has_last())
+		statement_str += "else\n\t" + BlockToString(statement.last());
+
+	/* A single `end` closes the whole if/elseif/else chain. */
+	statement_str += "end\n";
+	return statement_str;
+}
+
+NESTED_PROTO_TOSTRING(ElseIfBlock, elseifblock, IfStatement)
+{
+	/*
+	 * The keyword is `elseif`. Two words, `else if`, would open
+	 * a nested `if` statement that requires an `end` of its own,
+	 * so every chain with such a clause would fail to parse.
+	 */
+	std::string elseifblock_str = "elseif ";
+	elseifblock_str += ExpressionToString(elseifblock.condition());
+	elseifblock_str += " then\n\t";
+	elseifblock_str += BlockToString(elseifblock.block());
+	return elseifblock_str;
+}
+
+/**
+ * ForCycleName clause.
+ * TODO: In 'for i = start, stop, step' construction start, stop, step
+ * should be numbers. So results of the corresponding expressions
+ * should be number.
+ */
+PROTO_TOSTRING(ForCycleName, forcyclename)
+{
+	std::string forcyclename_str = "for " + NameToString(
+		forcyclename.name());
+	forcyclename_str += " = " + ExpressionToString(forcyclename.startexp());
+	forcyclename_str += ", " + ExpressionToString(forcyclename.stopexp());
+
+	/* The step expression is optional in the grammar. */
+	if (forcyclename.has_stepexp())
+		forcyclename_str += ", " + ExpressionToString(
+			forcyclename.stepexp());
+
+	forcyclename_str += " " + DoBlockToString(forcyclename.doblock());
+	return forcyclename_str;
+}
+
+/**
+ * ForCycleList clause.
+ */
+PROTO_TOSTRING(ForCycleList, forcyclelist)
+{
+	std::string forcyclelist_str = "for " + NameListToString(
+		forcyclelist.names());
+	forcyclelist_str += " in " + ExpressionListToString(
+		forcyclelist.expressions());
+	forcyclelist_str += " " + DoBlockToString(forcyclelist.doblock());
+	return forcyclelist_str;
+}
+
+/**
+ * Function and nested types.
+ */
+PROTO_TOSTRING(Function, func)
+{
+	/* `function` funcname funcbody */
+	std::string out = "function ";
+	out += FuncNameToString(func.name());
+	out += FuncBodyToString(func.body());
+	return out;
+}
+
+NESTED_PROTO_TOSTRING(FuncName, funcname, Function)
+{
+	/* Name {`.` Name} [`:` Name] */
+	std::string out = NameToString(funcname.firstname());
+
+	for (const auto &part : funcname.names()) {
+		out += ".";
+		out += NameToString(part);
+	}
+
+	if (!funcname.has_lastname())
+		return out;
+
+	out += ":";
+	out += NameToString(funcname.lastname());
+	return out;
+}
+
+PROTO_TOSTRING(NameList, namelist)
+{
+	/* Name {`,` Name} */
+	std::string out = NameToString(namelist.firstname());
+	for (const auto &extra : namelist.names()) {
+		out += ", ";
+		out += NameToString(extra);
+	}
+	return out;
+}
+
+PROTO_TOSTRING(FuncBody, body)
+{
+	/* `(` [parlist] `)` block `end` */
+	const std::string params = body.has_parlist() ?
+		ParListToString(body.parlist()) : std::string();
+
+	std::string out = "( " + params + " )\n\t";
+	out += BlockToString(body.block());
+	out += "end\n";
+	return out;
+}
+
+NESTED_PROTO_TOSTRING(NameListWithEllipsis, namelist, FuncBody)
+{
+	/* Only the presence of the ellipsis field matters. */
+	const char *tail = namelist.has_ellipsis() ? ", ..." : "";
+	return NameListToString(namelist.namelist()) + tail;
+}
+
+NESTED_PROTO_TOSTRING(ParList, parlist, FuncBody)
+{
+	using ParListType = FuncBody::ParList::ParlistOneofCase;
+	if (parlist.parlist_oneof_case() == ParListType::kEllipsis)
+		return "...";
+	/*
+	 * The name list serves both kNamelist and an unset oneof:
+	 * chosen as default in order to decrease number of ellipses.
+	 */
+	return NameListWithEllipsisToString(parlist.namelist());
+}
+
+/**
+ * LocalFunc clause.
+ */
+PROTO_TOSTRING(LocalFunc, localfunc)
+{
+	/* `local` `function` Name funcbody */
+	std::string out = "local function ";
+	out += NameToString(localfunc.name());
+	out += " ";
+	out += FuncBodyToString(localfunc.funcbody());
+	return out;
+}
+
+/**
+ * LocalNames clause.
+ */
+PROTO_TOSTRING(LocalNames, localnames)
+{
+	std::string localnames_str = "local ";
+	localnames_str += NameListToString(localnames.namelist());
+
+	if (localnames.has_explist())
+		localnames_str += " = " + ExpressionListToString(
+			localnames.explist());
+	return localnames_str;
+}
+
+/**
+ * Expressions and variables.
+ */
+
+/**
+ * Expressions clauses.
+ */
+PROTO_TOSTRING(ExpressionList, explist)
+{
+	std::string explist_str;
+	for (int i = 0; i < explist.expressions_size(); ++i)
+		explist_str += ExpressionToString(explist.expressions(i)) +
+			", ";
+	explist_str += ExpressionToString(explist.explast()) + " ";
+	return explist_str;
+}
+
+PROTO_TOSTRING(OptionalExpressionList, explist)
+{
+	if (explist.has_explist())
+		return ExpressionListToString(explist.explist());
+	return "";
+}
+
+PROTO_TOSTRING(PrefixExpression, prefixexp)
+{
+	using PrefExprType = PrefixExpression::PrefixOneofCase;
+	switch (prefixexp.prefix_oneof_case()) {
+	case PrefExprType::kVar:
+		return VariableToString(prefixexp.var());
+	case PrefExprType::kFunctioncall:
+		return FunctionCallToString(prefixexp.functioncall());
+	case PrefExprType::kExp:
+		return "(" + ExpressionToString(prefixexp.exp()) + ")";
+	default:
+		/*
+		 * Can be generated too nested expressions with other options,
+		 * though they can be enabled for more variable fuzzing.
+		 */
+		return VariableToString(prefixexp.var());
+	}
+}
+
+/**
+ * Variable and nested types.
+ */
+PROTO_TOSTRING(Variable, var)
+{
+	using VarType = Variable::VarOneofCase;
+	switch (var.var_oneof_case()) {
+	case VarType::kName:
+		return NameToString(var.name());
+	case VarType::kIndexexpr:
+		return IndexWithExpressionToString(var.indexexpr());
+	case VarType::kIndexname:
+		return IndexWithNameToString(var.indexname());
+	default:
+		/*
+		 * Can be generated too nested expressions with other options,
+		 * though they can be enabled for more variable fuzzing.
+		 */
+		return NameToString(var.name());
+	}
+}
+
+NESTED_PROTO_TOSTRING(IndexWithExpression, indexexpr, Variable)
+{
+	std::string indexexpr_str = PrefixExpressionToString(
+		indexexpr.prefixexp());
+	indexexpr_str += "[" + ExpressionToString(indexexpr.exp()) + "]";
+	return indexexpr_str;
+}
+
+NESTED_PROTO_TOSTRING(IndexWithName, indexname, Variable)
+{
+	std::string indexname_str = PrefixExpressionToString(
+		indexname.prefixexp());
+	indexname_str += "." + ConvertToStringDefault(indexname.name());
+	return indexname_str;
+}
+
+/**
+ * Expression and nested types.
+ */
+PROTO_TOSTRING(Expression, expr)
+{
+	using ExprType = Expression::ExprOneofCase;
+	switch (expr.expr_oneof_case()) {
+	case ExprType::kNil:
+		return "nil";
+	case ExprType::kFalse:
+		return "false";
+	case ExprType::kTrue:
+		return "true";
+	case ExprType::kNumber: {
+		/* Clamp number between given boundaries. */
+		double number = clamp(expr.number(), kMaxNumber, kMinNumber);
+		return std::to_string(number);
+	}
+	case ExprType::kStr:
+		return "'" + ConvertToStringDefault(expr.str()) + "'";
+	case ExprType::kEllipsis:
+		return " ... ";
+	case ExprType::kFunction:
+		return AnonFuncToString(expr.function());
+	case ExprType::kPrefixexp:
+		return PrefixExpressionToString(expr.prefixexp());
+	case ExprType::kTableconstructor:
+		return TableConstructorToString(expr.tableconstructor());
+	case ExprType::kBinary:
+		return ExpBinaryOpExpToString(expr.binary());
+	case ExprType::kUnary:
+		return UnaryOpExpToString(expr.unary());
+	default:
+		/**
+		 * Arbitrary choice.
+		 * TODO: Choose "more interesting" defaults.
+		 */
+		return "'" + ConvertToStringDefault(expr.str()) + "'";
+	}
+}
+
+NESTED_PROTO_TOSTRING(AnonFunc, func, Expression)
+{
+	return "function " + FuncBodyToString(func.body());
+}
+
+NESTED_PROTO_TOSTRING(ExpBinaryOpExp, binary, Expression)
+{
+	std::string binary_str = ExpressionToString(binary.leftexp());
+	binary_str += " " + BinaryOperatorToString(binary.binop()) + " ";
+	binary_str += ExpressionToString(binary.rightexp());
+	return binary_str;
+}
+
+NESTED_PROTO_TOSTRING(UnaryOpExp, unary, Expression)
+{
+	std::string unary_str = UnaryOperatorToString(unary.unop());
+	/*
+	 * Separate the operator from its operand with a whitespace.
+	 * Otherwise a unary minus followed by a negative number or by
+	 * another unary minus yields `--`, which starts a Lua comment
+	 * and cuts off the rest of the generated line.
+	 */
+	unary_str += " " + ExpressionToString(unary.exp());
+	return unary_str;
+}
+
+/**
+ * Tables and fields.
+ */
+PROTO_TOSTRING(TableConstructor, table)
+{
+	std::string table_str = "{ ";
+	if (table.has_fieldlist())
+		table_str += FieldListToString(table.fieldlist());
+	table_str += " }";
+	return table_str;
+}
+
+PROTO_TOSTRING(FieldList, fieldlist)
+{
+	std::string fieldlist_str = FieldToString(fieldlist.firstfield());
+	for (int i = 0; i < fieldlist.fields_size(); ++i)
+		fieldlist_str += FieldWithFieldSepToString(fieldlist.fields(i));
+	if (fieldlist.has_lastsep())
+		fieldlist_str += FieldSepToString(fieldlist.lastsep());
+	return fieldlist_str;
+}
+
+NESTED_PROTO_TOSTRING(FieldWithFieldSep, field, FieldList)
+{
+	/* The separator goes first: it follows the previous field. */
+	std::string field_str = FieldSepToString(field.sep());
+	field_str += " " + FieldToString(field.field());
+	return field_str;
+}
+
+/**
+ * Field and nested types.
+ */
+PROTO_TOSTRING(Field, field)
+{
+	using FieldType = Field::FieldOneofCase;
+	switch (field.field_oneof_case()) {
+	case FieldType::kExprassign:
+		return ExpressionAssignmentToString(field.exprassign());
+	case FieldType::kNamedassign:
+		return NameAssignmentToString(field.namedassign());
+	case FieldType::kExpression:
+		return ExpressionToString(field.expression());
+	default:
+		/* More common case of using fields. */
+		return NameAssignmentToString(field.namedassign());
+	}
+}
+
+NESTED_PROTO_TOSTRING(ExpressionAssignment, assignment, Field)
+{
+	std::string assignment_str = "[ " +
+		ExpressionToString(assignment.key()) + " ]";
+	assignment_str += " = " + ExpressionToString(assignment.value());
+	return assignment_str;
+}
+
+NESTED_PROTO_TOSTRING(NameAssignment, assignment, Field)
+{
+	std::string assignment_str = NameToString(assignment.name());
+	assignment_str += " = " + ExpressionToString(assignment.value());
+	return assignment_str;
+}
+
+PROTO_TOSTRING(FieldSep, sep)
+{
+	using FieldSepType = FieldSep::SepOneofCase;
+	switch (sep.sep_oneof_case()) {
+	case FieldSepType::kComma:
+		return ",";
+	case FieldSepType::kSemicolon:
+		return ";";
+	default:
+		return ",";
+	}
+}
+
+/**
+ * Operators.
+ */
+PROTO_TOSTRING(BinaryOperator, op)
+{
+	using BinopType = BinaryOperator::BinaryOneofCase;
+	switch (op.binary_oneof_case()) {
+	case BinopType::kAdd:
+		return "+";
+	case BinopType::kSub:
+		return "-";
+	case BinopType::kMult:
+		return "*";
+	case BinopType::kDiv:
+		return "/";
+	case BinopType::kExp:
+		return "^";
+	case BinopType::kMod:
+		return "%";
+
+	case BinopType::kConcat:
+		return "..";
+
+	case BinopType::kLess:
+		return "<";
+	case BinopType::kLessEqual:
+		return "<=";
+	case BinopType::kGreater:
+		return ">";
+	case BinopType::kGreaterEqual:
+		return ">=";
+	case BinopType::kEqual:
+		return "==";
+	case BinopType::kNotEqual:
+		return "~=";
+	case BinopType::kAnd:
+		return "and";
+	case BinopType::kOr:
+		return "or";
+	default:
+		/* Works in most cases. */
+		return "==";
+	}
+}
+
+PROTO_TOSTRING(UnaryOperator, op)
+{
+	using UnaryopType = UnaryOperator::UnaryOneofCase;
+	switch (op.unary_oneof_case()) {
+	case UnaryopType::kNegate:
+		return "-";
+	case UnaryopType::kNot:
+		return "not ";
+	case UnaryopType::kLength:
+		return "#";
+	default:
+		/* Works in most cases. */
+		return "not ";
+	}
+}
+
+/**
+ * Identifier (Name).
+ */
+PROTO_TOSTRING(Name, name)
+{
+	std::string ident = ConvertToStringDefault(name.name());
+	/* The numeric suffix keeps the set of distinct names small. */
+	return ident + std::to_string(name.num() % kMaxIdentifiers);
+}
diff --git a/test/fuzz/luaL_loadbuffer/serializer.h b/test/fuzz/luaL_loadbuffer/serializer.h
new file mode 100644
index 0000000000000000000000000000000000000000..97812c919a71924fab90af755a97884cf7ca91df
--- /dev/null
+++ b/test/fuzz/luaL_loadbuffer/serializer.h
@@ -0,0 +1,131 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright 2022, Tarantool AUTHORS, please see AUTHORS file.
+ */
+#pragma once
+
+#include <cstddef>
+#include <string>
+
+#include "lua_grammar.pb.h"
+
+/*
+ * NOTE(review): a using-directive in a header leaks into every
+ * includer. It is kept because the macros below name message types
+ * without the lua_grammar:: qualifier.
+ */
+using namespace lua_grammar;
+
+#define PROTO_TOSTRING(TYPE, VAR_NAME) \
+	std::string TYPE##ToString(const TYPE & (VAR_NAME))
+
+/* PROTO_TOSTRING version for nested (depth=2) protobuf messages. */
+#define NESTED_PROTO_TOSTRING(TYPE, VAR_NAME, PARENT_MESSAGE) \
+	std::string TYPE##ToString \
+	(const PARENT_MESSAGE::TYPE & (VAR_NAME))
+
+/**
+ * Fuzzing parameters:
+ * kMaxNumber - upper bound for all generated numbers.
+ * kMinNumber - lower bound for all generated numbers.
+ * kMaxStrLength - upper bound for generating string literals and identifiers.
+ * kMaxIdentifiers - max number of unique generated identifiers.
+ * kDefaultIdent - default name for identifier.
+ * Default values were chosen arbitrarily but not too big for better
+ * readability of generated code samples.
+ */
+constexpr double kMaxNumber = 1000.0;
+constexpr double kMinNumber = -1000.0;
+constexpr size_t kMaxStrLength = 20;
+constexpr size_t kMaxIdentifiers = 10;
+constexpr char kDefaultIdent[] = "Name";
+
+PROTO_TOSTRING(Block, block);
+PROTO_TOSTRING(Chunk, chunk);
+
+PROTO_TOSTRING(Statement, stat);
+
+/** LastStatement and nested types. */
+PROTO_TOSTRING(LastStatement, laststat);
+NESTED_PROTO_TOSTRING(ReturnOptionalExpressionList, explist, LastStatement);
+
+/**
+ * Statement options.
+ */
+
+/** AssignmentList and nested types.
*/
+PROTO_TOSTRING(AssignmentList, assignmentlist);
+NESTED_PROTO_TOSTRING(VariableList, varlist, AssignmentList);
+
+/** FunctionCall and nested types. */
+PROTO_TOSTRING(FunctionCall, call);
+NESTED_PROTO_TOSTRING(Args, args, FunctionCall);
+NESTED_PROTO_TOSTRING(PrefixArgs, prefixargs, FunctionCall);
+NESTED_PROTO_TOSTRING(PrefixNamedArgs, prefixnamedargs, FunctionCall);
+
+/** DoBlock, WhileCycle and RepeatCycle clauses. */
+PROTO_TOSTRING(DoBlock, block);
+PROTO_TOSTRING(WhileCycle, whilecycle);
+PROTO_TOSTRING(RepeatCycle, repeatcycle);
+
+/** IfStatement and nested types. */
+PROTO_TOSTRING(IfStatement, statement);
+NESTED_PROTO_TOSTRING(ElseIfBlock, elseifblock, IfStatement);
+
+/** ForCycleName (numeric for) and ForCycleList (generic for) clauses. */
+PROTO_TOSTRING(ForCycleName, forcyclename);
+PROTO_TOSTRING(ForCycleList, forcyclelist);
+
+/** Function and nested types. */
+PROTO_TOSTRING(Function, func);
+NESTED_PROTO_TOSTRING(FuncName, funcname, Function);
+
+/** Name lists and function bodies, shared by several statements. */
+PROTO_TOSTRING(NameList, namelist);
+PROTO_TOSTRING(FuncBody, body);
+NESTED_PROTO_TOSTRING(NameListWithEllipsis, namelist, FuncBody);
+NESTED_PROTO_TOSTRING(ParList, parlist, FuncBody);
+
+/** LocalFunc and LocalNames clauses. */
+PROTO_TOSTRING(LocalFunc, localfunc);
+PROTO_TOSTRING(LocalNames, localnames);
+
+/**
+ * Expressions and variables.
+ */
+
+/** Expressions clauses. */
+PROTO_TOSTRING(ExpressionList, explist);
+PROTO_TOSTRING(OptionalExpressionList, explist);
+PROTO_TOSTRING(PrefixExpression, prefExpr);
+
+/** Variable and nested types. */
+PROTO_TOSTRING(Variable, var);
+NESTED_PROTO_TOSTRING(IndexWithExpression, indexexpr, Variable);
+NESTED_PROTO_TOSTRING(IndexWithName, indexname, Variable);
+
+/** Expression and nested types. */
+PROTO_TOSTRING(Expression, expr);
+NESTED_PROTO_TOSTRING(AnonFunc, function, Expression);
+NESTED_PROTO_TOSTRING(ExpBinaryOpExp, binary, Expression);
+NESTED_PROTO_TOSTRING(UnaryOpExp, unary, Expression);
+
+/**
+ * Tables and fields.
+ */
+PROTO_TOSTRING(TableConstructor, table);
+PROTO_TOSTRING(FieldList, fieldlist);
+NESTED_PROTO_TOSTRING(FieldWithFieldSep, field, FieldList);
+
+/** Field and nested types. */
+PROTO_TOSTRING(Field, field);
+NESTED_PROTO_TOSTRING(ExpressionAssignment, assignment, Field);
+NESTED_PROTO_TOSTRING(NameAssignment, assignment, Field);
+PROTO_TOSTRING(FieldSep, sep);
+
+/** Binary and unary operators. */
+PROTO_TOSTRING(BinaryOperator, op);
+PROTO_TOSTRING(UnaryOperator, op);
+
+/** Identifier (Name): a sanitized name followed by a numeric suffix. */
+PROTO_TOSTRING(Name, name);