From b1ff26bcbb14e8788d68276afab33efa3f2c5905 Mon Sep 17 00:00:00 2001 From: SpookyDervish <78246495+SpookyDervish@users.noreply.github.com> Date: Sun, 21 Dec 2025 05:47:15 +1100 Subject: [PATCH] starting work on tokenizer for assembly language --- README.md | 2 +- src/asm/tokenize.c | 56 +++++++++++++++++++++++++++++++++++++++++++++ src/asm/tokenize.h | 33 ++++++++++++++++++++++++++ src/main.c | 21 ++++++++++++----- src/vmbl.c | 21 ++++++++++++++--- vmbl | Bin 0 -> 21720 bytes 6 files changed, 123 insertions(+), 10 deletions(-) create mode 100644 src/asm/tokenize.c create mode 100644 src/asm/tokenize.h create mode 100644 vmbl diff --git a/README.md b/README.md index c19d7c1..9e8570a 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ But uhh that's the name of the VM itself, the name of the programming language I made is Sylt, named after the German island. -To compile for Linux: `gcc src/main.c src/vmbl.c src/exception.c -o VMBL -O3` +To compile for Linux: `gcc src/main.c src/vmbl.c src/exception.c src/asm/tokenize.c -o VMBL -O3` ## Syntax ### Example "Hello, World!" Program diff --git a/src/asm/tokenize.c b/src/asm/tokenize.c new file mode 100644 index 0000000..b27750a --- /dev/null +++ b/src/asm/tokenize.c @@ -0,0 +1,56 @@ +#include "tokenize.h" +#include +#include + + +char *parseName(Tokenizer *tokenizer) { + char *ptr = tokenizer->source; + + int i = 0; + while (isName(*ptr++)) + { + i++; + } + + printf("%d\n", i); + return "hi"; + +} + +bool isNumber(char character) { + return isalnum(character) || character == '.'; +} + +bool isName(char character) { + return isalnum(character) || character == '_'; +} + +Token getCurrentToken(Tokenizer *tokenizer) { + Token token = (Token){}; + + switch (*tokenizer->source) + { + default: + if (isName(*tokenizer->source)) { + char *tokenValue = parseName(tokenizer); + } + + break; + } +} + +char* tokenTypeAsCStr(TokenType type) { + switch (type) + { + case TOKEN_INSTRUCTION: + return "INSTRUCTION"; + break; + + case TOKEN_INT_LITERAL: + return "INT_LITERAL"; + break; + + default: + break; + } +} \ No newline at end of file diff --git a/src/asm/tokenize.h b/src/asm/tokenize.h new file mode 100644 index 0000000..b2e17ce --- /dev/null +++ b/src/asm/tokenize.h @@ -0,0 +1,33 @@ +#ifndef TOKENIZE_H +#define TOKENIZE_H + +#include + +typedef enum +{ + TOKEN_INSTRUCTION, + TOKEN_INT_LITERAL, +} TokenType; + +typedef struct +{ + TokenType type; + char* value; +} Token; + +typedef struct +{ + char *source; + unsigned int pos; + unsigned int line; + unsigned int column; +} Tokenizer; + +bool isNumber(char character); +bool isName(char character); +Token getCurrentToken(Tokenizer *tokenizer); +char* tokenTypeAsCStr(TokenType type); + +char *parseName(Tokenizer *tokenizer); + +#endif // !TOKENIZE_H diff --git a/src/main.c b/src/main.c index babaa1f..ab015f4 100644 --- a/src/main.c +++ b/src/main.c @@ -1,19 +1,28 @@ #include "vmbl.h" +#include "asm/tokenize.h" +#include #define ARRAY_SIZE(array) (sizeof(array) / sizeof(array[0])); VMBL_Instruction program[] = { - MAKE_INST_PUSH(123), - MAKE_INST_PUSH(0), + MAKE_INST_PUSH(124), + MAKE_INST_PUSH(2), MAKE_INST_DIV, - + MAKE_INST_HALT }; int main() { - VMBL_State vmblState = {}; + //VMBL_State vmblState = {}; - VMBL_LoadExecutable(&vmblState, program, sizeof(program)); - VMBL_StartVM(&vmblState); + //VMBL_LoadExecutable(&vmblState, program, sizeof(program)); + //VMBL_StartVM(&vmblState); + + Tokenizer tokenizer = { + "push 1224\npush 2\ndiv\nhalt" + }; + Token token = getCurrentToken(&tokenizer); + + printf("%s\n", tokenTypeAsCStr(token.type)); return 0; } \ No newline at end of file diff --git a/src/vmbl.c b/src/vmbl.c index d6753a5..529b319 100644 --- a/src/vmbl.c +++ b/src/vmbl.c @@ -11,7 +11,6 @@ } \ (vmblState)->stack[(vmblState)->stackSize - 2] = (vmblState)->stack[(vmblState)->stackSize - 2] operation (vmblState)->stack[(vmblState)->stackSize - 1]; \ (vmblState)->stackSize--; \ - (vmblState)->ip++; \ } while (0); @@ -37,7 +36,7 @@ VMBL_Exception VBML_ExecuteInstruction(VMBL_State *vmblState, VMBL_Instruction i MATH_OP(vmblState, *); break; case INSTRUCTION_DIV: - if (instruction.opperands[0] == 0) { + if (vmblState->stack[vmblState->stackSize-1] == 0) { return (VMBL_Exception){ EXCEPTION_DIVIDE_BY_ZERO }; } @@ -152,7 +151,7 @@ void VMBL_LoadExecutable(VMBL_State *vmblState, VMBL_Instruction *program, size_ exit(1); } - memcpy(&vmblState->program, program, sizeof(program) * programSize); + memcpy(&vmblState->program, program, programSize); vmblState->programSize = programSize; } @@ -192,6 +191,7 @@ void VMBL_SaveExecutable(const char* filePath, VMBL_Instruction *program, size_t } char *instructionTypeToCStr(InstructionType type) { + printf("%x\n", type); switch (type) { @@ -227,6 +227,21 @@ char *instructionTypeToCStr(InstructionType type) { case INSTRUCTION_EQUAL: return "EQ "; break; + case INSTRUCTION_NOT_EQUAL: + return "NEQ "; + break; + case INSTRUCTION_GREATER_THAN: + return "GT "; + break; + case INSTRUCTION_GREATER_THAN_EQUAL: + return "GTE "; + break; + case INSTRUCTION_LESS_THAN: + return "LT "; + break; + case INSTRUCTION_LESS_THAN_EQUAL: + return "LTE "; + break; case INSTRUCTION_JUMP_CONDITIONAL: return "JC "; diff --git a/vmbl b/vmbl new file mode 100644 index 0000000000000000000000000000000000000000..b4a7aff5174a7e61dd2a6baa7d58421ab7831548 GIT binary patch literal 21720 zcmeHPdw5jUwcnEm!~io zPG=Xe3BdC>P1KhQfLf6b<&1;{lAa4la>Y~_1%5`5nS#nff+RO)DwH(}773i>@~J3s zC){$j;8Rf5XWE-9>X`!uCJMTOb~*BmzD(5X(tirBmHlp)P|1L;eQ<5NNDUiO8KQ1hY%U+8GPq$AW7@42LB@uEu>EhvqKOXqPD$Sw_; zVroV0Y6dj}L{r0=(<^YLkLsq%dg8zOviq(sE4uyVzS@?DHdh$`Jn14mMLc9T$q)}E z(kD&iIF(5c@o;~*HW59CXw#6N2|nGq6tPHeeoFd?~Osfat!+P zG5S?G27eFeGmxF>DL+cyMm2P z%^gA5wZ1dlB@H$N>!^%yV-99pF8|BppDxV$T+BW}qmqkFX3e8EC25ofvI+U`}VPGZ=+35CpG;E{OTA-4OQ(8x9 z{Zi>V$xqv>T2ra6-A1SNQl)Mio$vFqqQ^$(>!qak+URl%=QTIk=*o9gvBO4Jj}}D# zj*UKv2r~8C=#v!@aF>lvYl=#{ZFILH0`9TVJvRD)jjm#is-LjY&$RI$w9#cK*lNd% z;~p6Iz_U12T!Jjm9}Ir#KU zFK5KxPBrPnmSwf=;5a=nq@T^=^q`P_EQ`|yp1wbe)5e{?JB!nXoxUxL(?*^CW)`On zIvvU4v@xffvp79~q*rBe+Mv@FS)4ZJ^t>!i8*+L^7N?CkeO4By4LF^X!L6B3U(Q&Y zXS$E^{#!FY=fE#;;Gc5fZU;WWfuF)_hh6_M2mX!&f5U2d`FupunZA+&-OMXVb>QK}{B`Pg06O4Zr=YwK+^zb>LN-1Tf{Djx?N8l@Iv#Ij zZ~M(>zO0*v^~7sK^^H|!17%N=MKHmdmJ8EAY%BFl-;9Q#G`Z|yP|)uE1eOfPrXked zSc)=2{S0f+GrgDi_DcwUlk+e9CV|xpHzffHSOJvLeITXs8;)Q>)~tW+y(J&->We6DLDn`xiq(;K)m zdm+g5zA48c^azFQUh+Q-Z2Jttb3VKQx%#!b{=(lF3~=9AQj{2;4gw~1-QTW{%!rTfs&TM3U%)Xj&B`pF#S7rl?y@M9J|al+-<`U-lZC-a`s z`?`nqCEY{io}CBu#9OX%&x6NwvwKMDI`VTq@02`u!KM=ymK4vnJGqJqmu?C(%%#s9 z*)GOI@e6NIdtucc+Td-vGHs+A;qVc1!o;Ock=a?13;7tq1I~U;94R!Udwz-zW;>LA z6QhB~WPJAB$KbI&wB+f@rSAp$Rt(o{8Src#06}`|6s*6G>ibp<>4|q-`gi>wK_d{8 z3#Y%$W1$x;BkfsoB6i~*n8S48u?<~n_53w zu{ktEa%j@$XWKonskk4poG#=f#0jnW`$c?t8&@~zefO4V8e;K0EXIh-l1mZKd~lM} zzW-^E%zb48sjiPL>k(S*5%xL6!HrKYyA>QMFDmsdZ^xFFy%Fr!kOfF%8B`?L_#e~uW$xBMlW6ZXyZ7x~dPquEOSl7*u^<_LRZ+Qr2 zXV#N?7p77UtbLSs*Zh`XeoWstn6K|UoDa_w7irvN*YACO_ZOfX=)1d^gGb0`KJ)!l z-U-X{CQf-g+s?vn=`)A?Exs)*;7VOiOISIpF^|`ngSwd(tt6-Yyci}W7v2U?HsCjp zrXD+PS%JPj28I45Y0u_)(Bt29T+F0}K;T=?O&thCf6|K~PH@ho*V1RFu#oRxY34nZ z{w1TH`54-x#?U|+8bIS%f38J|w~m2a%^UkZsaTm@@T%TdJFIVc(zE$%)M(-)0{Y*v zkm!BYL;l231hoFqaowyQ3Yc8v$kmzofp$oHwqI|b!~69J0NQzbm^#IUgxYJMc4ac} zRj#(Lo@QS?qPkwG?)`ajKlNeDT?kOUFTn>x4BO51h+RF|QBw4X1|Hq~gKoZ?x*jb& z3ac?D_fU6IU&7M|0=VUi*#JKLDPpGp{t38M0DFnU1xYN~2f>l*MP&_~y!j~@$%A+w z$YR|=9KQ@5_BeJyUdHhn>gJLo@fXr>Ac(|xJN!Z|U^wmufE~a4$l(_{-?jN=Sf8D7 zNjhbl@2B~@OXyuG^nT5$_bBWcq1Tnwy90Vpi%YN2cVZg1^AfI41f`0_A3>!p1Xr4K zo=R776A^cavwI-wRJ-uULf^~BGX3`Mr6uUJ@%$b*eC9`~Zz0ezjCU@~4&!;yA%^h{ zFsWhuDsjkRJOdo5hf!Ixa{xQ9dkI81V#_c_6=#h&?U3XRqedh{&HPEM< z_Z0QRgiR3Iaa#Lx1piw&l%oEF|3HGKDE4Tvyk?#RJzHLeGxSZ5>!_x6+5S07qdRdUSAEUXlCGS?<^#oQ?wg2-j;^EM< zd5}RxBBj>C^Z<4fFW+U-|H?hi%*ucZ6}`pw5vr6|^zZv<#FJnY}}8XK}K|Gszf^~B&{ zVD@2s$@6&KP|p(EONQwL7sU9HnHGxr?8nhm{2IUc8ejR}--`~hfca+X-`<0sO{aKg zSNfB`IQ9!~lbT=O^{dm4EA+zV&(zQU#N;sFccu$NxFQx8$2<}3VJ zY7)v0eGTTv*te1kr_j6I??r_9z?S$Jk1vyG4kxEArSNLW`zPIXkmjE4eJ%eaT(d6o z-jK{c=D?3Q@YfyqJ_r6E4t%!*-{!#U9k|zlU+BOmJMc*k{1}cC?EP8psDH~|PveoW zCT#cye6$u}YFhBpBf?8G|M4O_AM$X|W{d>6GJmQApX0z6Iq)wz@Gm&<_XvmHD+rQ3 zczBc~=$0&(O_-WzQ}(;{)fkn5*;09*xnV!(AQudX2e zZtrm;_LgtPsBbJ{10{XHPhRePruXLrF-$8e<0^u~`S*${~_X~9L9Pn{1$?=bI4~%Lu zb9-iXtOdQJwU7lGt2BIQ8e2i76;!INx)Pr=dkWd|<;zisY9!G!X9Or@D=Wd}_4zdB zTV1cQhSe1st63e;*vcB9KK~kx>E1vitH&!Gi!`$}H5GwNwQ78KHQ1`f!Ww-~Rciyr zHZ*7LHEQc>t88dPqqp)3qi#*rs_H=9Ri{;~uJxT(bn^spn`)T(ty!s{bVomyKY+^OMPwBg7`tvT4z6pseAP)rL&X&|%>;Ye##3w3Ex zeDijWwxJz5v{-vk>ul=bG|t#s%3I0|uAzElv&-;SR#r7M5M$3_%i4uJh+OolWz{41 zBM%`Lzh+q%fzFU;p}d&NDa+dZre&=~E`P(a4!v$!?UdhwJn~=~hr-Cc$VJEuc?dG~ z$lWM+gXaUl{m4<|`JjJ*ya#XFhu+4CA@VSC0q|nvCy-|$FGkjp*CMwg_aNVd{2k<7 z$WI{eK|X|>LN35bKr!+#%C7TrFWZB@;y)E^^2?TWl;9sMYpRQ}&j8K=ocxMq-HYSF z8GyrpO96eL-wF6U__q&QRx9v(0eb+C0`3G1z)!mf2OI?4e#EjGA^#h|UcjSAE$cU= z4}F@7Q`s9~&vwAcu5L zJ2++Mzoi*~{huQS-$b0wF;;@S0J-=j_#S-BT~zHZUg?>%p`e#7KX=LJ=awKz5!v7E1 z0P-Bra2k#?T6dAxU9i>-X5xPdxgK?2hJPrL-PGotf3&Rc5~12%v^~e`F5a5!b!&;d zoZdorF?3Y83#ul8EQ*YEgLM}AnuqxO7X7ywa<}AExQlPjt#E6}ybAZM?fG8!+^rM5 z?!}3T`s7J_26C>PGSTPm-ItS_%rA5=My=O93*3;Z;4M^7%Dvq+YK=}krmw-!*A?a> zhA%^zRk@3{lC6o{6>hCX7>dr4dx-Z5@cO_zjmCuYR!*7VyE!+R*SI}DF(K!bLJTaJ z$I%^J2T(`-7j@Tig;ZC9x+g}dqq$Iyx|dKFl5%p4d~Pjgt-BbFV2mgh z>332kn5*Xy-*_7Lz_3OAHEtfB1ti(*9kH`6HscG9i`_$O6s>%YG3@0G*Hn8WI@?|NQkjBB&7YB625xM zlj6fyFL?jZ94FyR4IHnR`mAg{m&4;Qub(07@qILotA0rJ|8GG3KHI5qEgIS?EUuN!FP&dH_mV|%j?Vk~yalCm7tEIAu`~!eg=rpp&vJ56 z+__Lg?^u*H{>d0ki3|E{!0{G_}%cXdO?xXnmx(spJHm= zQT#(Fkbbr9Df&syKP~gSR7F1z3PhC=8yYN<^K4#7#bkru#e~opR(0u5Rdo;7Q^atb#gkfp_i?)7 z%ED*IpuZ;d?6^?y?vNrMfnJ1m)$eMRrc-0+ITLoC1&QlqVDk592%SgtoHLo)pOl^p zIlscPaOoKII;o#&f|tL8qS&J7a^q0%^#3U7gK`7l@4w)`19a+_)1N<({5xg{{j&sM zKjCzV*Z_TDL$l$k_%!He;#=Czb@0V8^t?^{6cKZU2DQ$eg$J}Vkk`+i~!J>MRKzDw#Ekowg){&Wm}Is+SRK0XIJ zjia-_uZ+Qe9CS^XKH~Fi3@D8klO5vcMBvasmnM$jzl`{E@RSD;d-8HUc8Vxp4f?3} zG&HnCVr8XaTu{)|95P}}>)8#R%^jsJ{H)vX#XGx8yK%`v>0Chw;5@jhC)g5?(X|GD zrm7le#nm_gE^P^SvX5sk(1y}lNfy+FZ;bsD(2p(_}Lb**7z zeMh*tsl#ZEg(Fd;Dc-|c!kyh6!C0^r){V+R7eR!Krbwh|qY>4!HU3JFafOOe zrOR5~w~84n0(BMMfKgXn-B8tNG{T}Z+2(HKF=bi^Aakxpn85A?YLz5! zh5r&-uQ+>qQ?#9xwr=b~69Qsx@R}Qfktn9j2*f~LB-qhJ09RGE1UBW1{5Mxru`6+(fnS?Ef87zCtQg=P(MYa~qN;pE>3K1vK)B(w}** zk`l$YlknuLm9sZh9ryB+d#rJRC8N>D;2 zr{Kd5c}+fSDtMLDr_?Apg+J<$SMN~@s`Ed^@ATiVq`Vq`^`4<1{cqTw)c7gdZ&4us zD|z*vtKcsbqlC`(Uv$VHlm{9L-uM^Ve;xApqC201)Py?MRQK>X?Nzvf??J|6lUL^t z^~*?GHeLmm#g+UgsG!(YW~%ppb+4kjKau7iB_*fE@gC5~e@b4Rd+N)H8$Zn!AoHo@ z6`TNJyL{%owOokwXCy^IbKsL9OuAJ2>V2laTres-m6)aq5}qmBpX;D2c{;DMC$g22 z9je$7HRAJNxqt(5VW9Ibyp?DCr`F#yKvT903GumOg@7H7fK%-%UYcH`$)CJJaGdK9 IaG>me0H@*eumAu6 literal 0 HcmV?d00001