From d4ac0be3167998df7d002fa5964032ec7f8f61db Mon Sep 17 00:00:00 2001 From: SpookyDervish <78246495+SpookyDervish@users.noreply.github.com> Date: Fri, 26 Dec 2025 12:28:45 +1100 Subject: [PATCH] slowly writing a lexer, literally not functional lmao --- README.md | 1 + fib.vmbl | Bin 0 -> 1536 bytes src/sylt/lexer.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++ src/sylt/lexer.h | 22 ++++++++++++ src/sylt/main.c | 10 ++++++ src/sylt/token.h | 36 ++++++++++++++++++++ sylt | Bin 15776 -> 16744 bytes 7 files changed, 156 insertions(+) create mode 100644 fib.vmbl create mode 100644 src/sylt/lexer.c create mode 100644 src/sylt/lexer.h create mode 100644 src/sylt/main.c create mode 100644 src/sylt/token.h diff --git a/README.md b/README.md index 8a300fc..5965386 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@ But uhh that's the name of the VM itself, the name of the programming language I made is Sylt, named after the German island. To compile the VM for Linux: `gcc src/main.c src/vmbl.c src/exception.c src/file_utils.c -o vmbl -O3` +To compile the SYLT compiler for Linux: `gcc src/sylt/main.c src/file_utils.c src/sylt/lexer.c -o sylt -O3` SASM and Sylt are written in Python for now as I'm too mentally challenged to write C code rn. diff --git a/fib.vmbl b/fib.vmbl new file mode 100644 index 0000000000000000000000000000000000000000..a48e1ffa8661b1273d26b724a83e6bca2904b76d GIT binary patch literal 1536 zcmZQ%zz-Oq(l8obH5-NUOy~xp@j0P +#include +#include + +Lexer initLexer(char *source) { + Lexer newLexer = { + .source = source, + .position = -1, + .readPosition = 0, + .lineNumber = 1, + .currentChar = '\0' + }; + return newLexer; +} + +void readChar(Lexer *lexer) { + if (lexer->readPosition >= strlen(lexer->source)) + lexer->currentChar = '\0'; + else + lexer->currentChar = lexer->source[lexer->readPosition]; + + lexer->position = lexer->readPosition; + lexer->readPosition++; +} + +void skipWhitespace(Lexer *lexer) { + switch (lexer->currentChar) + { + case ' ': + case '\t': + case '\n': + case '\r': + if (lexer->currentChar == '\n') + lexer->lineNumber++; + + readChar(lexer); + break; + + default: + break; + } + +} + +Token nextToken(Lexer *lexer) { + //Token tok; + skipWhitespace(lexer); + + switch (lexer->currentChar) + { + case '+': + return NEW_TOKEN(lexer, PLUS, "+"); + break; + case '-': + return NEW_TOKEN(lexer, MINUS, "-"); + break; + case '*': + return NEW_TOKEN(lexer, ASTERISK, "*"); + break; + case '/': + return NEW_TOKEN(lexer, SLASH, "/"); + break; + case '%': + return NEW_TOKEN(lexer, MODULUS, "%"); + break; + case '^': + return NEW_TOKEN(lexer, POW, "^"); + break; + case '(': + return NEW_TOKEN(lexer, LPAREN, "("); + break; + case ')': + return NEW_TOKEN(lexer, RPAREN, ")"); + break; + case '\0': + return NEW_TOKEN(lexer, EOF, ""); + break; + + default: + if (isdigit()) + + break; + } + + //return tok; +} \ No newline at end of file diff --git a/src/sylt/lexer.h b/src/sylt/lexer.h new file mode 100644 index 0000000..e7e986d --- /dev/null +++ b/src/sylt/lexer.h @@ -0,0 +1,22 @@ +#ifndef LEXER_H +#define LEXER_H +#include "token.h" +#include + +typedef struct +{ + int position; + int readPosition; + int lineNumber; + + char currentChar; + char *source; +} Lexer; + +Lexer initLexer(char *source); +void readChar(Lexer *lexer); +void skipWhitespace(Lexer *lexer); +Token nextToken(Lexer *lexer); +char *tokenToCStr(Token token); + +#endif // !LEXER_H diff --git a/src/sylt/main.c b/src/sylt/main.c new file mode 100644 index 0000000..0a76d32 --- /dev/null +++ b/src/sylt/main.c @@ -0,0 +1,10 @@ +#include "lexer.h" +#include +#include "../file_utils.h" + +int main() { + Lexer lexer = initLexer("const x, y = 1, 3\n"); + printf(nextToken(&lexer).literal); + + return 0; +} \ No newline at end of file diff --git a/src/sylt/token.h b/src/sylt/token.h new file mode 100644 index 0000000..85e7cd5 --- /dev/null +++ b/src/sylt/token.h @@ -0,0 +1,36 @@ +#ifndef TOKEN_H +#define TOKEN_H + +typedef enum { + // Special Tokens + EOF, + ILLEGAL, + + // Data Types + INT, + FLOAT, + + // Arithmetic Symbols + PLUS, + MINUS, + ASTERISK, + SLASH, + POW, + MODULUS, + + // Symbols + LPAREN, + RPAREN +} TokenType; + +typedef struct +{ + TokenType type; + char *literal; + int lineNumber; + int position; +} Token; + +#define NEW_TOKEN(lexer, tokType, tokLiteral) (Token){ .type = (tokType), .literal = (tokLiteral), .lineNumber = (lexer->lineNumber), .position = (lexer->position) } + +#endif // !TOKEN_H \ No newline at end of file diff --git a/sylt b/sylt index 81ba3daa1c1691ba8442955a5288812daaf6cb4c..71c2ca543427de9eb8a043f3e65b62e87a0dbd1c 100644 GIT binary patch literal 16744 zcmeHOdu$xXd7tCMVr2S`vL&0A6KSnTE)|IkqEM+~G0guoFTBZ<$ZBs)ePv|^;HTO>?SwvfVgz z;^_MOcILa=+uX%PTc9Xe6Z?Lb%;}M){#l3>Kb2m$*Qwg5j zr7|F$qC?z-|7*n>aRc}|8z941K?Z1yimeQL?L)mQ(>u zMU&JdyL!qh-c2*hq#je27s}J5T)1ePsti-79mVEItF$|^r<9&kad}!%wj-A8#+BW; zvSWIUDul$8<4M(^t3$=pP20!_iK*LeOxd|kDG=i zESPe6e+WB@%YQcUZ9b^Vt8^dEs`xWi;T2nA(czujx5UD&v1lSQ(K^wwvvucoe>&;k zYFB~krQxDJwRiuJfSVj)DvY9LQu6GNWGj`N^s`q_d|VtlxcQ~`f8gpX7yeft-Z z-e7-Mx3Sg#;X-bke|sTc%xx=XxB7PoqknLp5jIoiXf$n^slk2Sv1Gy=3=PLj6h9hI zCRBwCsajT%`gkq+cMbmPaLHUfdOIPKr~Y3rP9SVFUMSrbjow5f;3;L8gr))@}`MhE`118;QTIR}m#UPv!GaBl$>V$Om4 z9C$TbjX*U5)d*B0P>sO<=Lo#zTk~su>Sqo5bp1bW5kh}7XVv8A^r;ICvvyeWJGubp zH}v4YZ*8X_{V|d({xqM@M^4*3Eie`@7kOIfEIwc4X@Rr&)gn&|kHs$*d0KERK3?Q$ zp|SXxB2Np9#Z-}}h0)@0k*9^m;=v+M3yj6CB2Np8#jOQCzu~%Hh=^}(kFeX5-;i+e z5f^{h#ec-b?|1QiF8*E@zthG0UHn}xevOO2wZQ8$dw$}@vhfwPPS5^$VK?ZrbUzj@ z6Ed@BKS|Fk>6(J{DJlH~NuN+sQIMXI(j~0079J^}lM-D`Heuw0*?(L3#&ucl1yEV; z#~_LLeQQsk5;DH%Umg0)j?cqeSf8%j&;ne~_N;Gd&*^aS4yvn9gAMDOq%gA#lBo+F zvkNnDTXg1IYu~T-pZ8~9KCEYds!#pPwF85_?YZ`Eqotoi!TD9StwQ{J(FFKLAo@)G zIwHH4tX1gh|GF7@boKY;=Y4A@sh+cnLjN5S&g^)T$j0k>_L~0O)%*14mTGm+CH>{= z)=D_|nsU&PpO^iJ?P)wr?jf8mJ^Px~s88=X17_j)Z}WLOewXTJzH8GUgZ0;7brCM^k=W=*~Mq5 zF9ow#;eFu%r24LlzDIvW8MyfVyZLgPp{gCzVX7|1}aR?E?wH&2y&aAJ=QcEDc(z-e*!~xAhN%qSOlk& zO~H}TI#fTHeQhC#GW6{8?YaJ4ucUA2_MPw9ARg;(oayyMe2I&=UX1vLpNsf@SW8t_ z^>vg%@%EkH+qhdh7S^8;cgfw8?)&TK(}N}D?p|&U1gDRp>=7<|?l;AxbNWc# zXLQfpy);5FPfiasOxJ#kCSHB!rh7<5j+;Ps`}+QDaD77{JGj0nfKk2jL39MW9|R81 zo(@ptsk}?wjhJ#1FG9$7Kt>;0=>JoF4W7l`V!xN=(ZrAPUcdzZ`sW?>pdSq2Uk1KN z^HVWi(vE&$AkUJ#bUu=D`hkHwPV&3U*wYUNoTG4i1&FAg+VZX@dbKo`5E1>hBL2SJy zujTXf-f|9f5;XTlKHmiV?3?-g80Z}6pMti%mCru~nge|oloGw$rsR3#pzusIdG2Uj z)^Hj!!s#0h9X_#{5~63B{rYM>vfFW;{dqn=N4U4C&)a;z@5W;dlj6SByFRpS{W^ll z{vljC>h65+MYx5F+LQz3L}i`A^%&aJO`z`W%q;gd1k}r=4nTj8>q*q-Rg(94n`UbI zz0Id;177W9UBKHiRo~-nt2wjW+tTCJ0^a5>Z&R1;pc(25xK5$WGi2*+o2oxqcdB-# z#(w8mN3lW7?j#u!wXYdi5SaZo-!7}Gw5+|}woiClP+dJ<0Qijvcl zZA!~kMS0anYhOyMBq{t?X5xtY&Wc|7P|}n=tw$OgO+_2k+;%rkqY`6KRRl~3iV!3UJWxvfouH^41p4*|^|Bn&=Zrkm!O>Jg6 z6+NKn5k7f zXU1l?esb$}f7|wr3jX~b>Gs!&H|#yAQW1a;`<0)0*T=oIMN`%bb~5mva2~e`78ztb7ee;eA>ouEsR4c z#Xb`m^2750<4@W8s|w?q@gE>luKtzU^-s2+m0~k(s9mX?pGD|+#BIXeFWPi!1wUV`fPWJ>)yG}Wmn!K0GjNRybI{j;uc*0Q@OWd< z65-S`WmKmG%P^tdjFOvFyX@xyklkwIY+a-qmHt`PpY3NH5bp-wgnrxHS=6Q6EXpPS zeBS3SB@QS*=hOr~r11MIl>3nK@4jDS%FhvX)4G+Pcm+QvE8t(KfPbX|{#*t8GH{B& zyPiJ+uBo~?$rA8&PC(^v!Tfb|4Su~{^X=_~1DsTGyOV7PMp9P0KZ)<)L&H(S3XKYS zm}+FKXe{j?5%j2Zz`~dQqkXAlybmJ(NHP`{^yxmtFiu+;e5WuXir?28R@}h%`w24* zui>OI8cPm`Vn*0XrqV_zGl2)Y@$r~xnPGohc?~)a5H&)nROq;2CM-N3jif?x(+Fqc z@#8RYa0WE364jVFVWvqx=7cqv zJW6LJsLAT!M2U*~&)4phyA#uslmr;HgFOFiHtTr%p-$!@&6tzUg%EqwHyp(o-j;4yZ>7`WK|0B!oYG zJZ^=CL9LXeW1PbY7&A34{E4Jx`bQHP|9A>7BvRIK2Q-|C;<#2cte`+we=7|$TYW5) z9uxlX@dS#H)JjRopO~q1G?^%249HSuEJOw>Hy*Qu->$G9S^sDfEM?N>i11tJUBYj7 zEPpC#_d&lorbf?L7~YtZKIAZx7Mw>>p?DO|q&e=Wzzh(NV_5PO^53DfZ@^TKwTMj3 zYW>BO_YF#H>vEpx2XWC7o$Wi6J<~Q)awJ~IGeqaYoc8W>hVJDH|NgOkzssK2k4!aI zRMc&M2smEzB)R^)A7aYot8GDHf5C1;$k1Mh?XRfzGd)MaMWQxRmfO$!mMAbXVSC`!1!&`#+|dYB1T8jXQpS1&mbKf8lpm{r>X47%Fyh-~TTFb=qsnj_I7T zW1_@`V!q_EzorzJvVJA|b1r-P5GN_CeNB$?j>P_$ejPGr{dqsc^zV3MuY9l_yL{GV z&+B2P{M>={-SNAq?D_r+O=&7RPnB~dzJCmT7a3Avd){X-4YQ)6?(*e6r)aOQlZs3i z-lP22ppNgB?J-D}{5*xv1-t9ZJkvK|<8#>a^Omtz@>cAxk}L9T|62&C@2Y8|u>az7 z-q%$9Y5Y-QJHC%!21Y7u&-<|>TSy!4QSJZ5e{9e61{gc-MW@njla$M04{XPLBa8{- z@_GH-r0m`LRF*TXP~|K2f{5)m6%d(Wdne_-<5cZb4qH_J{{V!vRpR>a{6o(jxb9B- du^p28pi9W*vMvo%mx?W#?vNU*T?Q^z{0F(WTWkOT delta 2223 zcmZ`)eQZ-z6hH5^kFI5-A6x0VueOuXIl9*lS=~pwy)nej5FHx-FiPrR%*~8VT=`=$ z1{jiT%;~m61V1o@5Tenjutm`rX+kvF693>2BOx&{F(%Wf6g6h@hxOe1?nTzblf3&o z=XcJzALrh8@5+R7_J!H*&ihngO}wS`4bDU5`f0_gM93I@&v%NdLRo|*N173*N&6eMTY5@ zJlXi9&8;YSz~;kYTXD%sry0Ta$bx5VKEKr@c)NO+S6gV>6t&OWf0=jHHlB+DkJt1hqLB(zU~XM2AqK%!0o2&RWe>;-Qb6ws@>P}B;I`#^XGsr9FUqxP7#xu(IyzQ zu4cTcX>BLdk7qOX{P)hl5!S<1ffWA)k8gf7idChRsyB+QrPU2yBH_@M$5Y3Wzq2Eh zn))$QoSIJlMaGJs243l><}2-29gmdy6<8SXy-{?@9$7aZwBD;!TH5O=i7mV;m(M>% z7$IbHPY}LExQlQHK2Up4HITNM7}#OsMczc}Hu^5#%;#NroFdLLjYSAydWOE!#M^ig zFWJkiez|vYuf!3DS{!c-xD#S${i2f`zS|g3R&l8@st#K5c4Lh?30R@)o~+m-n~02Q zctU?z1r2ZLHENDx^#K(+@K(`IEQ+00D)3s&ceswCRoDPW?w%=eVZnb?z_$rr&X!Cr zz0@jD4lcvAZSMJ; zn_jqcPl)I*Il4jK41$L1Jboj{T1oylyk+re>uJ(Kyr`D$k24K?K$qSc2@~%kUanF) zb>cm$6Xb45ew&Cd?5B%(KGNY{(%h;Kwn0S}iM+`zq^1UH{*`g?-Hx z*0J&744!X3h23!jPsR;TdSGZ|2-4~PzD!?wEYtUV2D|T%c+>kw`ib2;HfEA=pl@gd zTU$~%-E!JPDH+h88#WvLnbFKbcdMqVvw#n^>z+YT5Q+2?oM_kGIYCP-#-;YC8`vez zL|_c?;uR5~5;BpeT4IECgM%F|cPz*QK;k6! zv<3X%61jms^R1ybSiZfebcX#ukjHtKSN^H1%p1B%9{wf}a*;1GyIx7MIMo>lT_BBB zOO7D_uPnQdNer}i2Gmu+pF2C$e*hbd4#k2)M!F;=QYtCRgWnii9FzC)6j)Nw(G_>> MZZ^$gGTUnY1(?KDPXGV_