| 1 | = UTT Based on Examples = |
| 2 | == Author: Mateusz Boryga == |
| 3 | |
| 4 | |
| 5 | |
| 6 | == tok == |
| 7 | |
| 8 | === tok.1. === |
| 9 | Description: |
| 10 | {{{ |
| 11 | Split raw text into a list of tokens. |
| 12 | }}} |
| 13 | Command: |
| 14 | {{{ |
| 15 | echo 'W moim domu jest zawsze głośno. Stoi przy ruchliwej ulicy.' | tok |
| 16 | }}} |
| 17 | Output: |
| 18 | {{{ |
| 19 | 0000 01 W W |
| 20 | 0001 01 S _ |
| 21 | 0002 04 W moim |
| 22 | 0006 01 S _ |
| 23 | 0007 04 W domu |
| 24 | 0011 01 S _ |
| 25 | 0012 04 W jest |
| 26 | 0016 01 S _ |
| 27 | 0017 06 W zawsze |
| 28 | 0023 01 S _ |
| 29 | 0024 06 W głośno |
| 30 | 0030 01 P . |
| 31 | 0031 01 S _ |
| 32 | 0032 04 W Stoi |
| 33 | 0036 01 S _ |
| 34 | 0037 04 W przy |
| 35 | 0041 01 S _ |
| 36 | 0042 09 W ruchliwej |
| 37 | 0051 01 S _ |
| 38 | 0052 05 W ulicy |
| 39 | 0057 01 P . |
| 40 | 0058 01 S \n |
| 41 | }}} |
| 42 | |
| 43 | == lem == |
| 44 | |
| 45 | === lem.1. === |
| 46 | Description: |
| 47 | {{{ |
| 48 | Perform morphological analysis. |
| 49 | }}} |
| 50 | Command: |
| 51 | {{{ |
| 52 | echo 'Ulicą jedzie szybki i zielony samochód.' | tok | lem |
| 53 | }}} |
| 54 | Output: |
| 55 | {{{ |
| 56 | 0000 05 W Ulicą lem:ulica,N/CiGfNs |
| 57 | 0005 01 S _ |
| 58 | 0006 06 W jedzie lem:jechać,V/AiMdNsP3R-TfrVp |
| 59 | 0012 01 S _ |
| 60 | 0013 06 W szybki lem:szybki,ADJ/CaDpGiNs |
| 61 | 0013 06 W szybki lem:szybki,ADJ/CnvDpGaipNs |
| 62 | 0013 06 W szybki lem:szybka,N/CaGfNp |
| 63 | 0013 06 W szybki lem:szybka,N/CgGfNs |
| 64 | 0013 06 W szybki lem:szybka,N/CnGfNp |
| 65 | 0013 06 W szybki lem:szybka,N/CvGfNp |
| 66 | 0019 01 S _ |
| 67 | 0020 01 W i lem:i,CONJ |
| 68 | 0020 01 W i lem:i,EXCL |
| 69 | 0021 01 S _ |
| 70 | 0022 07 W zielony lem:zielony,ADJ/CaDpGiNs |
| 71 | 0022 07 W zielony lem:zielony,ADJ/CnvDpGaipNs |
| 72 | 0029 01 S _ |
| 73 | 0030 08 W samochód lem:samochód,N/CaGiNs |
| 74 | 0030 08 W samochód lem:samochód,N/CnGiNs |
| 75 | 0038 01 P . |
| 76 | 0039 01 S \n |
| 77 | }}} |
| 78 | |
| 79 | === lem.2. === |
| 80 | Description: |
| 81 | {{{ |
| 82 | Perform morphological analysis. Print ambiguous annotation in one output line by generating multiple annotation fields. |
| 83 | }}} |
| 84 | Command: |
| 85 | {{{ |
| 86 | echo 'Ulicą jedzie szybki i zielony samochód.' | tok | lem --one-line |
| 87 | }}} |
| 88 | Output: |
| 89 | {{{ |
| 90 | 0000 05 W Ulicą lem:ulica,N/CiGfNs |
| 91 | 0005 01 S _ |
| 92 | 0006 06 W jedzie lem:jechać,V/AiMdNsP3R-TfrVp |
| 93 | 0012 01 S _ |
| 94 | 0013 06 W szybki lem:szybki,ADJ/CaDpGiNs lem:szybki,ADJ/CnvDpGaipNs lem:szybka,N/CaGfNp lem:szybka,N/CgGfNs lem:szybka,N/CnGfNp lem:szybka,N/CvGfNp |
| 95 | 0019 01 S _ |
| 96 | 0020 01 W i lem:i,CONJ lem:i,EXCL |
| 97 | 0021 01 S _ |
| 98 | 0022 07 W zielony lem:zielony,ADJ/CaDpGiNs lem:zielony,ADJ/CnvDpGaipNs |
| 99 | 0029 01 S _ |
| 100 | 0030 08 W samochód lem:samochód,N/CaGiNs lem:samochód,N/CnGiNs |
| 101 | 0038 01 P . |
| 102 | 0039 01 S \n |
| 103 | }}} |
| 104 | |
| 105 | === lem.3. === |
| 106 | Description: |
| 107 | {{{ |
| 108 | Perform morphological analysis. Print ambiguous annotation in one annotation field. |
| 109 | }}} |
| 110 | Command: |
| 111 | {{{ |
| 112 | echo 'Ulicą jedzie szybki i zielony samochód.' | tok | lem -1 |
| 113 | }}} |
| 114 | Output: |
| 115 | {{{ |
| 116 | 0000 05 W Ulicą lem:ulica,N/CiGfNs |
| 117 | 0005 01 S _ |
| 118 | 0006 06 W jedzie lem:jechać,V/AiMdNsP3R-TfrVp |
| 119 | 0012 01 S _ |
| 120 | 0013 06 W szybki lem:szybki,ADJ/CaDpGiNs,ADJ/CnvDpGaipNs;szybka,N/CaGfNp,N/CgGfNs,N/CnGfNp,N/CvGfNp |
| 121 | 0019 01 S _ |
| 122 | 0020 01 W i lem:i,CONJ,EXCL |
| 123 | 0021 01 S _ |
| 124 | 0022 07 W zielony lem:zielony,ADJ/CaDpGiNs,ADJ/CnvDpGaipNs |
| 125 | 0029 01 S _ |
| 126 | 0030 08 W samochód lem:samochód,N/CaGiNs,N/CnGiNs |
| 127 | 0038 01 P . |
| 128 | 0039 01 S \n |
| 129 | }}} |
| 130 | |
| 131 | |
| 132 | == gue == |
| 133 | |
| 134 | === gue.1. === |
| 135 | Description: |
| 136 | {{{ |
| 137 | Guess morphological descriptions. |
| 138 | }}} |
| 139 | Command: |
| 140 | {{{ |
| 141 | echo 'smerfny' | tok | gue |
| 142 | }}} |
| 143 | Output: |
| 144 | {{{ |
| 145 | 0000 07 W smerfny gue:smerfny,ADJ/CaDpGiNs |
| 146 | 0000 07 W smerfny gue:smerfny,ADJ/CnvDpGaipNs |
| 147 | 0007 01 S \n |
| 148 | }}} |
| 149 | |
| 150 | === gue.2. === |
| 151 | Description: |
| 152 | {{{ |
| 153 | Guess morphological descriptions. Print ambiguous annotation in one output line by generating multiple annotation fields. |
| 154 | }}} |
| 155 | Command: |
| 156 | {{{ |
| 157 | echo 'smerfny' | tok | gue --one-line |
| 158 | }}} |
| 159 | Output: |
| 160 | {{{ |
| 161 | 0000 07 W smerfny gue:smerfny,ADJ/CaDpGiNs gue:smerfny,ADJ/CnvDpGaipNs |
| 162 | 0007 01 S \n |
| 163 | }}} |
| 164 | |
| 165 | === gue.3. === |
| 166 | Description: |
| 167 | {{{ |
| 168 | Guess morphological descriptions. Print ambiguous annotation in one annotation field. |
| 169 | }}} |
| 170 | Command: |
| 171 | {{{ |
| 172 | echo 'smerfny' | tok | gue -1 |
| 173 | }}} |
| 174 | Output: |
| 175 | {{{ |
| 176 | 0000 07 W smerfny gue:smerfny,ADJ/CaDpGiNs,ADJ/CnvDpGaipNs |
| 177 | 0007 01 S \n |
| 178 | }}} |
| 179 | |
| 180 | |
| 181 | == cor == |
| 182 | |
| 183 | === cor.1. === |
| 184 | Description: |
| 185 | {{{ |
| 186 | Correct spelling. |
| 187 | }}} |
| 188 | Command: |
| 189 | {{{ |
| 190 | echo 'kresło' | tok | cor |
| 191 | }}} |
| 192 | Output: |
| 193 | {{{ |
| 194 | 0000 06 W kresło cor:kresko |
| 195 | 0000 06 W kresło cor:kreso |
| 196 | 0000 06 W kresło cor:krzesło |
| 197 | 0006 01 S \n |
| 198 | }}} |
| 199 | |
| 200 | === cor.2. === |
| 201 | Description: |
| 202 | {{{ |
| 203 | Correct spelling. Print ambiguous annotation in one output line by generating multiple annotation fields. |
| 204 | }}} |
| 205 | Command: |
| 206 | {{{ |
| 207 | echo 'kresło' | tok | cor --one-line |
| 208 | }}} |
| 209 | Output: |
| 210 | {{{ |
| 211 | 0000 06 W kresło cor:kresko cor:kreso cor:krzesło |
| 212 | 0006 01 S \n |
| 213 | }}} |
| 214 | |
| 215 | === cor.3. === |
| 216 | Description: |
| 217 | {{{ |
| 218 | Correct spelling. Print ambiguous annotation in one annotation field. |
| 219 | }}} |
| 220 | Command: |
| 221 | {{{ |
| 222 | echo 'kresło' | tok | cor -1 |
| 223 | }}} |
| 224 | Output: |
| 225 | {{{ |
| 226 | 0000 06 W kresło cor:kresko;kreso;krzesło |
| 227 | 0006 01 S \n |
| 228 | }}} |
| 229 | |
| 230 | === cor.4. === |
| 231 | Description: |
| 232 | {{{ |
| 233 | Correct spelling. Set the maximum edit distance to 2. |
| 234 | }}} |
| 235 | Command: |
| 236 | {{{ |
| 237 | echo 'kzzeslo' | tok | cor -n 2 |
| 238 | }}} |
| 239 | Output: |
| 240 | {{{ |
| 241 | 0000 07 W kzzeslo cor:krzesko |
| 242 | 0000 07 W kzzeslo cor:krzesło |
| 243 | 0007 01 S \n |
| 244 | }}} |
| 245 | |
| 246 | |
| 247 | == kor == |
| 248 | |
| 249 | === kor.1. === |
| 250 | Description: |
| 251 | {{{ |
| 252 | Correct spelling. |
| 253 | }}} |
| 254 | Command: |
| 255 | {{{ |
| 256 | echo 'kresło' | tok | kor |
| 257 | }}} |
| 258 | Output: |
| 259 | {{{ |
| 260 | 0000 06 W kresło kor:krzesło |
| 261 | 0000 06 W kresło kor:kreso |
| 262 | 0000 06 W kresło kor:kresko |
| 263 | 0006 01 S \n |
| 264 | }}} |
| 265 | |
| 266 | === kor.2. === |
| 267 | Description: |
| 268 | {{{ |
| 269 | Correct spelling. Print ambiguous annotation in one output line by generating multiple annotation fields. |
| 270 | }}} |
| 271 | Command: |
| 272 | {{{ |
| 273 | echo 'kresło' | tok | kor --one-line |
| 274 | }}} |
| 275 | Output: |
| 276 | {{{ |
| 277 | 0000 06 W kresło kor:krzesło kor:kreso kor:kresko |
| 278 | 0006 01 S \n |
| 279 | }}} |
| 280 | |
| 281 | === kor.3. === |
| 282 | Description: |
| 283 | {{{ |
| 284 | Correct spelling. Print ambiguous annotation in one annotation field. |
| 285 | }}} |
| 286 | Command: |
| 287 | {{{ |
| 288 | echo 'kresło' | tok | kor -1 |
| 289 | }}} |
| 290 | Output: |
| 291 | {{{ |
| 292 | 0000 06 W kresło kor:krzesło;kreso;kresko |
| 293 | 0006 01 S \n |
| 294 | }}} |
| 295 | |
| 296 | === kor.4. === |
| 297 | Description: |
| 298 | {{{ |
| 299 | Correct spelling. Set the maximum edit distance to 3. |
| 300 | }}} |
| 301 | Command: |
| 302 | {{{ |
| 303 | echo 'pisemy' | tok | kor -n 3 |
| 304 | }}} |
| 305 | Output: |
| 306 | {{{ |
| 307 | 0000 06 W pisemy kor:piszmy |
| 308 | 0000 06 W pisemy kor:piszemy |
| 309 | 0000 06 W pisemy kor:pisemny |
| 310 | 0006 01 S \n |
| 311 | }}} |
| 312 | |
| 313 | === kor.5. === |
| 314 | Description: |
| 315 | {{{ |
| 316 | Correct spelling. Use a file with operation weights (w.kor, shown below). |
| 317 | }}} |
| 318 | Command: |
| 319 | {{{ |
| 320 | echo 'grzegrzułka' | tok | kor -w w.kor |
| 321 | }}} |
| 322 | File w.kor: |
| 323 | {{{ |
| 324 | %stdcor 1 |
| 325 | %xchg 1 |
| 326 | ż rz 0.3 |
| 327 | u ó 0.3 |
| 328 | }}} |
| 329 | Output: |
| 330 | {{{ |
| 331 | 0000 11 W grzegrzułka kor:gżegżółka |
| 332 | 0011 01 S \n |
| 333 | }}} |
| 334 | |
| 335 | |
| 336 | == sen == |
| 337 | |
| 338 | === sen.1. === |
| 339 | Description: |
| 340 | {{{ |
| 341 | Detect sentence boundaries. Mark beginning (BOS) and end (EOS) of each sentence. |
| 342 | }}} |
| 343 | Command: |
| 344 | {{{ |
| 345 | echo 'Ala? Ala ma kota. Kot ma Alę.' | tok | sen |
| 346 | }}} |
| 347 | Output: |
| 348 | {{{ |
| 349 | 0000 00 BOS * |
| 350 | 0000 03 W Ala |
| 351 | 0003 01 P ? |
| 352 | 0004 00 EOS * |
| 353 | 0004 00 BOS * |
| 354 | 0004 01 S _ |
| 355 | 0005 03 W Ala |
| 356 | 0008 01 S _ |
| 357 | 0009 02 W ma |
| 358 | 0011 01 S _ |
| 359 | 0012 04 W kota |
| 360 | 0016 01 P . |
| 361 | 0017 00 EOS * |
| 362 | 0017 00 BOS * |
| 363 | 0017 01 S _ |
| 364 | 0018 03 W Kot |
| 365 | 0021 01 S _ |
| 366 | 0022 02 W ma |
| 367 | 0024 01 S _ |
| 368 | 0025 03 W Alę |
| 369 | 0028 01 P . |
| 370 | 0029 01 S \n |
| 371 | 0030 00 EOS * |
| 372 | }}} |
| 373 | |
| 374 | |
| 375 | == ser == |
| 376 | |
| 377 | === ser.1. === |
| 378 | Description: |
| 379 | {{{ |
| 380 | Find the pattern 'word(dom)'. Mark the beginning (BOM) and end (EOM) of each match. |
| 381 | }}} |
| 382 | Command: |
| 383 | {{{ |
| 384 | echo 'O Białym Domu mówi się, że to duży funkcjonalny dom.' | tok | lem -1 | ser -e 'word(dom)' |
| 385 | }}} |
| 386 | Output: |
| 387 | {{{ |
| 388 | 0000 01 W O lem:o,EXCL,P/Cal |
| 389 | 0001 01 S _ |
| 390 | 0002 06 W Białym lem:biały,ADJ/CdDpNp,ADJ/CilDpGainpNs |
| 391 | 0008 01 S _ |
| 392 | 0009 04 W Domu lem:dom,N/CgGiNs,N/ClGiNs,N/CvGiNs |
| 393 | 0013 01 S _ |
| 394 | 0014 04 W mówi lem:mówić,V/AiMdNsP3R-TfrVp |
| 395 | 0018 01 S _ |
| 396 | 0019 03 W się lem:się,NPRO/CaZx,NPRO/CgZx |
| 397 | 0022 01 P , |
| 398 | 0023 01 S _ |
| 399 | 0024 02 W że lem:że,CONJ,PART |
| 400 | 0026 01 S _ |
| 401 | 0027 02 W to lem:to,CONJ,NPRO/CaGnNsZd,NPRO/CnGnNsZd,PART;ten,ADJPRO/CanvGnNsZd |
| 402 | 0029 01 S _ |
| 403 | 0030 04 W duży lem:duży,ADJ/CaDpGiNs,ADJ/CnvDpGaipNs |
| 404 | 0034 01 S _ |
| 405 | 0035 12 W funkcjonalny lem:funkcjonalny,ADJ/CaDpGiNs,ADJ/CnvDpGaipNs |
| 406 | 0047 01 S _ |
| 407 | 0048 00 BOM * ser:1 |
| 408 | 0048 03 W dom lem:dom,N/CaGiNs,N/CnGiNs |
| 409 | 0051 00 EOM * ser:1 |
| 410 | 0051 01 P . |
| 411 | 0052 01 S \n |
| 412 | }}} |
| 413 | |
| 414 | === ser.2. === |
| 415 | Description: |
| 416 | {{{ |
| 417 | Find the pattern 'word(dom)'. Mark the beginning (BOM) and end (EOM) of each match. Print only the matched segments (option -m). |
| 418 | }}} |
| 419 | Command: |
| 420 | {{{ |
| 421 | echo 'O Białym Domu mówi się, że to duży funkcjonalny dom.' | tok | lem -1 | ser -m -e 'word(dom)' |
| 422 | }}} |
| 423 | Output: |
| 424 | {{{ |
| 425 | 0048 00 BOM * ser:1 |
| 426 | 0048 03 W dom lem:dom,N/CaGiNs,N/CnGiNs |
| 427 | 0051 00 EOM * ser:1 |
| 428 | }}} |
| 429 | |
| 430 | === ser.3. === |
| 431 | Description: |
| 432 | {{{ |
| 433 | Find the pattern 'lexeme(dom)'. Mark the beginning (BOM) and end (EOM) of each match. |
| 434 | }}} |
| 435 | Command: |
| 436 | {{{ |
| 437 | echo 'O Białym Domu mówi się, że to duży funkcjonalny dom.' | tok | lem -1 | ser -e 'lexeme(dom)' |
| 438 | }}} |
| 439 | Output: |
| 440 | {{{ |
| 441 | 0000 01 W O lem:o,EXCL,P/Cal |
| 442 | 0001 01 S _ |
| 443 | 0002 06 W Białym lem:biały,ADJ/CdDpNp,ADJ/CilDpGainpNs |
| 444 | 0008 01 S _ |
| 445 | 0009 00 BOM * ser:1 |
| 446 | 0009 04 W Domu lem:dom,N/CgGiNs,N/ClGiNs,N/CvGiNs |
| 447 | 0013 00 EOM * ser:1 |
| 448 | 0013 01 S _ |
| 449 | 0014 04 W mówi lem:mówić,V/AiMdNsP3R-TfrVp |
| 450 | 0018 01 S _ |
| 451 | 0019 03 W się lem:się,NPRO/CaZx,NPRO/CgZx |
| 452 | 0022 01 P , |
| 453 | 0023 01 S _ |
| 454 | 0024 02 W że lem:że,CONJ,PART |
| 455 | 0026 01 S _ |
| 456 | 0027 02 W to lem:to,CONJ,NPRO/CaGnNsZd,NPRO/CnGnNsZd,PART;ten,ADJPRO/CanvGnNsZd |
| 457 | 0029 01 S _ |
| 458 | 0030 04 W duży lem:duży,ADJ/CaDpGiNs,ADJ/CnvDpGaipNs |
| 459 | 0034 01 S _ |
| 460 | 0035 12 W funkcjonalny lem:funkcjonalny,ADJ/CaDpGiNs,ADJ/CnvDpGaipNs |
| 461 | 0047 01 S _ |
| 462 | 0048 00 BOM * ser:2 |
| 463 | 0048 03 W dom lem:dom,N/CaGiNs,N/CnGiNs |
| 464 | 0051 00 EOM * ser:2 |
| 465 | 0051 01 P . |
| 466 | 0052 01 S \n |
| 467 | }}} |
| 468 | |
| 469 | === ser.4. === |
| 470 | Description: |
| 471 | {{{ |
| 472 | Find the pattern 'lexeme(dom)'. Mark the beginning (BOM) and end (EOM) of each match. Print only the matched segments (option -m). |
| 473 | }}} |
| 474 | Command: |
| 475 | {{{ |
| 476 | echo 'O Białym Domu mówi się, że to duży funkcjonalny dom.' | tok | lem -1 | ser -m -e 'lexeme(dom)' |
| 477 | }}} |
| 478 | Output: |
| 479 | {{{ |
| 480 | 0009 00 BOM * ser:1 |
| 481 | 0009 04 W Domu lem:dom,N/CgGiNs,N/ClGiNs,N/CvGiNs |
| 482 | 0013 00 EOM * ser:1 |
| 483 | 0048 00 BOM * ser:2 |
| 484 | 0048 03 W dom lem:dom,N/CaGiNs,N/CnGiNs |
| 485 | 0051 00 EOM * ser:2 |
| 486 | }}} |
| 487 | |
| 488 | === ser.5. === |
| 489 | Description: |
| 490 | {{{ |
| 491 | Find the pattern 'cat(<ADJ>) space lexeme(dom)'. Mark the beginning (BOM) and end (EOM) of each match. |
| 492 | }}} |
| 493 | Command: |
| 494 | {{{ |
| 495 | echo 'O Białym Domu mówi się, że to duży funkcjonalny dom.' | tok | lem -1 | ser -e 'cat(<ADJ>) space lexeme(dom)' |
| 496 | }}} |
| 497 | Output: |
| 498 | {{{ |
| 499 | 0000 01 W O lem:o,EXCL,P/Cal |
| 500 | 0001 01 S _ |
| 501 | 0002 00 BOM * ser:1 |
| 502 | 0002 06 W Białym lem:biały,ADJ/CdDpNp,ADJ/CilDpGainpNs |
| 503 | 0008 01 S _ |
| 504 | 0009 04 W Domu lem:dom,N/CgGiNs,N/ClGiNs,N/CvGiNs |
| 505 | 0013 00 EOM * ser:1 |
| 506 | 0013 01 S _ |
| 507 | 0014 04 W mówi lem:mówić,V/AiMdNsP3R-TfrVp |
| 508 | 0018 01 S _ |
| 509 | 0019 03 W się lem:się,NPRO/CaZx,NPRO/CgZx |
| 510 | 0022 01 P , |
| 511 | 0023 01 S _ |
| 512 | 0024 02 W że lem:że,CONJ,PART |
| 513 | 0026 01 S _ |
| 514 | 0027 02 W to lem:to,CONJ,NPRO/CaGnNsZd,NPRO/CnGnNsZd,PART;ten,ADJPRO/CanvGnNsZd |
| 515 | 0029 01 S _ |
| 516 | 0030 04 W duży lem:duży,ADJ/CaDpGiNs,ADJ/CnvDpGaipNs |
| 517 | 0034 01 S _ |
| 518 | 0035 00 BOM * ser:2 |
| 519 | 0035 12 W funkcjonalny lem:funkcjonalny,ADJ/CaDpGiNs,ADJ/CnvDpGaipNs |
| 520 | 0047 01 S _ |
| 521 | 0048 03 W dom lem:dom,N/CaGiNs,N/CnGiNs |
| 522 | 0051 00 EOM * ser:2 |
| 523 | 0051 01 P . |
| 524 | 0052 01 S \n |
| 525 | }}} |
| 526 | |
| 527 | === ser.6. === |
| 528 | Description: |
| 529 | {{{ |
| 530 | Find the pattern 'cat(<ADJ>) space lexeme(dom)'. Mark the beginning (BOM) and end (EOM) of each match. Print only the matched segments (option -m). |
| 531 | }}} |
| 532 | Command: |
| 533 | {{{ |
| 534 | echo 'O Białym Domu mówi się, że to duży funkcjonalny dom.' | tok | lem -1 | ser -m -e 'cat(<ADJ>) space lexeme(dom)' |
| 535 | }}} |
| 536 | Output: |
| 537 | {{{ |
| 538 | 0002 00 BOM * ser:1 |
| 539 | 0002 06 W Białym lem:biały,ADJ/CdDpNp,ADJ/CilDpGainpNs |
| 540 | 0008 01 S _ |
| 541 | 0009 04 W Domu lem:dom,N/CgGiNs,N/ClGiNs,N/CvGiNs |
| 542 | 0013 00 EOM * ser:1 |
| 543 | 0035 00 BOM * ser:2 |
| 544 | 0035 12 W funkcjonalny lem:funkcjonalny,ADJ/CaDpGiNs,ADJ/CnvDpGaipNs |
| 545 | 0047 01 S _ |
| 546 | 0048 03 W dom lem:dom,N/CaGiNs,N/CnGiNs |
| 547 | 0051 00 EOM * ser:2 |
| 548 | }}} |
| 549 | |
| 550 | === ser.7. === |
| 551 | Description: |
| 552 | {{{ |
| 553 | Find the pattern '(cat(<ADJ>) space)+ lexeme(dom)'. Mark the beginning (BOM) and end (EOM) of each match. |
| 554 | }}} |
| 555 | Command: |
| 556 | {{{ |
| 557 | echo 'O Białym Domu mówi się, że to duży funkcjonalny dom.' | tok | lem -1 | ser -e '(cat(<ADJ>) space)+ lexeme(dom)' |
| 558 | }}} |
| 559 | Output: |
| 560 | {{{ |
| 561 | 0000 01 W O lem:o,EXCL,P/Cal |
| 562 | 0001 01 S _ |
| 563 | 0002 00 BOM * ser:1 |
| 564 | 0002 06 W Białym lem:biały,ADJ/CdDpNp,ADJ/CilDpGainpNs |
| 565 | 0008 01 S _ |
| 566 | 0009 04 W Domu lem:dom,N/CgGiNs,N/ClGiNs,N/CvGiNs |
| 567 | 0013 00 EOM * ser:1 |
| 568 | 0013 01 S _ |
| 569 | 0014 04 W mówi lem:mówić,V/AiMdNsP3R-TfrVp |
| 570 | 0018 01 S _ |
| 571 | 0019 03 W się lem:się,NPRO/CaZx,NPRO/CgZx |
| 572 | 0022 01 P , |
| 573 | 0023 01 S _ |
| 574 | 0024 02 W że lem:że,CONJ,PART |
| 575 | 0026 01 S _ |
| 576 | 0027 02 W to lem:to,CONJ,NPRO/CaGnNsZd,NPRO/CnGnNsZd,PART;ten,ADJPRO/CanvGnNsZd |
| 577 | 0029 01 S _ |
| 578 | 0030 00 BOM * ser:2 |
| 579 | 0030 04 W duży lem:duży,ADJ/CaDpGiNs,ADJ/CnvDpGaipNs |
| 580 | 0034 01 S _ |
| 581 | 0035 12 W funkcjonalny lem:funkcjonalny,ADJ/CaDpGiNs,ADJ/CnvDpGaipNs |
| 582 | 0047 01 S _ |
| 583 | 0048 03 W dom lem:dom,N/CaGiNs,N/CnGiNs |
| 584 | 0051 00 EOM * ser:2 |
| 585 | 0051 01 P . |
| 586 | 0052 01 S \n |
| 587 | }}} |
| 588 | |
| 589 | === ser.8. === |
| 590 | Description: |
| 591 | {{{ |
| 592 | Find the pattern '(cat(<ADJ>) space)+ lexeme(dom)'. Mark the beginning (BOM) and end (EOM) of each match. Print only the matched segments (option -m). |
| 593 | }}} |
| 594 | Command: |
| 595 | {{{ |
| 596 | echo 'O Białym Domu mówi się, że to duży funkcjonalny dom.' | tok | lem -1 | ser -m -e '(cat(<ADJ>) space)+ lexeme(dom)' |
| 597 | }}} |
| 598 | Output: |
| 599 | {{{ |
| 600 | 0002 00 BOM * ser:1 |
| 601 | 0002 06 W Białym lem:biały,ADJ/CdDpNp,ADJ/CilDpGainpNs |
| 602 | 0008 01 S _ |
| 603 | 0009 04 W Domu lem:dom,N/CgGiNs,N/ClGiNs,N/CvGiNs |
| 604 | 0013 00 EOM * ser:1 |
| 605 | 0030 00 BOM * ser:2 |
| 606 | 0030 04 W duży lem:duży,ADJ/CaDpGiNs,ADJ/CnvDpGaipNs |
| 607 | 0034 01 S _ |
| 608 | 0035 12 W funkcjonalny lem:funkcjonalny,ADJ/CaDpGiNs,ADJ/CnvDpGaipNs |
| 609 | 0047 01 S _ |
| 610 | 0048 03 W dom lem:dom,N/CaGiNs,N/CnGiNs |
| 611 | 0051 00 EOM * ser:2 |
| 612 | }}} |
| 613 | |
| 614 | === ser.9. === |
| 615 | Description: |
| 616 | {{{ |
| 617 | For the pattern 'cat(<ADJ>) space lexeme(dom)', print the generated flex source code (option --flex) of the scanner that marks the beginning (BOM) and end (EOM) of each match. |
| 618 | }}} |
| 619 | Command: |
| 620 | {{{ |
| 621 | echo 'O Białym Domu mówi się, że to duży funkcjonalny dom.' | tok | lem -1 | ser -e 'cat(<ADJ>) space lexeme(dom)' --flex |
| 622 | }}} |
| 623 | Output: |
| 624 | {{{ |
| 625 | %{ |
| 626 | #include<string.h> |
| 627 | int n=0; |
| 628 | %} |
| 629 | |
| 630 | %% |
| 631 | |
| 632 | ([ \t]*(([0-9]+[ \t]+)([0-9]+[ \t]+)?)?([^ \t\n\r\f]+)[ \t]+([^ \t\n\r\f]+)([ \t]+(([^ \t\n\r\f]+[ \t]+)*(lem:[^ \t\n\r\f]+,ADJ(\/([[:upper:]]+([[:lower:][:digit:]+?!*-]|<[^>\n[:cntrl:]]+>)+)*)?([,;][^ \t\n\r\f]+)?)([ \t]+[^ \t\n\r\f]+)*))[ \t]*\n)([ \t]*(([0-9]+[ \t]+)([0-9]+[ \t]+)?)?(S)[ \t]+([^ \t\n\r\f]+)(([ \t]+[^ \t\n\r\f]+)*)[ \t]*\n)([ \t]*(([0-9]+[ \t]+)([0-9]+[ \t]+)?)?([^ \t\n\r\f]+)[ \t]+([^ \t\n\r\f]+)([ \t]+(([^ \t\n\r\f]+[ \t]+)*(lem:([^ \t\n\r\f]+;)?dom,[^ \t\n\r\f]+)([ \t]+[^ \t\n\r\f]+)*))[ \t]*\n) { |
| 633 | int start, end, len; |
| 634 | char *lastseg, *tmp; |
| 635 | if(yytext[yyleng-1]!='\n') |
| 636 | {fprintf(stderr,"ser: pattern matches incomplete line\n"); exit(1);} |
| 637 | n++; |
| 638 | sscanf(yytext,"%d %d",&start,&len); |
| 639 | yytext[yyleng-1]='\0'; |
| 640 | if(tmp=strrchr(yytext,'\n')) |
| 641 | { |
| 642 | lastseg=tmp+1; |
| 643 | sscanf(lastseg,"%d %d", &end, &len); |
| 644 | } |
| 645 | else |
| 646 | end=start; |
| 647 | yytext[yyleng-1]='\n'; |
| 648 | printf("%04d 00 BOM * ser:%s%d\n",start,"",n); |
| 649 | ECHO; |
| 650 | printf("%04d 00 EOM * ser:%s%d\n",end+len,"",n); |
| 651 | } |
| 652 | |
| 653 | |
| 654 | .*\n ECHO; |
| 655 | }}} |
| 656 | |
| 657 | === ser.10. === |
| 658 | Description: |
| 659 | {{{ |
| 660 | Find the pattern '(cat(<N>) space cat(<ADJ>)) | (cat(<ADJ>) space cat(<N>))'. Mark the beginning (BOM) and end (EOM) of each match. |
| 661 | }}} |
| 662 | Command: |
| 663 | {{{ |
| 664 | echo 'Panna młoda dostała sztuczne kwiaty i wieczne pióro. Rozpakowała wszystko na klatce schodowej.' | tok | lem -1 | ser -e '(cat(<N>) space cat(<ADJ>)) | (cat(<ADJ>) space cat(<N>))' |
| 665 | }}} |
| 666 | Output: |
| 667 | {{{ |
| 668 | 0000 00 BOM * ser:1 |
| 669 | 0000 05 W Panna lem:panna,N/CnGfNs |
| 670 | 0005 01 S _ |
| 671 | 0006 05 W młoda lem:młoda,N/CnGfNs,N/CvGfNs;młody,ADJ/CanvDpGfNs |
| 672 | 0011 00 EOM * ser:1 |
| 673 | 0011 01 S _ |
| 674 | 0012 07 W dostała lem:dostać,V/ApGfMdNsP3R?TaVp |
| 675 | 0019 01 S _ |
| 676 | 0020 00 BOM * ser:2 |
| 677 | 0020 08 W sztuczne lem:sztuczny,ADJ/CanvDpGafinNp,ADJ/CanvDpGnNs |
| 678 | 0028 01 S _ |
| 679 | 0029 06 W kwiaty lem:kwiat,N/CaGiNp,N/CnGiNp,N/CvGiNp |
| 680 | 0035 00 EOM * ser:2 |
| 681 | 0035 01 S _ |
| 682 | 0036 01 W i lem:i,CONJ,EXCL |
| 683 | 0037 01 S _ |
| 684 | 0038 00 BOM * ser:3 |
| 685 | 0038 07 W wieczne lem:wieczny,ADJ/CanvDpGafinNp,ADJ/CanvDpGnNs |
| 686 | 0045 01 S _ |
| 687 | 0046 05 W pióro lem:pióro,N/CaGnNs,N/CnGnNs,N/CvGnNs |
| 688 | 0051 00 EOM * ser:3 |
| 689 | 0051 01 P . |
| 690 | 0052 01 S _ |
| 691 | 0053 11 W Rozpakowała lem:rozpakować,V/ApGfMdNsP3R-TaVp |
| 692 | 0064 01 S _ |
| 693 | 0065 08 W wszystko lem:wszystko,NPRO/CaGnNsZg,NPRO/CnGnNsZg;wszystek,ADJPRO/CanvGnNsZg |
| 694 | 0073 01 S _ |
| 695 | 0074 02 W na lem:na,P/Cal |
| 696 | 0076 01 S _ |
| 697 | 0077 00 BOM * ser:4 |
| 698 | 0077 06 W klatce lem:klatka,N/CdGfNs,N/ClGfNs |
| 699 | 0083 01 S _ |
| 700 | 0084 09 W schodowej lem:schodowy,ADJ/CdglDpGfNs |
| 701 | 0093 00 EOM * ser:4 |
| 702 | 0093 01 P . |
| 703 | 0094 01 S \n |
| 704 | }}} |
| 705 | |
| 706 | === ser.11. === |
| 707 | Description: |
| 708 | {{{ |
| 709 | Find the pattern '(cat(<N>) space cat(<ADJ>)) | (cat(<ADJ>) space cat(<N>))'. Mark the beginning (BOM) and end (EOM) of each match. Print only the matched segments (option -m). |
| 710 | }}} |
| 711 | Command: |
| 712 | {{{ |
| 713 | echo 'Panna młoda dostała sztuczne kwiaty i wieczne pióro. Rozpakowała wszystko na klatce schodowej.' | tok | lem -1 | ser -m -e '(cat(<N>) space cat(<ADJ>)) | (cat(<ADJ>) space cat(<N>))' |
| 714 | }}} |
| 715 | Output: |
| 716 | {{{ |
| 717 | 0000 00 BOM * ser:1 |
| 718 | 0000 05 W Panna lem:panna,N/CnGfNs |
| 719 | 0005 01 S _ |
| 720 | 0006 05 W młoda lem:młoda,N/CnGfNs,N/CvGfNs;młody,ADJ/CanvDpGfNs |
| 721 | 0011 00 EOM * ser:1 |
| 722 | 0020 00 BOM * ser:2 |
| 723 | 0020 08 W sztuczne lem:sztuczny,ADJ/CanvDpGafinNp,ADJ/CanvDpGnNs |
| 724 | 0028 01 S _ |
| 725 | 0029 06 W kwiaty lem:kwiat,N/CaGiNp,N/CnGiNp,N/CvGiNp |
| 726 | 0035 00 EOM * ser:2 |
| 727 | 0038 00 BOM * ser:3 |
| 728 | 0038 07 W wieczne lem:wieczny,ADJ/CanvDpGafinNp,ADJ/CanvDpGnNs |
| 729 | 0045 01 S _ |
| 730 | 0046 05 W pióro lem:pióro,N/CaGnNs,N/CnGnNs,N/CvGnNs |
| 731 | 0051 00 EOM * ser:3 |
| 732 | 0077 00 BOM * ser:4 |
| 733 | 0077 06 W klatce lem:klatka,N/CdGfNs,N/ClGfNs |
| 734 | 0083 01 S _ |
| 735 | 0084 09 W schodowej lem:schodowy,ADJ/CdglDpGfNs |
| 736 | 0093 00 EOM * ser:4 |
| 737 | }}} |
| 738 | |
| 739 | === ser.12. === |
| 740 | Description: |
| 741 | {{{ |
| 742 | Find the pattern 'cat(<N>)'. Mark the beginning (BOM) and end (EOM) of each match. Print only the matched segments (option -m). |
| 743 | }}} |
| 744 | Command: |
| 745 | {{{ |
| 746 | echo 'Kot, kota, kotu, kocie, kotem' | tok | lem -1 | ser -m -e 'cat(<N>)' |
| 747 | }}} |
| 748 | Output: |
| 749 | {{{ |
| 750 | 0000 00 BOM * ser:1 |
| 751 | 0000 03 W Kot lem:kota,N/CgGfNp;kot,N/CnGaNs |
| 752 | 0003 00 EOM * ser:1 |
| 753 | 0005 00 BOM * ser:2 |
| 754 | 0005 04 W kota lem:kota,N/CnGfNs;kot,N/CaGaNs,N/CgGaNs |
| 755 | 0009 00 EOM * ser:2 |
| 756 | 0011 00 BOM * ser:3 |
| 757 | 0011 04 W kotu lem:kot,N/CdGaNs |
| 758 | 0015 00 EOM * ser:3 |
| 759 | 0017 00 BOM * ser:4 |
| 760 | 0017 05 W kocie lem:kota,N/CdGfNs,N/ClGfNs;kot,N/ClGaNs,N/CvGaNs |
| 761 | 0022 00 EOM * ser:4 |
| 762 | 0024 00 BOM * ser:5 |
| 763 | 0024 05 W kotem lem:kot,N/CiGaNs |
| 764 | 0029 00 EOM * ser:5 |
| 765 | }}} |
| 766 | |
| 767 | === ser.13. === |
| 768 | Description: |
| 769 | {{{ |
| 770 | Find the pattern 'cat(<N/Ci>)'. Mark the beginning (BOM) and end (EOM) of each match. Print only the matched segments (option -m). |
| 771 | }}} |
| 772 | Command: |
| 773 | {{{ |
| 774 | echo 'Kot, kota, kotu, kocie, kotem' | tok | lem -1 | ser -m -e 'cat(<N/Ci>)' |
| 775 | }}} |
| 776 | Output: |
| 777 | {{{ |
| 778 | 0024 00 BOM * ser:1 |
| 779 | 0024 05 W kotem lem:kot,N/CiGaNs |
| 780 | 0029 00 EOM * ser:1 |
| 781 | }}} |
| 782 | |
| 783 | === ser.14. === |
| 784 | Description: |
| 785 | {{{ |
| 786 | Find the pattern 'form(była)'. Mark the beginning (BOM) and end (EOM) of each match. Print only the matched segments (option -m). |
| 787 | }}} |
| 788 | Command: |
| 789 | {{{ |
| 790 | echo 'Ala była tu, teraz jest tam.' | tok | lem -1 | ser -m -e 'form(była)' |
| 791 | }}} |
| 792 | Output: |
| 793 | {{{ |
| 794 | 0004 00 BOM * ser:1 |
| 795 | 0004 04 W była lem:były,ADJ/CanvDpGfNs;być,BYC/GfMdNsP3TaVp |
| 796 | 0008 00 EOM * ser:1 |
| 797 | }}} |
| 798 | |
| 799 | === ser.15. === |
| 800 | Description: |
| 801 | {{{ |
| 802 | Find the pattern 'seg{3} word(teraz) seg{4}'. Mark the beginning (BOM) and end (EOM) of each match. Print only the matched segments (option -m). |
| 803 | }}} |
| 804 | Command: |
| 805 | {{{ |
| 806 | echo 'Ala była tu, teraz jest tam.' | tok | lem -1 | ser -m -e 'seg{3} word(teraz) seg{4}' |
| 807 | }}} |
| 808 | Output: |
| 809 | {{{ |
| 810 | 0009 00 BOM * ser:1 |
| 811 | 0009 02 W tu lem:tu,ADVPRO/Zd,PART |
| 812 | 0011 01 P , |
| 813 | 0012 01 S _ |
| 814 | 0013 05 W teraz lem:teraz,ADV/Dp |
| 815 | 0018 01 S _ |
| 816 | 0019 04 W jest lem:być,BYC/MdNsP3TfrVp |
| 817 | 0023 01 S _ |
| 818 | 0024 03 W tam lem:tama,N/CgGfNp;tam,ADVPRO/Zd,ONO |
| 819 | 0027 00 EOM * ser:1 |
| 820 | }}} |
| 821 | |
| 822 | === ser.16. === |
| 823 | Description: |
| 824 | {{{ |
| 825 | Find the pattern 'word space number punct number'. Mark the beginning (BOM) and end (EOM) of each match. Print only the matched segments (option -m). |
| 826 | }}} |
| 827 | Command: |
| 828 | {{{ |
| 829 | echo 'Czy 2+2 to cztery?' | tok | lem -1 | ser -m -e 'word space number punct number' |
| 830 | }}} |
| 831 | Output: |
| 832 | {{{ |
| 833 | 0000 00 BOM * ser:1 |
| 834 | 0000 03 W Czy lem:czy,CONJ,PART |
| 835 | 0003 01 S _ |
| 836 | 0004 01 N 2 |
| 837 | 0005 01 P + |
| 838 | 0006 01 N 2 |
| 839 | 0007 00 EOM * ser:1 |
| 840 | }}} |
| 841 | |
| 842 | |
| 843 | == grp == |
| 844 | |
| 845 | === grp.1. === |
| 846 | Description: |
| 847 | {{{ |
| 848 | Select sentences containing an expression matching a pattern 'lexeme(kot)'. |
| 849 | }}} |
| 850 | Command: |
| 851 | {{{ |
| 852 | echo 'Ala ma kota.' | tok | lem -1 | grp -e 'lexeme(kot)' |
| 853 | }}} |
| 854 | Output: |
| 855 | {{{ |
| 856 | 0000 03 W Ala |
| 857 | 0003 01 S _ |
| 858 | 0004 02 W ma lem:mieć,V/AiMdNsP3R?TfrVp;mój,ADJPRO/CnvGfNsZs |
| 859 | 0006 01 S _ |
| 860 | 0007 04 W kota lem:kota,N/CnGfNs;kot,N/CaGaNs,N/CgGaNs |
| 861 | 0011 01 P . |
| 862 | 0012 01 S \n |
| 863 | }}} |
| 864 | |
| 865 | === grp.2. === |
| 866 | Description: |
| 867 | {{{ |
| 868 | Select sentences containing an expression matching a pattern 'lexeme(kot)'. |
| 869 | }}} |
| 870 | Command: |
| 871 | {{{ |
| 872 | echo 'Ala ma psa.' | tok | lem -1 | grp -e 'lexeme(kot)' |
| 873 | }}} |
| 874 | Output: |
| 875 | {{{ |
| 876 | |
| 877 | }}} |
| 878 | |
| 879 | |
| 880 | == mar == |
| 881 | |
| 882 | === mar.1. === |
| 883 | Description: |
| 884 | {{{ |
| 885 | Match the pattern 'lexeme(kot)'. Mark the beginning (BOM) and end (EOM) of each match. |
| 886 | }}} |
| 887 | Command: |
| 888 | {{{ |
| 889 | echo 'Ala ma kota. Ola ma kota. Bartosz ma psa.' | tok | lem -1 | mar -e 'lexeme(kot)' |
| 890 | }}} |
| 891 | Output: |
| 892 | {{{ |
| 893 | 0000 03 W Ala |
| 894 | 0003 01 S _ |
| 895 | 0004 02 W ma lem:mieć,V/AiMdNsP3R?TfrVp;mój,ADJPRO/CnvGfNsZs |
| 896 | 0006 01 S _ |
| 897 | 0007 00 BOM * |
| 898 | 0007 04 W kota lem:kota,N/CnGfNs;kot,N/CaGaNs,N/CgGaNs |
| 899 | 0011 00 EOM * |
| 900 | 0011 01 P . |
| 901 | 0012 01 S _ |
| 902 | 0013 03 W Ola |
| 903 | 0016 01 S _ |
| 904 | 0017 02 W ma lem:mieć,V/AiMdNsP3R?TfrVp;mój,ADJPRO/CnvGfNsZs |
| 905 | 0019 01 S _ |
| 906 | 0020 00 BOM * |
| 907 | 0020 04 W kota lem:kota,N/CnGfNs;kot,N/CaGaNs,N/CgGaNs |
| 908 | 0024 00 EOM * |
| 909 | 0024 01 P . |
| 910 | 0025 01 S _ |
| 911 | 0026 07 W Bartosz lem:Bartosz,N/CnGpNs |
| 912 | 0033 01 S _ |
| 913 | 0034 02 W ma lem:mieć,V/AiMdNsP3R?TfrVp;mój,ADJPRO/CnvGfNsZs |
| 914 | 0036 01 S _ |
| 915 | 0037 03 W psa lem:pies,N/CaGaNs,N/CgGaNs |
| 916 | 0040 01 P . |
| 917 | 0041 01 S \n |
| 918 | }}} |
| 919 | |
| 920 | === mar.2. === |
| 921 | Description: |
| 922 | {{{ |
| 923 | Match pattern 'lexeme(kot)'. Mark matching parts with MATCH tags (before and after any form of lexeme 'kot'). |
| 924 | }}} |
| 925 | Command: |
| 926 | {{{ |
| 927 | echo 'Ala ma kota. Ola ma kota. Bartosz ma psa.' | tok | lem -1 | mar -e '@MATCH lexeme(kot) @MATCH' |
| 928 | }}} |
| 929 | Output: |
| 930 | {{{ |
| 931 | 0000 03 W Ala |
| 932 | 0003 01 S _ |
| 933 | 0004 02 W ma lem:mieć,V/AiMdNsP3R?TfrVp;mój,ADJPRO/CnvGfNsZs |
| 934 | 0006 01 S _ |
| 935 | 0007 00 MATCH * |
| 936 | 0007 04 W kota lem:kota,N/CnGfNs;kot,N/CaGaNs,N/CgGaNs |
| 937 | 0011 00 MATCH * |
| 938 | 0011 01 P . |
| 939 | 0012 01 S _ |
| 940 | 0013 03 W Ola |
| 941 | 0016 01 S _ |
| 942 | 0017 02 W ma lem:mieć,V/AiMdNsP3R?TfrVp;mój,ADJPRO/CnvGfNsZs |
| 943 | 0019 01 S _ |
| 944 | 0020 00 MATCH * |
| 945 | 0020 04 W kota lem:kota,N/CnGfNs;kot,N/CaGaNs,N/CgGaNs |
| 946 | 0024 00 MATCH * |
| 947 | 0024 01 P . |
| 948 | 0025 01 S _ |
| 949 | 0026 07 W Bartosz lem:Bartosz,N/CnGpNs |
| 950 | 0033 01 S _ |
| 951 | 0034 02 W ma lem:mieć,V/AiMdNsP3R?TfrVp;mój,ADJPRO/CnvGfNsZs |
| 952 | 0036 01 S _ |
| 953 | 0037 03 W psa lem:pies,N/CaGaNs,N/CgGaNs |
| 954 | 0040 01 P . |
| 955 | 0041 01 S \n |
| 956 | }}} |
| 957 | |
| 958 | === mar.3. === |
| 959 | Description: |
| 960 | {{{ |
| 961 | Match pattern 'lexeme(kot)'. Mark matching parts with BEGINMATCH tags (only before any form of lexeme 'kot'). |
| 962 | }}} |
| 963 | Command: |
| 964 | {{{ |
| 965 | echo 'Ala ma kota. Ola ma kota.' | tok | lem -1 | mar -e '@BEGINMATCH lexeme(kot)' |
| 966 | }}} |
| 967 | Output: |
| 968 | {{{ |
| 969 | 0000 03 W Ala |
| 970 | 0003 01 S _ |
| 971 | 0004 02 W ma lem:mieć,V/AiMdNsP3R?TfrVp;mój,ADJPRO/CnvGfNsZs |
| 972 | 0006 01 S _ |
| 973 | 0007 00 BEGINMATCH * |
| 974 | 0007 04 W kota lem:kota,N/CnGfNs;kot,N/CaGaNs,N/CgGaNs |
| 975 | 0011 01 P . |
| 976 | 0012 01 S _ |
| 977 | 0013 03 W Ola |
| 978 | 0016 01 S _ |
| 979 | 0017 02 W ma lem:mieć,V/AiMdNsP3R?TfrVp;mój,ADJPRO/CnvGfNsZs |
| 980 | 0019 01 S _ |
| 981 | 0020 00 BEGINMATCH * |
| 982 | 0020 04 W kota lem:kota,N/CnGfNs;kot,N/CaGaNs,N/CgGaNs |
| 983 | 0024 01 P . |
| 984 | 0025 01 S \n |
| 985 | }}} |
| 986 | |
| 987 | === mar.4. === |
| 988 | Description: |
| 989 | {{{ |
| 990 | Match pattern 'lexeme(kot)'. Mark matching parts with ENDMATCH tags (only after any form of lexeme 'kot'). |
| 991 | }}} |
| 992 | Command: |
| 993 | {{{ |
| 994 | echo 'Ala ma kota. Ola ma kota.' | tok | lem -1 | mar -e 'lexeme(kot) @ENDMATCH' |
| 995 | }}} |
| 996 | Output: |
| 997 | {{{ |
| 998 | 0000 03 W Ala |
| 999 | 0003 01 S _ |
| 1000 | 0004 02 W ma lem:mieć,V/AiMdNsP3R?TfrVp;mój,ADJPRO/CnvGfNsZs |
| 1001 | 0006 01 S _ |
| 1002 | 0007 04 W kota lem:kota,N/CnGfNs;kot,N/CaGaNs,N/CgGaNs |
| 1003 | 0011 00 ENDMATCH * |
| 1004 | 0011 01 P . |
| 1005 | 0012 01 S _ |
| 1006 | 0013 03 W Ola |
| 1007 | 0016 01 S _ |
| 1008 | 0017 02 W ma lem:mieć,V/AiMdNsP3R?TfrVp;mój,ADJPRO/CnvGfNsZs |
| 1009 | 0019 01 S _ |
| 1010 | 0020 04 W kota lem:kota,N/CnGfNs;kot,N/CaGaNs,N/CgGaNs |
| 1011 | 0024 00 ENDMATCH * |
| 1012 | 0024 01 P . |
| 1013 | 0025 01 S \n |
| 1014 | }}} |
| 1015 | |
| 1016 | === mar.5. === |
| 1017 | Description: |
| 1018 | {{{ |
| 1019 | Match pattern 'word(ma) space lexeme(kot)'. Mark matching parts with MA (before and after each word 'ma') and KOT (before and after any form of lexeme 'kot') tags. |
| 1020 | }}} |
| 1021 | Command: |
| 1022 | {{{ |
| 1023 | echo 'Ala ma kota. Ola ma kota.' | tok | lem -1 | mar -e '@MA word(ma) @MA space @KOT lexeme(kot) @KOT' |
| 1024 | }}} |
| 1025 | Output: |
| 1026 | {{{ |
| 1027 | 0000 03 W Ala |
| 1028 | 0003 01 S _ |
| 1029 | 0004 00 MA * |
| 1030 | 0004 02 W ma lem:mieć,V/AiMdNsP3R?TfrVp;mój,ADJPRO/CnvGfNsZs |
| 1031 | 0006 00 MA * |
| 1032 | 0006 01 S _ |
| 1033 | 0007 00 KOT * |
| 1034 | 0007 04 W kota lem:kota,N/CnGfNs;kot,N/CaGaNs,N/CgGaNs |
| 1035 | 0011 00 KOT * |
| 1036 | 0011 01 P . |
| 1037 | 0012 01 S _ |
| 1038 | 0013 03 W Ola |
| 1039 | 0016 01 S _ |
| 1040 | 0017 00 MA * |
| 1041 | 0017 02 W ma lem:mieć,V/AiMdNsP3R?TfrVp;mój,ADJPRO/CnvGfNsZs |
| 1042 | 0019 00 MA * |
| 1043 | 0019 01 S _ |
| 1044 | 0020 00 KOT * |
| 1045 | 0020 04 W kota lem:kota,N/CnGfNs;kot,N/CaGaNs,N/CgGaNs |
| 1046 | 0024 00 KOT * |
| 1047 | 0024 01 P . |
| 1048 | 0025 01 S \n |
| 1049 | }}} |
| 1050 | |
| 1051 | === mar.6. === |
| 1052 | Description: |
| 1053 | {{{ |
| 1054 | Match pattern 'word(ma) space lexeme(kot)'. Mark matching parts with MA (before and after each word 'ma'), KOT (before and after any form of lexeme 'kot'), and BEGINMATCH and ENDMATCH (before and after each match) tags. |
| 1055 | }}} |
| 1056 | Command: |
| 1057 | {{{ |
| 1058 | echo 'Ala ma kota. Ola ma kota.' | tok | lem -1 | mar -e '@BEGINMATCH @MA word(ma) @MA space @KOT lexeme(kot) @KOT @ENDMATCH' |
| 1059 | }}} |
| 1060 | Output: |
| 1061 | {{{ |
| 1062 | 0000 03 W Ala |
| 1063 | 0003 01 S _ |
| 1064 | 0004 00 BEGINMATCH * |
| 1065 | 0004 00 MA * |
| 1066 | 0004 02 W ma lem:mieć,V/AiMdNsP3R?TfrVp;mój,ADJPRO/CnvGfNsZs |
| 1067 | 0006 00 MA * |
| 1068 | 0006 01 S _ |
| 1069 | 0007 00 KOT * |
| 1070 | 0007 04 W kota lem:kota,N/CnGfNs;kot,N/CaGaNs,N/CgGaNs |
| 1071 | 0011 00 KOT * |
| 1072 | 0011 00 ENDMATCH * |
| 1073 | 0011 01 P . |
| 1074 | 0012 01 S _ |
| 1075 | 0013 03 W Ola |
| 1076 | 0016 01 S _ |
| 1077 | 0017 00 BEGINMATCH * |
| 1078 | 0017 00 MA * |
| 1079 | 0017 02 W ma lem:mieć,V/AiMdNsP3R?TfrVp;mój,ADJPRO/CnvGfNsZs |
| 1080 | 0019 00 MA * |
| 1081 | 0019 01 S _ |
| 1082 | 0020 00 KOT * |
| 1083 | 0020 04 W kota lem:kota,N/CnGfNs;kot,N/CaGaNs,N/CgGaNs |
| 1084 | 0024 00 KOT * |
| 1085 | 0024 00 ENDMATCH * |
| 1086 | 0024 01 P . |
| 1087 | 0025 01 S \n |
| 1088 | }}} |
| 1089 | |
| 1090 | |
| 1091 | == kot == |
| 1092 | |
| 1093 | === kot.1. === |
| 1094 | Description: |
| 1095 | {{{ |
| 1096 | Change raw text into list of tokens. |
| 1097 | }}} |
| 1098 | Command: |
| 1099 | {{{ |
| 1100 | echo 'Ala ma kota.' | tok |
| 1101 | }}} |
| 1102 | Output: |
| 1103 | {{{ |
| 1104 | 0000 03 W Ala |
| 1105 | 0003 01 S _ |
| 1106 | 0004 02 W ma |
| 1107 | 0006 01 S _ |
| 1108 | 0007 04 W kota |
| 1109 | 0011 01 P . |
| 1110 | 0012 01 S \n |
| 1111 | }}} |
| 1112 | |
| 1113 | === kot.2. === |
| 1114 | Description: |
| 1115 | {{{ |
| 1116 | Change list of tokens into raw text. |
| 1117 | }}} |
| 1118 | Command: |
| 1119 | {{{ |
| 1120 | echo 'Ala ma kota.' | tok | kot |
| 1121 | }}} |
| 1122 | Output: |
| 1123 | {{{ |
| 1124 | Ala ma kota. |
| 1125 | }}} |
| 1126 | |
| 1127 | === kot.3. === |
| 1128 | Description: |
| 1129 | {{{ |
| 1130 | Change list of tokens into raw text. Retain the special characters. |
| 1131 | }}} |
| 1132 | Command: |
| 1133 | {{{ |
| 1134 | echo 'Ala ma kota.' | tok | kot -r |
| 1135 | }}} |
| 1136 | Output: |
| 1137 | {{{ |
| 1138 | Ala_ma_kota.\n |
| 1139 | }}} |
| 1140 | |
| 1141 | === kot.4. === |
| 1142 | Description: |
| 1143 | {{{ |
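| 1144 | Change list of tokens into raw text. Here the input is the output of ser, i.e. the matches of the noun-adjective / adjective-noun pattern. |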
| 1145 | }}} |
| 1146 | Command: |
| 1147 | {{{ |
| 1148 | echo 'Panna młoda dostała sztuczne kwiaty i wieczne pióro. Rozpakowała wszystko na klatce schodowej.' | tok | lem -1 | ser -m -e '(cat(<N>) space cat(<ADJ>)) | (cat(<ADJ>) space cat(<N>))' | kot |
| 1149 | }}} |
| 1150 | Output: |
| 1151 | {{{ |
| 1152 | Panna młoda |
| 1153 | ----- |
| 1154 | sztuczne kwiaty |
| 1155 | ----- |
| 1156 | wieczne pióro |
| 1157 | ----- |
| 1158 | klatce schodowej |
| 1159 | }}} |
| 1160 | |
| 1161 | |
| 1162 | == fla == |
| 1163 | |
| 1164 | === fla.1. === |
| 1165 | Description: |
| 1166 | {{{ |
| 1167 | 'Flatten' a UTT file by merging segments belonging to one sentence into one line. |
| 1168 | }}} |
| 1169 | Command: |
| 1170 | {{{ |
| 1171 | echo 'Ala ma kota. Ola ma kota. Paweł też ma.' | tok | sen | lem -1 | grp -e 'lexeme(kot)' | fla |
| 1172 | }}} |
| 1173 | Output: |
| 1174 | {{{ |
| 1175 | 0000 00 BOS * |
| 1176 | 0000 03 W Ala |
| 1177 | 0003 01 S _ |
| 1178 | 0004 02 W ma lem:mieć,V/AiMdNsP3R?TfrVp;mój,ADJPRO/CnvGfNsZs |
| 1179 | 0006 01 S _ |
| 1180 | 0007 04 W kota lem:kota,N/CnGfNs;kot,N/CaGaNs,N/CgGaNs |
| 1181 | 0011 01 P . |
| 1182 | 0012 00 EOS * |
| 1183 | 0012 00 BOS * |
| 1184 | 0012 01 S _ |
| 1185 | 0013 03 W Ola |
| 1186 | 0016 01 S _ |
| 1187 | 0017 02 W ma lem:mieć,V/AiMdNsP3R?TfrVp;mój,ADJPRO/CnvGfNsZs |
| 1188 | 0019 01 S _ |
| 1189 | 0020 04 W kota lem:kota,N/CnGfNs;kot,N/CaGaNs,N/CgGaNs |
| 1190 | 0024 01 P . |
| 1191 | 0025 00 EOS * |
| 1192 | }}} |
| 1193 | |
| 1194 | |
| 1195 | == unfla == |
| 1196 | |
| 1197 | === unfla.1. === |
| 1198 | Description: |
| 1199 | {{{ |
| 1200 | Transform a flattened UTT file into the regular format by restoring end-of-line characters. |
| 1201 | }}} |
| 1202 | Command: |
| 1203 | {{{ |
| 1204 | echo 'Ala ma kota. Ola ma kota. Paweł też ma.' | tok | sen | lem -1 | fla | unfla |
| 1205 | }}} |
| 1206 | Output: |
| 1207 | {{{ |
| 1208 | 0000 00 BOS * |
| 1209 | 0000 03 W Ala |
| 1210 | 0003 01 S _ |
| 1211 | 0004 02 W ma lem:mieć,V/AiMdNsP3R?TfrVp;mój,ADJPRO/CnvGfNsZs |
| 1212 | 0006 01 S _ |
| 1213 | 0007 04 W kota lem:kota,N/CnGfNs;kot,N/CaGaNs,N/CgGaNs |
| 1214 | 0011 01 P . |
| 1215 | 0012 00 EOS * |
| 1216 | 0012 00 BOS * |
| 1217 | 0012 01 S _ |
| 1218 | 0013 03 W Ola |
| 1219 | 0016 01 S _ |
| 1220 | 0017 02 W ma lem:mieć,V/AiMdNsP3R?TfrVp;mój,ADJPRO/CnvGfNsZs |
| 1221 | 0019 01 S _ |
| 1222 | 0020 04 W kota lem:kota,N/CnGfNs;kot,N/CaGaNs,N/CgGaNs |
| 1223 | 0024 01 P . |
| 1224 | 0025 00 EOS * |
| 1225 | 0025 00 BOS * |
| 1226 | 0025 01 S _ |
| 1227 | 0026 05 W Paweł |
| 1228 | 0031 01 S _ |
| 1229 | 0032 03 W też lem:też,CONJ,PART |
| 1230 | 0035 01 S _ |
| 1231 | 0036 02 W ma lem:mieć,V/AiMdNsP3R?TfrVp;mój,ADJPRO/CnvGfNsZs |
| 1232 | 0038 01 P . |
| 1233 | 0039 01 S \n |
| 1234 | 0040 00 EOS * |
| 1235 | }}} |
| 1236 | |
| 1237 | |
| 1238 | == help == |
| 1239 | |
| 1240 | === help.1. === |
| 1241 | Description: |
| 1242 | {{{ |
| 1243 | Print tok help. |
| 1244 | }}} |
| 1245 | Command: |
| 1246 | {{{ |
| 1247 | tok -h |
| 1248 | }}} |
| 1249 | Output: |
| 1250 | {{{ |
| 1251 | tok 0.1 |
| 1252 | |
| 1253 | Usage: tok [OPTIONS]... |
| 1254 | |
| 1255 | -h, --help Print help and exit |
| 1256 | -V, --version Print version and exit |
| 1257 | -i, --interactive Interactive mode. (default=off) |
| 1258 | }}} |
| 1259 | |
| 1260 | === help.2. === |
| 1261 | Description: |
| 1262 | {{{ |
| 1263 | Print lem help. |
| 1264 | }}} |
| 1265 | Command: |
| 1266 | {{{ |
| 1267 | lem -h |
| 1268 | }}} |
| 1269 | Output: |
| 1270 | {{{ |
| 1271 | lem 0.1 |
| 1272 | |
| 1273 | Usage: lem [OPTIONS]... |
| 1274 | |
| 1275 | -h, --help Print help and exit |
| 1276 | --full-help Print help, including hidden options, and exit |
| 1277 | -V, --version Print version and exit |
| 1278 | ... |
| 1279 | }}} |
| 1280 | |
| 1281 | === help.3. === |
| 1282 | Description: |
| 1283 | {{{ |
| 1284 | Print gue help. |
| 1285 | }}} |
| 1286 | Command: |
| 1287 | {{{ |
| 1288 | gue -h |
| 1289 | }}} |
| 1290 | Output: |
| 1291 | {{{ |
| 1292 | guess 0.1 |
| 1293 | |
| 1294 | Usage: guess [OPTIONS]... |
| 1295 | |
| 1296 | -h, --help Print help and exit |
| 1297 | --full-help Print help, including hidden options, and exit |
| 1298 | -V, --version Print version and exit |
| 1299 | ... |
| 1300 | }}} |
| 1301 | |
| 1302 | |
| 1303 | == version == |
| 1304 | |
| 1305 | === version.1. === |
| 1306 | Description: |
| 1307 | {{{ |
| 1308 | Print tok version information. |
| 1309 | }}} |
| 1310 | Command: |
| 1311 | {{{ |
| 1312 | tok -V |
| 1313 | }}} |
| 1314 | Output: |
| 1315 | {{{ |
| 1316 | tok 0.1 |
| 1317 | }}} |
| 1318 | |
| 1319 | === version.2. === |
| 1320 | Description: |
| 1321 | {{{ |
| 1322 | Print lem version information. |
| 1323 | }}} |
| 1324 | Command: |
| 1325 | {{{ |
| 1326 | lem -V |
| 1327 | }}} |
| 1328 | Output: |
| 1329 | {{{ |
| 1330 | lem 0.1 |
| 1331 | }}} |
| 1332 | |
| 1333 | |
| 1334 | === version.3. === |
| 1335 | Description: |
| 1336 | {{{ |
| 1337 | Print gue version information. |
| 1338 | }}} |
| 1339 | Command: |
| 1340 | {{{ |
| 1341 | gue -V |
| 1342 | }}} |
| 1343 | Output: |
| 1344 | {{{ |
| 1345 | guess 0.1 |
| 1346 | }}} |