/[zanavi_public1]/navit/navit/linguistics.c
ZANavi

Diff of /navit/navit/linguistics.c

Parent Directory | Revision Log | View Patch

Revision 14 Revision 15
228{"ð","d","dh"}, 228{"ð","d","dh"},
229{"ŋ","n","ng"}, 229{"ŋ","n","ng"},
230{"þ","t","th"}, 230{"þ","t","th"},
231 231
232/* Cyrillic capital */ 232/* Cyrillic capital */
233
233{"Ё","Е"}, 234{"Ё","Е"},
234{"Й","И"}, 235{"Й","И"},
235{"І","I"}, 236{"І","I"},
236{"Ї","I"}, 237{"Ї","I"},
237{"Ў","У"}, 238{"Ў","У"},
238{"Є","Е","Э"}, 239{"Є","Е","Э"},
239{"Ґ","Г"}, 240{"Ґ","Г"},
240{"Ѓ","Г"}, 241{"Ѓ","Г"},
241{"Ђ","Д"}, 242{"Ђ","Д"},
242{"Ќ","К"}, 243{"Ќ","К"},
243{"Љ","Л","ЛЬ"}, 244//{"Љ","Л","ЛЬ"},
244{"Њ","Н","НЬ"}, 245//{"Њ","Н","НЬ"},
245{"Џ","Ц"}, 246{"Џ","Ц"},
246 247
247/* Cyrillic small */ 248/* Cyrillic small */
249
248{"ё","е"}, 250{"ё","е"},
249{"й","и"}, 251{"й","и"},
250{"і","i"}, 252{"і","i"},
251{"ї","i"}, 253{"ї","i"},
252{"ў","у"}, 254{"ў","у"},
253{"є","е","э"}, 255//{"є","е","э"},
254{"ґ","г"}, 256{"ґ","г"},
255{"ѓ","г"}, 257{"ѓ","г"},
256{"ђ","д"}, 258{"ђ","д"},
257{"ќ","к"}, 259{"ќ","к"},
258{"љ","л","ль"}, 260//{"љ","л","ль"},
259{"њ","н","нь"}, 261//{"њ","н","нь"},
260{"џ","ц"}, 262{"џ","ц"},
261 263
262}; 264};
265
263static GHashTable *special_hash; 266static GHashTable *special_hash;
264 267
265/* Array of strings for case conversion 268/* Array of strings for case conversion
266 * Even elements of array are strings of upper-case letters 269 * Even elements of array are strings of upper-case letters
267 * Odd elements of array are strings of lower-case letters, in the order corresponding to directly preceding even element. 270 * Odd elements of array are strings of lower-case letters, in the order corresponding to directly preceding even element.
325 char *tmp, *folded; 328 char *tmp, *folded;
326 tmp=g_utf8_find_next_char(src,NULL); 329 tmp=g_utf8_find_next_char(src,NULL);
327 charlen=tmp-src+1; 330 charlen=tmp-src+1;
328 g_strlcpy(buf,src,charlen>10?10:charlen); 331 g_strlcpy(buf,src,charlen>10?10:charlen);
329 folded=g_hash_table_lookup(casefold_hash,buf); 332 folded=g_hash_table_lookup(casefold_hash,buf);
333
330 if(folded) { 334 if(folded)
335 {
331 while(*folded && dest-ret<len) 336 while(*folded && dest-ret<len)
332 *dest++=*folded++; 337 *dest++=*folded++;
333 src=tmp; 338 src=tmp;
339 }
334 } else { 340 else
341 {
335 while(src<tmp && dest-ret<len) 342 while(src<tmp && dest-ret<len)
336 *dest++=*src++; 343 *dest++=*src++;
337 } 344 }
338 } 345 }
339 } 346 }
378 break; 385 break;
379 utf_boundary=tmp; 386 utf_boundary=tmp;
380 } 387 }
381 /* Push first mismatching char to the list if it's a special char */ 388 /* Push first mismatching char to the list if it's a special char */
382 sp=linguistics_get_special(utf_boundary,tmp); 389 sp=linguistics_get_special(utf_boundary,tmp);
390
383 if(sp){ 391 if(sp)
392 {
384 spp=g_new(struct special_pos,1); 393 spp=g_new(struct special_pos,1);
385 spp->variants=sp; 394 spp->variants=sp;
386 spp->n=1; 395 spp->n=1;
387 spp->s1=utf_boundary; 396 spp->s1=utf_boundary;
388 spp->s2=s2+(utf_boundary-s1); 397 spp->s2=s2+(utf_boundary-s1);
435linguistics_expand_special(char *str, int mode) 444linguistics_expand_special(char *str, int mode)
436{ 445{
437 char *in=str; 446 char *in=str;
438 char *out,*ret; 447 char *out,*ret;
439 int found=0; 448 int found=0;
449
450 if (!str)
451 {
452 return NULL;
453 }
454
440 out=ret=g_strdup(str); 455 ret=g_strdup(str);
456 out=ret;
441 457
442 if (!mode) 458 if (!mode)
459 {
443 return ret; 460 return ret;
461 }
444 462
445 while (*in) 463 while (*in)
446 { 464 {
447 char *next=g_utf8_find_next_char(in, NULL); 465 char *next=g_utf8_find_next_char(in, NULL);
448 int i,len=next-in; 466 int i,len=next-in;
457 const char *replace=special[i][mode]; 475 const char *replace=special[i][mode];
458 if (replace) 476 if (replace)
459 { 477 {
460 int replace_len=strlen(replace); 478 int replace_len=strlen(replace);
461 479
462 // dbg_assert(replace_len <= len);
463 if (replace_len <= len) 480 if (replace_len > len)
464 { 481 {
465 dbg(0,"found %s %s %d %s %d\n",in,search,len,replace,replace_len); 482 fprintf(stderr,"* ERROR !! ERROR !! found %s %s %d %s %d\n",in,search,len,replace,replace_len);
483 }
484 dbg_assert(replace_len <= len);
485 if (replace_len > len)
486 {
487 out+=len;
488 match=0;
489 break;
466 } 490 }
467 else 491 else
468 { 492 {
493 // fprintf(stderr," GOOD !! GOOD !! found %s %s %d %s %d\n",in,search,len,replace,replace_len);
469 strcpy(out, replace); 494 strcpy(out, replace);
470 out+=replace_len; 495 out+=replace_len;
471 match=1; 496 match=1;
472 break; 497 break;
473 } 498 }
490 } 515 }
491 } 516 }
492 *out++='\0'; 517 *out++='\0';
493 if (!found) 518 if (!found)
494 { 519 {
520 if (ret)
521 {
495 g_free(ret); 522 g_free(ret);
523 }
496 ret=NULL; 524 ret=NULL;
497 } 525 }
498 return ret; 526 return ret;
499} 527}
500 528
501char * 529char *
502linguistics_next_word(char *str) 530linguistics_next_word(char *str)
503{ 531{
532 char* ret=strtok(str, " -/()\"\',.;_[]{}\\");
533 return ret;
534
504 int len=strcspn(str, " -/()"); 535// int len=strcspn(str, " -/()");
505 if (!str[len] || !str[len+1]) 536// if (!str[len] || !str[len+1])
506 return NULL; 537// return NULL;
507 return str+len+1; 538// return str+len+1;
539
508} 540}
509 541
510int 542int
511linguistics_search(char *str) 543linguistics_search(char *str)
512{ 544{
543 int i; 575 int i;
544 special_hash=g_hash_table_new(g_str_hash, g_str_equal); 576 special_hash=g_hash_table_new(g_str_hash, g_str_equal);
545 casefold_hash=g_hash_table_new(g_str_hash, g_str_equal); 577 casefold_hash=g_hash_table_new(g_str_hash, g_str_equal);
546 578
547 for (i = 0 ; i < sizeof(special)/sizeof(special[0]); i++) 579 for (i = 0 ; i < sizeof(special)/sizeof(special[0]); i++)
580 {
548 g_hash_table_insert(special_hash,(gpointer)special[i][0],special[i]); 581 g_hash_table_insert(special_hash,(gpointer)special[i][0],special[i]);
582 }
549 583
550 for (i = 0 ; upperlower[i]; i+=2) { 584 for (i = 0 ; upperlower[i]; i+=2)
585 {
551 int j,k; 586 int j,k;
552 for(j=0,k=0;upperlower[i][j] && upperlower[i+1][k];) { 587 for(j=0,k=0;upperlower[i][j] && upperlower[i+1][k];)
588 {
553 char *s1=linguistics_dup_utf8_char(upperlower[i]+j); 589 char *s1=linguistics_dup_utf8_char(upperlower[i]+j);
554 char *s2=linguistics_dup_utf8_char(upperlower[i+1]+k); 590 char *s2=linguistics_dup_utf8_char(upperlower[i+1]+k);
555 g_hash_table_insert(casefold_hash,s1,s2); 591 g_hash_table_insert(casefold_hash,s1,s2);
556 j+=strlen(s1); 592 j+=strlen(s1);
557 k+=strlen(s2); 593 k+=strlen(s2);

Legend:
Removed from v.14  
changed lines
  Added in v.15

   
Visit the ZANavi Wiki