1 | //========================================================================= |
---|
2 | // FILENAME : textutils.c |
---|
3 | // DESCRIPTION : Misc. text utilities |
---|
4 | //========================================================================= |
---|
5 | // Copyright (c) 2008- NETGEAR, Inc. All Rights Reserved. |
---|
6 | //========================================================================= |
---|
7 | |
---|
8 | /* This program is free software; you can redistribute it and/or modify |
---|
9 | * it under the terms of the GNU General Public License as published by |
---|
10 | * the Free Software Foundation; either version 2 of the License, or |
---|
11 | * (at your option) any later version. |
---|
12 | * |
---|
13 | * This program is distributed in the hope that it will be useful, |
---|
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
16 | * GNU General Public License for more details. |
---|
17 | * |
---|
18 | * You should have received a copy of the GNU General Public License |
---|
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
---|
20 | */ |
---|
21 | |
---|
22 | |
---|
23 | #include <stdlib.h> |
---|
24 | #include <stdio.h> |
---|
25 | #include <string.h> |
---|
26 | #include <ctype.h> |
---|
27 | |
---|
28 | #include "misc.h" |
---|
29 | #include "textutils.h" |
---|
30 | #include "../log.h" |
---|
31 | |
---|
/*
 * Convert one hexadecimal digit character to its numeric value (0..15).
 * Accepts '0'-'9', 'A'-'F' and 'a'-'f'; any other character yields 0.
 */
static unsigned int
_char_htoi(char h)
{
	if (h >= '0' && h <= '9')
		return h - '0';
	if (h >= 'A' && h <= 'F')
		return h - 'A' + 10;
	if (h >= 'a' && h <= 'f')
		return h - 'a' + 10;

	/* not a hex digit */
	return 0;
}
---|
49 | |
---|
/*
 * Decode percent-escapes in place.
 *
 * src : NUL-terminated string, rewritten in place (the decoded form is
 *       never longer than the input). A NULL pointer is ignored.
 *
 * "%%" becomes a single '%'.  "%XY" becomes the byte whose high and low
 * nibbles are _char_htoi(X) and _char_htoi(Y) (non-hex characters count
 * as 0).  A '%' that is not followed by two more characters before the
 * terminator is copied literally, so the scan never steps past the end
 * of the string.
 */
void
urldecode(char *src)
{
	char *s, *d;

	if (!src)
		return;

	for (d=s=src; *s; s++, d++) {
		char c = *s;

		if (c=='%') {
			if (s[1]=='%') {
				/* "%%" -> '%' */
				s++;
			}
			else if (s[1]!='\0' && s[2]!='\0') {
				c = _char_htoi(s[1])<<4 | _char_htoi(s[2]);
				s += 2;
			}
			/* else: truncated escape at end of string, keep the '%' */
		}
		*d = c;
	}
	*d = '\0';
}
---|
72 | |
---|
#if 0
/*
 * Disabled code (compiled out by the surrounding #if 0).
 *
 * Checks whether str begins with one of the entries of
 * prefs.ignoredwords (case-insensitive prefix match over the entry's
 * length n) and the match is not immediately followed by an
 * alphanumeric character.  Returns the length of the matching entry,
 * or 0 when nothing matches or no list is configured.  The list is
 * terminated by an entry whose n is 0.
 */
static int
is_ignoredword(const char *str)
{
	int i;

	if (!prefs.ignoredwords)
		return 0;

	for (i=0; prefs.ignoredwords[i].n; i++) {
		if (!(strncasecmp(prefs.ignoredwords[i].word, str, prefs.ignoredwords[i].n))) {
			/* <ctype.h> functions require a value representable as unsigned char */
			unsigned char next_char = (unsigned char) str[prefs.ignoredwords[i].n];
			if (isalnum(next_char))
				continue;
			return prefs.ignoredwords[i].n;
		}
	}
	return 0;
}
#endif
---|
93 | |
---|
/*
 * Return a pointer to the first character of str that is not white
 * space according to isspace(); this is the terminating NUL when the
 * string is empty or all white space.  The result points into str.
 */
char *
skipspaces(const char *str)
{
	/* cast: passing a negative char (e.g. a UTF-8 byte) to isspace() is undefined */
	while (isspace((unsigned char) *str))
		str++;
	return (char*) str;
}
---|
100 | |
---|
101 | /* |
---|
102 | U+0040 (40): @ A B C D E F G H I J K L M N O |
---|
103 | U+0050 (50): P Q R S T U V W X Y Z [ \ ] ^ _ |
---|
104 | U+0060 (60): ` a b c d e f g h i j k l m n o |
---|
105 | U+0070 (70): p q r s t u v w x y z { | } ~ |
---|
106 | |
---|
107 | U+00c0 (c3 80): À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï |
---|
108 | U+00d0 (c3 90): Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß |
---|
109 | U+00e0 (c3 a0): à á â ã ä å æ ç è é ê ë ì í î ï |
---|
110 | U+00f0 (c3 b0): ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ |
---|
111 | U+0100 (c4 80): Ā ā Ă ă Ą ą Ć ć Ĉ ĉ Ċ ċ Č č Ď ď |
---|
112 | U+0110 (c4 90): Đ đ Ē ē Ĕ ĕ Ė ė Ę ę Ě ě Ĝ ĝ Ğ ğ |
---|
113 | U+0120 (c4 a0): Ġ ġ Ģ ģ Ĥ ĥ Ħ ħ Ĩ ĩ Ī ī Ĭ ĭ Į į |
---|
114 | U+0130 (c4 b0): İ ı Ĳ ĳ Ĵ ĵ Ķ ķ ĸ Ĺ ĺ Ļ ļ Ľ ľ Ŀ |
---|
115 | U+0140 (c5 80): ŀ Ł ł Ń ń Ņ ņ Ň ň ŉ Ŋ ŋ Ō ō Ŏ ŏ |
---|
116 | U+0150 (c5 90): Ő ő Œ œ Ŕ ŕ Ŗ ŗ Ř ř Ś ś Ŝ ŝ Ş ş |
---|
117 | U+0160 (c5 a0): Š š Ţ ţ Ť ť Ŧ ŧ Ũ ũ Ū ū Ŭ ŭ Ů ů |
---|
118 | U+0170 (c5 b0): Ű ű Ų ų Ŵ ŵ Ŷ ŷ Ÿ Ź ź Ż ż Ž ž ſ |
---|
119 | */ |
---|
120 | |
---|
// conversion table for latin diacritical char to ascii one char or two chars.
//
// 192 entries covering U+00C0..U+017F, 16 per row; entry i corresponds to
// code point U+00C0 + i (per the row markers below).
// Entry forms found in the table:
//   0x00XX : one ASCII char XX
//   0xXXYY : two ASCII chars XX YY (e.g. 0x4145 "AE", 0x5353 "SS")
//   0xc3.. / 0xc5.. : no ASCII equivalent; the value is a two-byte UTF-8
//                     sequence (e.g. 0xc397 is U+00D7)
// NOTE(review): how consumers tell the two-char and UTF-8 forms apart is
// not visible here - confirm against the callers.
//
// U+00F7 and U+00FF previously held the values of U+00D7 and U+00DF
// (0xc397 and 0x5353, "SS"); they now map to U+00F7 itself and to 'Y'.
unsigned short UtoAscii[] = {
	// U+00c0
	0x0041,0x0041,0x0041,0x0041, 0x0041,0x0041,0x4145,0x0043, 0x0045,0x0045,0x0045,0x0045, 0x0049,0x0049,0x0049,0x0049,
	0x0044,0x004e,0x004f,0x004f, 0x004f,0x004f,0x004f,0xc397, 0xc398,0x0055,0x0055,0x0055, 0x0055,0x0059,0x0050,0x5353,
	// U+00e0
	0x0041,0x0041,0x0041,0x0041, 0x0041,0x0041,0x4145,0x0043, 0x0045,0x0045,0x0045,0x0045, 0x0049,0x0049,0x0049,0x0049,
	0x0044,0x004e,0x004f,0x004f, 0x004f,0x004f,0x004f,0xc3b7, 0xc398,0x0055,0x0055,0x0055, 0x0055,0x0059,0x0050,0x0059,
	// U+0100
	0x0041,0x0041,0x0041,0x0041, 0x0041,0x0041,0x0043,0x0043, 0x0043,0x0043,0x0043,0x0043, 0x0043,0x0043,0x0044,0x0044,
	0x0044,0x0044,0x0045,0x0045, 0x0045,0x0045,0x0045,0x0045, 0x0045,0x0045,0x0045,0x0045, 0x0047,0x0047,0x0047,0x0047,
	// U+0120
	0x0047,0x0047,0x0047,0x0047, 0x0048,0x0048,0x0048,0x0048, 0x0049,0x0049,0x0049,0x0049, 0x0049,0x0049,0x0049,0x0049,
	0x0049,0x0049,0x494a,0x494a, 0x004a,0x004a,0x004b,0x004b, 0x004b,0x004c,0x004c,0x004c, 0x004c,0x004c,0x004c,0x004c,
	// U+0140
	0x004c,0x004c,0x004c,0x004e, 0x004e,0x004e,0x004e,0x004e, 0x004e,0x004e,0x004e,0x004e, 0x004f,0x004f,0x004f,0x004f,
	0x004f,0x004f,0x4f45,0x4f45, 0x0052,0x0052,0x0052,0x0052, 0x0052,0x0052,0x0053,0x0053, 0x0053,0x0053,0x0053,0x0053,
	// U+0160
	0x0053,0x0053,0x0054,0x0054, 0x0054,0x0054,0x0054,0x0054, 0x0055,0x0055,0x0055,0x0055, 0x0055,0x0055,0x0055,0x0055,
	0x0055,0x0055,0x0055,0x0055, 0x0057,0x0057,0x0059,0x0059, 0x0059,0x005a,0x005a,0x005a, 0x005a,0x005a,0x005a,0xc5bf
};
---|
142 | |
---|
// conversion table for toupper() function for latin diacritical char
//
// 192 entries covering U+00C0..U+017F, 16 per row; entry i corresponds to
// code point U+00C0 + i (per the row markers below).  Each value is the
// two-byte UTF-8 sequence of the upper-case letter, except U+00DF which
// maps to the two ASCII chars "SS" (0x5353).
//
// Corrected entries (previously left lower-case or pointing at the wrong
// character):
//   U+00F7 -> itself (0xc3b7) instead of U+00D7
//   U+00FF -> U+0178 (0xc5b8) instead of U+00DF
//   U+0151..U+015F odd code points -> their upper-case neighbour
//   U+0171, U+0173, U+0175, U+0177, U+017C -> their upper-case neighbour
unsigned short UtoUpper[] = {
	// U+00c0
	0xc380,0xc381,0xc382,0xc383, 0xc384,0xc385,0xc386,0xc387, 0xc388,0xc389,0xc38a,0xc38b, 0xc38c,0xc38d,0xc38e,0xc38f,
	0xc390,0xc391,0xc392,0xc393, 0xc394,0xc395,0xc396,0xc397, 0xc398,0xc399,0xc39a,0xc39b, 0xc39c,0xc39d,0xc39e,0x5353,
	// U+00e0
	0xc380,0xc381,0xc382,0xc383, 0xc384,0xc385,0xc386,0xc387, 0xc388,0xc389,0xc38a,0xc38b, 0xc38c,0xc38d,0xc38e,0xc38f,
	0xc390,0xc391,0xc392,0xc393, 0xc394,0xc395,0xc396,0xc3b7, 0xc398,0xc399,0xc39a,0xc39b, 0xc39c,0xc39d,0xc39e,0xc5b8,
	// U+0100
	0xc480,0xc480,0xc482,0xc482, 0xc484,0xc484,0xc486,0xc486, 0xc488,0xc488,0xc48a,0xc48a, 0xc48c,0xc48c,0xc48e,0xc48e,
	0xc490,0xc490,0xc492,0xc492, 0xc494,0xc494,0xc496,0xc496, 0xc498,0xc498,0xc49a,0xc49a, 0xc49c,0xc49c,0xc49e,0xc49e,
	// U+0120
	0xc4a0,0xc4a0,0xc4a2,0xc4a2, 0xc4a4,0xc4a4,0xc4a6,0xc4a6, 0xc4a8,0xc4a8,0xc4aa,0xc4aa, 0xc4ac,0xc4ac,0xc4ae,0xc4ae,
	0xc4b0,0xc4b0,0xc4b2,0xc4b2, 0xc4b4,0xc4b4,0xc4b6,0xc4b6, 0xc4b8,0xc4b9,0xc4b9,0xc4bb, 0xc4bb,0xc4bd,0xc4bd,0xc4bf,
	// U+0140
	0xc4bf,0xc581,0xc581,0xc583, 0xc583,0xc585,0xc585,0xc587, 0xc587,0xc589,0xc58a,0xc58a, 0xc58c,0xc58c,0xc58e,0xc58e,
	0xc590,0xc590,0xc592,0xc592, 0xc594,0xc594,0xc596,0xc596, 0xc598,0xc598,0xc59a,0xc59a, 0xc59c,0xc59c,0xc59e,0xc59e,
	// U+0160
	0xc5a0,0xc5a0,0xc5a2,0xc5a2, 0xc5a4,0xc5a4,0xc5a6,0xc5a6, 0xc5a8,0xc5a8,0xc5aa,0xc5aa, 0xc5ac,0xc5ac,0xc5ae,0xc5ae,
	0xc5b0,0xc5b0,0xc5b2,0xc5b2, 0xc5b4,0xc5b4,0xc5b6,0xc5b6, 0xc5b8,0xc5b9,0xc5b9,0xc5bb, 0xc5bb,0xc5bd,0xc5bd,0xc5bf,
};
---|
164 | |
---|
165 | |
---|
/*
 * atoi() wrapper that tolerates NULL and rejects strings that do not
 * begin with a decimal digit or a sign.
 *
 * Returns 0 for a NULL pointer or when the first character is not one
 * of '0'..'9', '-' or '+' (leading white space is therefore rejected);
 * otherwise returns atoi(s).
 */
int
safe_atoi(char *s)
{
	char first;

	if (s == NULL)
		return 0;

	first = s[0];
	if (first != '-' && first != '+' && (first < '0' || first > '9'))
		return 0;

	return atoi(s);
}
---|
175 | |
---|
176 | // NOTE: support U+0000 ~ U+FFFF only. |
---|
177 | int |
---|
178 | utf16le_to_utf8(char *dst, int n, __u16 utf16le) |
---|
179 | { |
---|
180 | __u16 wc = le16_to_cpu(utf16le); |
---|
181 | if (wc < 0x80) { |
---|
182 | if (n<1) return 0; |
---|
183 | *dst++ = wc & 0xff; |
---|
184 | return 1; |
---|
185 | } |
---|
186 | else if (wc < 0x800) { |
---|
187 | if (n<2) return 0; |
---|
188 | *dst++ = 0xc0 | (wc>>6); |
---|
189 | *dst++ = 0x80 | (wc & 0x3f); |
---|
190 | return 2; |
---|
191 | } |
---|
192 | else { |
---|
193 | if (n<3) return 0; |
---|
194 | *dst++ = 0xe0 | (wc>>12); |
---|
195 | *dst++ = 0x80 | ((wc>>6) & 0x3f); |
---|
196 | *dst++ = 0x80 | (wc & 0x3f); |
---|
197 | return 3; |
---|
198 | } |
---|
199 | } |
---|
200 | |
---|
201 | void |
---|
202 | fetch_string_txt(char *fname, char *lang, int n, ...) |
---|
203 | { |
---|
204 | va_list args; |
---|
205 | char **keys; |
---|
206 | char ***strs; |
---|
207 | char **defstr; |
---|
208 | int i; |
---|
209 | FILE *fp; |
---|
210 | char buf[4096]; |
---|
211 | int state; |
---|
212 | char *p; |
---|
213 | char *langid; |
---|
214 | const char *lang_en = "EN"; |
---|
215 | |
---|
216 | if (!(keys = malloc(sizeof(keys) * n))) { |
---|
217 | DPRINTF(E_FATAL, L_SCANNER, "Out of memory\n"); |
---|
218 | } |
---|
219 | if (!(strs = malloc(sizeof(strs) * n))) { |
---|
220 | DPRINTF(E_FATAL, L_SCANNER, "Out of memory\n"); |
---|
221 | } |
---|
222 | if (!(defstr = malloc(sizeof(defstr) * n))) { |
---|
223 | DPRINTF(E_FATAL, L_SCANNER, "Out of memory\n"); |
---|
224 | } |
---|
225 | |
---|
226 | va_start(args, n); |
---|
227 | for (i=0; i<n; i++) { |
---|
228 | keys[i] = va_arg(args, char *); |
---|
229 | strs[i] = va_arg(args, char **); |
---|
230 | defstr[i] = va_arg(args, char *); |
---|
231 | } |
---|
232 | va_end(args); |
---|
233 | |
---|
234 | if (!(fp = fopen(fname, "rb"))) { |
---|
235 | DPRINTF(E_ERROR, L_SCANNER, "Cannot open <%s>\n", fname); |
---|
236 | goto _exit; |
---|
237 | } |
---|
238 | |
---|
239 | state = -1; |
---|
240 | while (fgets(buf, sizeof(buf), fp)) { |
---|
241 | int len = strlen(buf); |
---|
242 | |
---|
243 | if (buf[len-1]=='\n') buf[len-1] = '\0'; |
---|
244 | |
---|
245 | if (state<0) { |
---|
246 | if (isalpha(buf[0])) { |
---|
247 | for (i=0; i<n; i++) { |
---|
248 | if (!(strcmp(keys[i], buf))) { |
---|
249 | state = i; |
---|
250 | break; |
---|
251 | } |
---|
252 | } |
---|
253 | } |
---|
254 | } |
---|
255 | else { |
---|
256 | int found = 0; |
---|
257 | |
---|
258 | if (isalpha(buf[0]) || buf[0]=='\0') { |
---|
259 | state = -1; |
---|
260 | continue; |
---|
261 | } |
---|
262 | |
---|
263 | p = buf; |
---|
264 | while (isspace(*p)) p++; |
---|
265 | if (*p == '\0') { |
---|
266 | state = -1; |
---|
267 | continue; |
---|
268 | } |
---|
269 | langid = p; |
---|
270 | while (!isspace(*p)) p++; |
---|
271 | *p++ = '\0'; |
---|
272 | |
---|
273 | if (!strcmp(lang, langid)) |
---|
274 | found = 1; |
---|
275 | else if (strcmp(lang_en, langid)) |
---|
276 | continue; |
---|
277 | |
---|
278 | while (isspace(*p)) p++; |
---|
279 | if (*strs[state]) |
---|
280 | free(*strs[state]); |
---|
281 | *strs[state] = strdup(p); |
---|
282 | |
---|
283 | if (found) |
---|
284 | state = -1; |
---|
285 | } |
---|
286 | } |
---|
287 | |
---|
288 | for (i=0; i<n; i++) { |
---|
289 | if (!*strs[i]) |
---|
290 | *strs[i] = defstr[i]; |
---|
291 | } |
---|
292 | fclose(fp); |
---|
293 | |
---|
294 | _exit: |
---|
295 | free(keys); |
---|
296 | free(strs); |
---|
297 | free(defstr); |
---|
298 | } |
---|