Line data Source code
1 : /**
2 : * @file gensvm_strutil.c
3 : * @author G.J.J. van den Burg
4 : * @date 2014-01-07
5 : * @brief Utility functions for dealing with strings
6 : *
7 : * @details
8 : * This file contains functions for reading files, reading strings from a
9 : * format and checking start and ends of strings.
10 : *
11 : * @copyright
12 : Copyright 2016, G.J.J. van den Burg.
13 :
14 : This file is part of GenSVM.
15 :
16 : GenSVM is free software: you can redistribute it and/or modify
17 : it under the terms of the GNU General Public License as published by
18 : the Free Software Foundation, either version 3 of the License, or
19 : (at your option) any later version.
20 :
21 : GenSVM is distributed in the hope that it will be useful,
22 : but WITHOUT ANY WARRANTY; without even the implied warranty of
23 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 : GNU General Public License for more details.
25 :
26 : You should have received a copy of the GNU General Public License
27 : along with GenSVM. If not, see <http://www.gnu.org/licenses/>.
28 :
29 : */
30 :
31 : #include "gensvm_strutil.h"
32 : #include "gensvm_print.h"
33 :
/**
 * @brief Test whether a string begins with a given prefix
 *
 * @param[in] 	str 	string to inspect
 * @param[in] 	pre 	prefix to look for
 * @returns 		true if the first strlen(pre) characters of str are
 * 			equal to pre (an empty prefix always matches), false
 * 			otherwise
 */
bool str_startswith(const char *str, const char *pre)
{
	const size_t n_pre = strlen(pre);
	const size_t n_str = strlen(str);

	// a prefix longer than the string itself can never match
	if (n_str < n_pre)
		return false;

	return strncmp(pre, str, n_pre) == 0;
}
48 :
/**
 * @brief Test whether a string ends with a given suffix
 *
 * @param[in] 	str 	string to inspect
 * @param[in] 	suf 	suffix to look for
 * @returns 		true if the last strlen(suf) characters of str are
 * 			equal to suf (an empty suffix always matches), false
 * 			otherwise
 */
bool str_endswith(const char *str, const char *suf)
{
	const size_t n_suf = strlen(suf);
	const size_t n_str = strlen(str);

	// a suffix longer than the string itself can never match
	if (n_suf > n_str)
		return false;

	// compare only the trailing n_suf characters of str
	const char *tail = str + (n_str - n_suf);
	return strncmp(tail, suf, n_suf) == 0;
}
64 :
/**
 * @brief Check if a string contains a char
 *
 * @details
 * Walks the string up to (but not including) its null terminator and stops
 * at the first match. Because the terminator itself is never inspected,
 * asking for '\0' always yields false.
 *
 * @param[in] 	str 	input string
 * @param[in] 	c 	character to search for
 *
 * @return 		true if c occurs at least once in str, false otherwise
 */
bool str_contains_char(const char *str, const char c)
{
	const char *p;

	for (p = str; *p != '\0'; p++) {
		if (*p == c)
			return true;
	}
	return false;
}
84 :
/**
 * @brief Count how often characters from one string appear in another
 *
 * @details
 * Every character of str is compared against every character of chars and
 * each equal pair adds one to the total. As a consequence, a character that
 * is listed more than once in chars is counted once per listing. This
 * function is used by str_split() to estimate the number of parts.
 *
 * @param[in] 	str 	input string
 * @param[in] 	chars 	characters to count
 *
 * @return 		number of (position in str, position in chars) pairs
 * 			holding the same character
 */
int count_str_occurrences(const char *str, const char *chars)
{
	int total = 0;
	const char *s, *c;

	for (s = str; *s != '\0'; s++) {
		for (c = chars; *c != '\0'; c++) {
			if (*s == *c)
				total++;
		}
	}
	return total;
}
111 :
112 : /**
113 : * @brief Split a string on delimiters and return an array of parts
114 : *
115 : * @details
116 : * This function takes as input a string and a string of delimiters. As
117 : * output, it gives an array of the parts of the first string, splitted on the
118 : * characters in the second string. The input string is not changed, and the
119 : * output contains all copies of the input string parts.
120 : *
121 : * @note
122 : * The code is based on: http://stackoverflow.com/a/9210560
123 : *
124 : * @param[in] original string you wish to split
125 : * @param[in] delims string with delimiters to split on
126 : * @param[out] len_ret length of the output array
127 : *
128 : * @return array of string parts
129 : */
130 148 : char **str_split(char *original, const char *delims, int *len_ret)
131 148 : {
132 148 : char *copy = NULL,
133 148 : *token = NULL,
134 148 : **result = NULL;
135 : int i, count;
136 :
137 148 : size_t len = strlen(original);
138 148 : size_t n_delim = strlen(delims);
139 :
140 : // add the null terminator to the delimiters
141 148 : char all_delim[1 + n_delim];
142 346 : for (i=0; i<n_delim; i++)
143 198 : all_delim[i] = delims[i];
144 148 : all_delim[n_delim] = '\0';
145 :
146 : // number of occurances of the delimiters
147 148 : count = count_str_occurrences(original, delims);
148 :
149 : // extra count in case there is a delimiter at the end
150 148 : count += (str_contains_char(delims, original[len - 1]));
151 :
152 : // extra count for the null terminator
153 148 : count++;
154 :
155 : // allocate the result array
156 148 : result = Calloc(char *, count);
157 :
158 : // tokenize a copy of the original string and keep the splits
159 148 : i = 0;
160 148 : copy = Calloc(char, len + 1);
161 148 : strcpy(copy, original);
162 148 : token = strtok(copy, all_delim);
163 630 : while (token) {
164 334 : result[i] = Calloc(char, strlen(token) + 1);
165 334 : strcpy(result[i], token);
166 334 : i++;
167 :
168 334 : token = strtok(NULL, all_delim);
169 : }
170 148 : free(copy);
171 :
172 148 : *len_ret = i;
173 :
174 148 : return result;
175 : }
176 :
177 : /**
178 : * @brief Move to next line in file
179 : *
180 : * @param[in] fid File opened for reading
181 : * @param[in] filename name of the file pointed to by fid
182 : */
183 10 : void next_line(FILE *fid, char *filename)
184 : {
185 : char buffer[GENSVM_MAX_LINE_LENGTH];
186 10 : get_line(fid, filename, buffer);
187 10 : }
188 :
189 : /**
190 : * @brief Read line to buffer
191 : *
192 : * @param[in] fid File opened for reading
193 : * @param[in] filename name of the file
194 : * @param[in,out] buffer allocated buffer to read to
195 : */
196 27 : char *get_line(FILE *fid, char *filename, char *buffer)
197 : {
198 27 : char *retval = fgets(buffer, GENSVM_MAX_LINE_LENGTH, fid);
199 27 : if (retval == NULL) {
200 1 : err("[GenSVM Error]: Error reading from file %s\n", filename);
201 : }
202 27 : return retval;
203 : }
204 :
205 : /**
206 : * @brief Read a double from file following a format
207 : *
208 : * @details
209 : * This function reads a double value from a file. If no value can be found, a
210 : * warning is printed to stderr, and NAN is returned.
211 : *
212 : * @param[in] fid File opened for reading
213 : * @param[in] filename Name of the file
214 : * @param[in] fmt Format containing a float format
215 : * @returns value read (if any)
216 : */
217 7 : double get_fmt_double(FILE *fid, char *filename, const char *fmt)
218 : {
219 : char buffer[GENSVM_MAX_LINE_LENGTH];
220 7 : double value = NAN;
221 : int retval;
222 :
223 7 : get_line(fid, filename, buffer);
224 7 : retval = sscanf(buffer, fmt, &value);
225 7 : if (retval == 0)
226 1 : err("[GenSVM Error]: No double read from file.\n");
227 7 : return value;
228 : }
229 :
230 : /**
231 : * @brief Read a long integer from file following a format
232 : *
233 : * @param[in] fid File opened for reading
234 : * @param[in] filename Name of the file
235 : * @param[in] fmt Format containing a long integer format
236 : * @returns value read (if any)
237 : */
238 7 : long get_fmt_long(FILE *fid, char *filename, const char *fmt)
239 : {
240 : char buffer[GENSVM_MAX_LINE_LENGTH];
241 7 : long value = 0;
242 : int retval;
243 :
244 7 : get_line(fid, filename, buffer);
245 7 : retval = sscanf(buffer, fmt, &value);
246 7 : if (retval == 0)
247 1 : err("[GenSVM Error]: No long read from file.\n");
248 7 : return value;
249 : }
250 :
/**
 * @brief Read all doubles in a given buffer
 *
 * @details
 * Starting at the given offset, strtod() is applied repeatedly, each time
 * continuing where the previous conversion stopped. Every converted value is
 * appended to the output array. Parsing ends at the first position where
 * strtod() cannot convert anything; whatever follows is ignored. No bound is
 * enforced on the output array.
 *
 * @param[in] 	buffer 		a string buffer
 * @param[in] 	offset 		an offset of the string to start looking for
 * 				doubles
 * @param[out] 	all_doubles 	pre-allocated array of doubles (should be
 * 				large enough to hold every value in the buffer)
 * @returns 			number of doubles read
 */
long all_doubles_str(char *buffer, long offset, double *all_doubles)
{
	long n_read = 0;
	char *cursor = buffer + offset;
	char *next = NULL;

	for (;;) {
		double parsed = strtod(cursor, &next);

		// strtod() leaves next == cursor when nothing was converted
		if (next == cursor)
			break;

		all_doubles[n_read++] = parsed;
		cursor = next;
	}

	return n_read;
}
286 :
/**
 * @brief Read all longs in a given buffer
 *
 * @details
 * Starting at the given offset, strtol() in base 10 is applied repeatedly,
 * each time continuing where the previous conversion stopped. Every
 * converted value is appended to the output array. Parsing ends at the first
 * position where strtol() cannot convert anything; whatever follows is
 * ignored. No bound is enforced on the output array.
 *
 * @param[in] 	buffer 		a string buffer
 * @param[in] 	offset 		an offset of the string to start looking for
 * 				longs
 * @param[out] 	all_longs 	pre-allocated array of longs (should be large
 * 				enough to hold every value in the buffer)
 * @returns 			number of longs read
 */
long all_longs_str(char *buffer, long offset, long *all_longs)
{
	long n_read = 0;
	char *cursor = buffer + offset;
	char *next = NULL;

	for (;;) {
		long parsed = strtol(cursor, &next, 10);

		// strtol() leaves next == cursor when nothing was converted
		if (next == cursor)
			break;

		all_longs[n_read++] = parsed;
		cursor = next;
	}

	return n_read;
}
|