21 SG_UNSTABLE(
"CStreamingAsciiFile::CStreamingAsciiFile()",
"\n")
37 #define GET_VECTOR(fname, conv, sg_type) \ 38 void CStreamingAsciiFile::get_vector(sg_type*& vector, int32_t& num_feat) \ 40 char* buffer = NULL; \ 42 int32_t old_len = num_feat; \ 45 bytes_read = buf->read_line(buffer); \ 59 char* ptr_item=NULL; \ 60 char* ptr_data=buffer; \ 61 DynArray<char*>* items=new DynArray<char*>(); \ 65 if ((*ptr_data=='\n') || \ 66 (ptr_data - buffer >= bytes_read)) \ 71 append_item(items, ptr_data, ptr_item); \ 78 else if (!isblank(*ptr_data) && !ptr_item) \ 82 else if (isblank(*ptr_data) && ptr_item) \ 84 append_item(items, ptr_data, ptr_item); \ 92 SG_DEBUG("num_feat %d\n", num_feat) \ 95 if (old_len < num_feat) \ 96 vector=SG_REALLOC(sg_type, vector, old_len, num_feat); \ 98 for (int32_t i=0; i<num_feat; i++) \ 100 char* item=items->get_element(i); \ 101 vector[i]=conv(item); \ 121 #define GET_FLOAT_VECTOR(sg_type) \ 122 void CStreamingAsciiFile::get_vector(sg_type*& vector, int32_t& len)\ 126 int32_t num_chars = buf->read_line(line); \ 127 int32_t old_len = len; \ 129 if (num_chars == 0) \ 136 substring example_string = {line, line + num_chars}; \ 138 CCSVFile::tokenize(m_delimiter, example_string, words); \ 140 len = words.index(); \ 141 substring* feature_start = &words[0]; \ 144 vector = SG_REALLOC(sg_type, vector, old_len, len); \ 147 for (substring* i = feature_start; i != words.end; i++) \ 149 vector[j++] = SGIO::float_of_substring(*i); \ 156 #undef GET_FLOAT_VECTOR 160 #define GET_VECTOR_AND_LABEL(fname, conv, sg_type) \ 161 void CStreamingAsciiFile::get_vector_and_label(sg_type*& vector, int32_t& num_feat, float64_t& label) \ 163 char* buffer = NULL; \ 164 ssize_t bytes_read; \ 165 int32_t old_len = num_feat; \ 168 bytes_read = buf->read_line(buffer); \ 182 char* ptr_item=NULL; \ 183 char* ptr_data=buffer; \ 184 DynArray<char*>* items=new DynArray<char*>(); \ 188 if ((*ptr_data=='\n') || \ 189 (ptr_data - buffer >= bytes_read)) \ 194 append_item(items, ptr_data, ptr_item); \ 201 else if (!isblank(*ptr_data) && !ptr_item) \ 205 else if (isblank(*ptr_data) && ptr_item) \ 207 append_item(items, ptr_data, ptr_item); \ 215 SG_DEBUG("num_feat %d\n", num_feat) \ 217 label=atof(items->get_element(0)); \ 219 if (old_len < num_feat - 1) \ 220 vector=SG_REALLOC(sg_type, vector, old_len, num_feat-1); \ 222 for (int32_t i=1; i<num_feat; i++) \ 224 char* item=items->get_element(i); \ 225 vector[i-1]=conv(item); \ 244 #undef GET_VECTOR_AND_LABEL 246 #define GET_FLOAT_VECTOR_AND_LABEL(sg_type) \ 247 void CStreamingAsciiFile::get_vector_and_label(sg_type*& vector, int32_t& len, float64_t& label) \ 251 int32_t num_chars = buf->read_line(line); \ 252 int32_t old_len = len; \ 254 if (num_chars == 0) \ 261 substring example_string = {line, line + num_chars}; \ 263 CCSVFile::tokenize(m_delimiter, example_string, words); \ 265 label = SGIO::float_of_substring(words[0]); \ 267 len = words.index() - 1; \ 268 substring* feature_start = &words[1]; \ 271 vector = SG_REALLOC(sg_type, vector, old_len, len); \ 274 for (substring* i = feature_start; i != words.end; i++) \ 276 vector[j++] = SGIO::float_of_substring(*i); \ 283 #undef GET_FLOAT_VECTOR_AND_LABEL 287 #define GET_STRING(fname, conv, sg_type) \ 288 void CStreamingAsciiFile::get_string(sg_type*& vector, int32_t& len) \ 290 char* buffer = NULL; \ 291 ssize_t bytes_read; \ 294 bytes_read = buf->read_line(buffer); \ 304 SG_DEBUG("Line read from the file:\n%s\n", buffer) \ 306 if (buffer[bytes_read-1]=='\n') \ 309 buffer[bytes_read-1]='\0'; \ 313 vector=(sg_type *) buffer; \ 334 #define GET_STRING_AND_LABEL(fname, conv, sg_type) \ 335 void CStreamingAsciiFile::get_string_and_label(sg_type*& vector, int32_t& len, float64_t& label) \ 337 char* buffer = NULL; \ 338 ssize_t bytes_read; \ 341 bytes_read = buf->read_line(buffer); \ 351 int32_t str_start_pos=-1; \ 353 for (int32_t i=0; i<bytes_read; i++) \ 355 if (buffer[i] == ' ') \ 358 label=atoi(buffer); \ 365 if (str_start_pos == -1) \ 372 if (buffer[bytes_read-1]=='\n') \ 374 buffer[bytes_read-1]='\0'; \ 375 len=bytes_read-str_start_pos-1; \ 378 len=bytes_read-str_start_pos; \ 380 vector=(sg_type*) &buffer[str_start_pos]; \ 397 #undef GET_STRING_AND_LABEL 401 #define GET_SPARSE_VECTOR(fname, conv, sg_type) \ 402 void CStreamingAsciiFile::get_sparse_vector(SGSparseVectorEntry<sg_type>*& vector, int32_t& len) \ 404 char* buffer = NULL; \ 405 ssize_t bytes_read; \ 408 bytes_read = buf->read_line(buffer); \ 420 if (buffer[bytes_read-1]=='\n') \ 422 num_chars=bytes_read-1; \ 423 buffer[num_chars]='\0'; \ 426 num_chars=bytes_read; \ 428 int32_t num_dims=0; \ 429 for (int32_t i=0; i<num_chars; i++) \ 431 if (buffer[i]==':') \ 437 int32_t index_start_pos=-1; \ 438 int32_t feature_start_pos; \ 439 int32_t current_feat=0; \ 440 if (len < num_dims) \ 441 vector=SG_REALLOC(SGSparseVectorEntry<sg_type>, vector, len, num_dims); \ 442 for (int32_t i=0; i<num_chars; i++) \ 444 if (buffer[i]==':') \ 447 vector[current_feat].feat_index=(int32_t) atoi(buffer+index_start_pos)-1; \ 449 index_start_pos=-1; \ 451 feature_start_pos=i+1; \ 452 while ((buffer[i]!=' ') && (i<num_chars)) \ 458 vector[current_feat].entry=(sg_type) conv(buffer+feature_start_pos); \ 462 else if (buffer[i]==' ') \ 469 if (index_start_pos == -1) \ 491 #undef GET_SPARSE_VECTOR 495 #define GET_SPARSE_VECTOR_AND_LABEL(fname, conv, sg_type) \ 496 void CStreamingAsciiFile::get_sparse_vector_and_label(SGSparseVectorEntry<sg_type>*& vector, int32_t& len, float64_t& label) \ 498 char* buffer = NULL; \ 499 ssize_t bytes_read; \ 502 bytes_read = buf->read_line(buffer); \ 514 if (buffer[bytes_read-1]=='\n') \ 516 num_chars=bytes_read-1; \ 517 buffer[num_chars]='\0'; \ 520 num_chars=bytes_read; \ 522 int32_t num_dims=0; \ 523 for (int32_t i=0; i<num_chars; i++) \ 525 if (buffer[i]==':') \ 531 int32_t index_start_pos=-1; \ 532 int32_t feature_start_pos; \ 533 int32_t current_feat=0; \ 534 int32_t label_pos=-1; \ 535 if (len < num_dims) \ 536 vector=SG_REALLOC(SGSparseVectorEntry<sg_type>, vector, len, num_dims); \ 538 for (int32_t i=1; i<num_chars; i++) \ 540 if (buffer[i]==':') \ 544 if ( (buffer[i]==' ') && (buffer[i-1]!=' ') ) \ 548 label=atof(buffer); \ 554 SG_ERROR("No label found!\n") \ 556 buffer+=label_pos+1; \ 557 num_chars-=label_pos+1; \ 558 for (int32_t i=0; i<num_chars; i++) \ 560 if (buffer[i]==':') \ 563 vector[current_feat].feat_index=(int32_t) atoi(buffer+index_start_pos)-1; \ 565 index_start_pos=-1; \ 567 feature_start_pos=i+1; \ 568 while ((buffer[i]!=' ') && (i<num_chars)) \ 574 vector[current_feat].entry=(sg_type) conv(buffer+feature_start_pos); \ 578 else if (buffer[i]==' ') \ 585 if (index_start_pos == -1) \ 607 #undef GET_SPARSE_VECTOR_AND_LABEL 610 void CStreamingAsciiFile::append_item(
611 DynArray<T>* items,
char* ptr_data,
char* ptr_item)
613 REQUIRE(ptr_data && ptr_item,
"Data and Item to append should not be NULL\n");
615 size_t len=(ptr_data-ptr_item)/
sizeof(
char);
616 char* item=SG_MALLOC(
char, len+1);
617 memset(item, 0,
sizeof(
char)*(len+1));
618 item=strncpy(item, ptr_item, len);
620 SG_DEBUG(
"current %c, len %d, item %s\n", *ptr_data, len, item)
626 m_delimiter = delimiter;
#define GET_SPARSE_VECTOR(fname, conv, sg_type)
#define GET_STRING(fname, conv, sg_type)
void set_delimiter(char delimiter)
bool append_element(T element)
#define GET_VECTOR(fname, conv, sg_type)
virtual ~CStreamingAsciiFile()
A Streaming File access class.
#define GET_VECTOR_AND_LABEL(fname, conv, sg_type)
#define GET_FLOAT_VECTOR(sg_type)
Template Dynamic array class that creates an array that can be used like a list or an array...
#define GET_FLOAT_VECTOR_AND_LABEL(sg_type)
all of classes and functions are contained in the shogun namespace
#define GET_STRING_AND_LABEL(fname, conv, sg_type)
#define SG_UNSTABLE(func,...)
#define GET_SPARSE_VECTOR_AND_LABEL(fname, conv, sg_type)
bool str_to_bool(char *str)