13 #ifndef _CSTRINGFEATURES__H__ 14 #define _CSTRINGFEATURES__H__ 28 template <
class T>
class CDynamicArray;
33 #ifndef DOXYGEN_SHOULD_SKIP_THIS 34 struct SSKDoubleFeature
41 struct SSKTripleFeature
126 virtual void cleanup();
134 virtual void cleanup_feature_vector(int32_t num);
143 virtual void cleanup_feature_vectors(int32_t start, int32_t stop);
185 void set_feature_vector(
SGVector<ST> vector, int32_t num);
188 void enable_on_the_fly_preprocessing();
193 void disable_on_the_fly_preprocessing();
205 ST* get_feature_vector(int32_t num, int32_t& len,
bool& dofree);
228 SGString<ST>* get_transposed(int32_t &num_feat, int32_t &num_vec);
238 void free_feature_vector(ST* feat_vec, int32_t num,
bool dofree);
247 void free_feature_vector(
SGVector<ST> feat_vec, int32_t num);
257 virtual ST get_feature(int32_t vec_num, int32_t feat_num);
266 virtual int32_t get_vector_length(int32_t vec_num);
274 virtual int32_t get_max_vector_length();
277 virtual int32_t get_num_vectors()
const;
317 ST get_masked_symbols(ST symbol, uint8_t mask);
325 ST shift_offset(ST offset, int32_t amount);
333 ST shift_symbol(ST symbol, int32_t amount);
339 virtual void load(
CFile* loader);
351 void load_ascii_file(
char* fname,
bool remap_to_bin=
true,
362 bool load_fasta_file(
const char* fname,
bool ignore_invalid=
false);
373 bool load_fastq_file(
const char* fname,
374 bool ignore_invalid=
false,
bool bitremap_in_single_string=
false);
383 bool load_from_directory(
char* dirname);
401 bool set_features(
SGString<ST>* p_features, int32_t p_num_vectors,
402 int32_t p_max_string_length);
426 bool append_features(
SGString<ST>* p_features, int32_t p_num_vectors,
427 int32_t p_max_string_length);
442 virtual SGString<ST>* get_features(int32_t& num_str, int32_t& max_str_len);
452 virtual SGString<ST>* copy_features(int32_t& num_str, int32_t& max_str_len);
461 virtual void get_features(
SGString<ST>** dst, int32_t* num_str);
469 virtual void save(
CFile* writer);
479 virtual bool load_compressed(
char* src,
bool decompress);
497 virtual bool apply_preprocessor(
bool force_preprocessing=
false);
511 int32_t obtain_by_sliding_window(int32_t window_size, int32_t step_size, int32_t skip=0);
540 int32_t p_order, int32_t gap,
bool rev);
555 int32_t p_order, int32_t gap,
bool rev);
566 bool have_same_length(int32_t len=-1);
573 void embed_features(int32_t p_order);
581 void compute_symbol_mask_table(int64_t max_val);
589 void unembed_word(ST word, uint8_t* seq, int32_t len);
596 ST embed_word(ST* seq, int32_t len);
602 void determine_maximum_string_length();
611 static ST* get_zero_terminated_string_copy(
SGString<ST> str);
621 virtual void set_feature_vector(int32_t num, ST*
string, int32_t len);
627 virtual void get_histogram(
float64_t** hist, int32_t* rows, int32_t* cols,
628 bool normalize=
true);
634 virtual void create_random(
float64_t* hist, int32_t rows, int32_t cols,
648 virtual const char*
get_name()
const {
return "StringFeatures"; }
651 virtual void subset_changed_post();
664 virtual ST* compute_feature_vector(int32_t num, int32_t& len);
710 #endif // _CSTRINGFEATURES__H__
bool preprocess_on_get
preprocess on-the-fly?
RAWDNA - letters 0,1,2,3.
virtual const char * get_name() const
EAlphabet
Alphabet of charfeatures/observations.
SGString< ST > * features
The class Alphabet implements an alphabet and alphabet utility functions.
int32_t length_of_single_string
length of prior single string
floatmax_t num_symbols
number of used symbols
EFeatureClass
shogun feature class
int32_t symbol_mask_table_len
length of the symbol mask table
int32_t order
order used in higher order mapping
Template class StringFeatures implements a list of strings.
A File access base class.
CCache< ST > * feature_cache
EFeatureType
shogun feature type
All classes and functions are contained in the shogun namespace.
ST * symbol_mask_table
symbol mask table
The class Features is the base class of all feature objects.
int32_t max_string_length
floatmax_t original_num_symbols
original number of used symbols (before higher order mapping)
template class SGStringList