ProteoWizard
BinaryDataEncoderTest.cpp
Go to the documentation of this file.
1//
2// $Id$
3//
4//
5// Original author: Darren Kessner <darren@proteowizard.org>
6//
7// Copyright 2007 Spielberg Family Center for Applied Proteomics
8// Cedars Sinai Medical Center, Los Angeles, California 90048
9//
10// Licensed under the Apache License, Version 2.0 (the "License");
11// you may not use this file except in compliance with the License.
12// You may obtain a copy of the License at
13//
14// http://www.apache.org/licenses/LICENSE-2.0
15//
16// Unless required by applicable law or agreed to in writing, software
17// distributed under the License is distributed on an "AS IS" BASIS,
18// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19// See the License for the specific language governing permissions and
20// limitations under the License.
21//
22
23
24#include "BinaryDataEncoder.hpp"
26#include "boost/filesystem.hpp"
28#include <cstring>
29
30
31using namespace pwiz::util;
32using namespace pwiz::cv;
33using namespace pwiz::msdata;
34namespace bfs = boost::filesystem;
35
36
// Optional verbose-output stream. Null by default; main() points it at cout
// when "-v" is given, and every diagnostic write in this file is guarded by
// `if (os_)`.
37ostream* os_ = 0;
38
39
// Sample input values for the round-trip tests; testConfiguration() copies
// them into the vector that is handed to the encoder.
// NOTE(review): the values are laid out two per row -- presumably
// (m/z, intensity) pairs; confirm against the encoder's intended usage.
40double sampleData_[] =
41{
42 200.00018816645022000000, 0.00000000000000000000,
43 200.00043034083151000000, 0.00000000000000000000,
44 200.00067251579924000000, 0.00000000000000000000,
45 200.00091469135347000000, 0.00000000000000000000,
46 201.10647068550810000000, 0.00000000000000000000,
47 201.10671554643099000000, 0.00000000000000000000,
48 201.10696040795017000000, 0.00000000000000000000,
49 201.10720527006566000000, 0.00000000000000000000,
50 201.10745013277739000000, 908.68475341796875000000,
51 201.10769499608537000000, 1266.26928710937500000000,
52 201.10793985998967000000, 1258.11450195312500000000,
53 201.10818472449023000000, 848.79339599609375000000,
54 201.10842958958708000000, 0.00000000000000000000,
55 201.10867445528024000000, 0.00000000000000000000,
56 201.10891932156963000000, 0.0000000000000000000,
57 200, 0,
58 300, 1,
59 400, 10,
60 500, 100,
61 600, 1000,
62};
63
64
// Number of doubles in sampleData_ (array byte size divided by sizeof(double)).
65const int sampleDataSize_ = sizeof(sampleData_)/sizeof(double);
66
67
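68// regression test strings: expected encoder output text, returned by regressionTest() and compared against the freshly encoded sample data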
69const char* sampleEncoded32Big_ = "Q0gADAAAAABDSAAcAAAAAENIACwAAAAAQ0gAPAAAAABDSRtCAAAAAENJG1IAAAAAQ0kbYgAAAABDSRtyAAAAAENJG4JEYyvTQ0kbkkSeSJ5DSRuiRJ1DqkNJG7JEVDLHQ0kbwgAAAABDSRvSAAAAAENJG+IAAAAAQ0gAAAAAAABDlgAAP4AAAEPIAABBIAAAQ/oAAELIAABEFgAARHoAAA==";
70const char* sampleEncoded32Little_ = "DABIQwAAAAAcAEhDAAAAACwASEMAAAAAPABIQwAAAABCG0lDAAAAAFIbSUMAAAAAYhtJQwAAAAByG0lDAAAAAIIbSUPTK2NEkhtJQ55InkSiG0lDqkOdRLIbSUPHMlREwhtJQwAAAADSG0lDAAAAAOIbSUMAAAAAAABIQwAAAAAAAJZDAACAPwAAyEMAACBBAAD6QwAAyEIAABZEAAB6RA==";
71const char* sampleEncoded64Little_ = "/xedigEAaUAAAAAAAAAAAIV5fYYDAGlAAAAAAAAAAACkK16CBQBpQAAAAAAAAAAAXy4/fgcAaUAAAAAAAAAAAK4HNjVoI2lAAAAAAAAAAACrvLg2aiNpQAAAAAAAAAAAnMM7OGwjaUAAAAAAAAAAAIIcvzluI2lAAAAAAAAAAABax0I7cCNpQAAAAGB6ZYxAJcTGPHIjaUAAAADAE8mTQOUSSz50I2lAAAAAQHWok0CYs88/diNpQAAAAOBYhopAP6ZUQXgjaUAAAAAAAAAAANvq2UJ6I2lAAAAAAAAAAABpgV9EfCNpQAAAAAAAAAAAAAAAAAAAaUAAAAAAAAAAAAAAAAAAwHJAAAAAAAAA8D8AAAAAAAB5QAAAAAAAACRAAAAAAABAf0AAAAAAAABZQAAAAAAAwIJAAAAAAABAj0A=";
72const char* sampleEncoded64Big_ = "QGkAAYqdF/8AAAAAAAAAAEBpAAOGfXmFAAAAAAAAAABAaQAFgl4rpAAAAAAAAAAAQGkAB34/Ll8AAAAAAAAAAEBpI2g1NgeuAAAAAAAAAABAaSNqNri8qwAAAAAAAAAAQGkjbDg7w5wAAAAAAAAAAEBpI245vxyCAAAAAAAAAABAaSNwO0LHWkCMZXpgAAAAQGkjcjzGxCVAk8kTwAAAAEBpI3Q+SxLlQJOodUAAAABAaSN2P8+zmECKhljgAAAAQGkjeEFUpj8AAAAAAAAAAEBpI3pC2erbAAAAAAAAAABAaSN8RF+BaQAAAAAAAAAAQGkAAAAAAAAAAAAAAAAAAEBywAAAAAAAP/AAAAAAAABAeQAAAAAAAEAkAAAAAAAAQH9AAAAAAABAWQAAAAAAAECCwAAAAAAAQI9AAAAAAAA=";
73const char* sampleEncoded32LittleZlib_ = "eJzjYfBwZgACGSitA6VtoLSTtCeYDoLSSVC6CEo3AenL2skuk4D0PI95LouA9CrnuS6bgPRxoxCXQ1B1l6D0IyjNADWfgWEakG6wZ2A4AaQVHBkYfgHpE04MDGIuDAxVLgB7LB3q";
74const char* sampleEncoded32BigZlib_ = "eJxz9mDgYQACZw8GGSitA6VtwLSntBOUDoLSSVC6CEo3uSRrXwbSk1zmecwD0otc5jqvAtKbXEKMjgPpQ1B1l6D0I6j5YOA8jYHBvgFIn2BgcFQA0r8YGJyAbBcxIK5iYAAAfLkd6g==";
75const char* sampleEncoded64LittleZlib_ = "eJz7Lz63i5Eh04EBClora9uYkfhLtOOaWJH48Xr2dexI/HXsZqYZygj+6j07zLKQ+HMOW1vkIPGbZPZb5iHxo447WRdA+AlVqT0OqkeO2RRB+AeET052eCrkbVcC4TuUrpjsMGPzefsyCP9BRFuXg/2yEMcKJPNuv7rpVIXEz2yMd6lB4kNFUfkHiqD8D/YQuhLKV4HQDvVQfiSEPtAEFe93AACmsz1+";
76const char* sampleEncoded64BigZlib_ = "eJxzyGRg7Jor/p8BChwyGZjbaitbkfisTXHaS5D47HX2evEIvnKGqRn7OiR+ltmOPauR+DkW1ofnIPHzLPfLNCHxC6ydjkc59KRWJUD4RTbHjqg6TD4pfADCL7HzFnrqMHlFqQOEX2Z/fvMMh662iAcQfoVjyDJ7JPOqnG6+uo3Er3GJb8xEcj8KcCg6AKbtP0D5lVBaBUrXO0DoSCi/CaLeoR8iDgC0Qj1+";
77const char* sampleEncodedNumpressLinear_ = "QS69PAAAAAAu7AEMAAAAAA9J0wgQ61LPfgY70wgQbTLPfg4d0wgQ7hLPfgMM1BgQwGKtfgvq1SgQ4UKtfgjc1SgQIyKtfgXO1SgQRAKtfgKw5SgQ78OG4QNVqQugf3Tmpg+6yRCARe2G9wiYdBGAecaFZgs+qjKwizv8oQVa5SgQS0GtfgJM5SgQjCGtfgwC5BgQApLPfgicxA4Q5MmQzQzK9+kgoDYaDQAvNdQwS+AZrAhzqAY5hKD/kA==";
78const char* sampleEncodedNumpressLinearZlib_ = "eJxz1NtrwwAEem8YeUA0v+dlDoHXQefr2KyBjFyj83V8skDGO6Hzdcw8VyQEDiStreN+dVVD4KHT2jqOO0CGstLaOtZzQIYL09o6pg1PNQTeH257yBy6kntBfcmzZfy7Tgo0uL5t+84xo0SwofJYaxq33SqjDd3WfxayRgEVezsCdfkAGT2Ka+t4mJ5ICDBNOl/HMecIn8CTkxPO8pz6/lJhgZkUL4O+6RUD7weSaziKV7BZtiz4PwEAkp1KXg==";
79const char* sampleEncodedNumpressSlof_ = "QMHqAAAAAAACvgAAAr4AAAK+AAACvgAANL4AADS+AAA0vgAANL4AADS+GvQ0vvr/NL6//zS+qfE0vgAANL4AADS+AAACvgAAeszWGMHW6VW73lqlQOWH9w==";
80const char* sampleEncodedNumpressSlofZlib_ = "eJxzOPiKAQSY9qFiEwws9cVk36//Jvv2A/HKj8hyIPVVZ65JHLz2MnT3vailDk/bvwMAn1ogtQ==";
81const char* sampleEncodedNumpressPic_ = "aMhoyGjIaMhpyGnIachpyGnF2DacUvRpxa5GnFFTachpyGnIaMhcIXFQkXpU8WRlhSWOMA==";
82const char* sampleEncodedNumpressPicZlib_ = "eJzLOJEBhpkwePSG2ZygL5lH17nNCQyGiGWciFEsDJhYFfIxJbVVtc8AAAjsG4c=";
83const char* sampleEncodedModified64BigZlib_ = "eJxzyGRg7Jor/r/+/X8wcMhkYG6rrWz9j+CzNsVpL6m/D+ez19nrxf+H85UzTM3Y1zFAAZCfZbZjz2okfo6F9eE5SPw8y/0yTUj8Amun41EOPalVCRB+kc2xI6oOk08KH4DwS+y8hZ46TF5R6gDhl9mf3zzDoast4gGEX+EYssweybwqp5uvbiPxa1ziGzMRfAYU4FB0AEzbf4DyK6G0CpSud4DQkVB+E0S9Qz9EHACREFv+";
84
// Returns the expected encoded text for the given encoder configuration, so
// the caller can compare it with what the encoder actually produced.
//
// config                - configuration whose expected output is wanted
// expectNumpressIgnored - see the comment on the first `if` below
//
// Throws runtime_error("... Untested configuration.") when control reaches
// the end without any return having been taken.
//
// NOTE(review): this listing is missing the lines that hold the selection
// conditions (and most of the return statements); only the returns of
// sampleEncoded32Big_ and sampleEncoded64Big_ are visible here. Consult the
// original file before changing the selection logic.
85const char* regressionTest(const BinaryDataEncoder::Config& config,bool expectNumpressIgnored)
86{
87 if (expectNumpressIgnored) // when set, expecting numpress not to be used even though it was requested
88 {
90 }
91 else
92 {
95
98
101 }
106
110 return sampleEncoded32Big_;
111
116
120 return sampleEncoded64Big_;
121
126
131
136
141
 // No visible branch returned: this configuration has no regression string.
142 throw runtime_error("[BinaryDataEncoderTest::regressionTest()] Untested configuration.");
143}
144
145
147{
 // Round-trip check for one encoder configuration: encode the sample data,
 // compare the encoded text with the string from regressionTest(), decode it
 // again and compare the decoded values with the input.
 // NOTE(review): the signature line of this function is absent from this
 // listing; `config_in` is presumably its (Config) parameter -- confirm
 // against the original file.
148 BinaryDataEncoder::Config config(config_in);
149 if (os_)
150 *os_ << "testConfiguration: " << config << endl;
151
152 // initialize scan data
153
154 vector<double> binary(sampleDataSize_);
155 copy(sampleData_, sampleData_+sampleDataSize_, binary.begin());
156
 // True when some numpress mode is requested AND a positive linear error
 // tolerance is set. The same flag is later passed to regressionTest() as
 // its expectNumpressIgnored argument.
157 bool checkNumpressMaxErrorSupression = (BinaryDataEncoder::Numpress_None != config.numpress)&&(config.numpressLinearErrorTolerance>0);
158 if (checkNumpressMaxErrorSupression)
159 {
 // Overwrite four samples with extreme magnitudes: +max, -max, +max/2, -max/2.
 // NOTE(review): subtracting .1 from numeric_limits<double>::max() is far
 // below the precision of a double at that magnitude, so binary[1] is
 // effectively max() itself.
160 binary[1] = numeric_limits<double>::max( )-.1; // attempt to blow out the numpress lossiness limiter
161 binary[3] = -binary[1]; // attempt to blow out the numpress lossiness limiter
162 binary[5] = .5*binary[1]; // attempt to blow out the numpress lossiness limiter
163 binary[7] = .5*binary[3]; // attempt to blow out the numpress lossiness limiter
164 }
165
166 if (os_)
167 {
168 *os_ << "original: " << binary.size() << endl;
169 *os_ << setprecision(20) << fixed;
170 copy(binary.begin(), binary.end(), ostream_iterator<double>(*os_, "\n"));
171 }
172
173 // instantiate encoder
174
175 BinaryDataEncoder encoder(config);
176
177 // encode
178
179 string encoded;
180 encoder.encode(binary, encoded);
181
182 if (os_)
183 *os_ << "encoded: " << encoded.size() << endl << encoded << endl;
184
185 // regression testing for encoding
186
 // Exact text comparison against the stored string for this configuration;
 // regressionTest() throws if the configuration has no stored string.
187 unit_assert(encoded == regressionTest(config,checkNumpressMaxErrorSupression));
188
189 // decode
190
191 BinaryData<double> decoded;
192 encoder.decode(encoded, decoded);
193
194 if (os_)
195 {
196 *os_ << "decoded: " << decoded.size() << endl;
197 copy(decoded.begin(), decoded.end(), ostream_iterator<double>(*os_, "\n"));
198 }
199
200 // validate by comparing scan data before/after encode/decode
201
202 unit_assert(binary.size() == decoded.size());
203
 // Absolute tolerance for the default (non-lossy) comparison below:
 // 1e-14 when the configured precision is Precision_64, otherwise 1e-5.
204 const double epsilon = config.precision == BinaryDataEncoder::Precision_64 ? 1e-14 : 1e-5 ;
205
 // `jt` walks the decoded values in step with `it` over the originals.
206 auto jt = decoded.begin();
207 switch (config.numpress)
208 {
 // NOTE(review): the `case` labels that select the lossy branch are missing
 // from this listing (original lines 209-211).
212 // lossy compression
213 for (auto it = binary.begin(); it!=binary.end(); ++it, ++jt)
214 {
 // If either value is exactly zero, compare with tolerance 0.1; otherwise
 // divide one value by the other (the algebraically smaller on top) and
 // require the quotient to exceed .999.
 // NOTE(review): for two negative values this quotient is >= 1 whatever
 // their magnitudes, so the check is only strict for positive pairs.
215 if (0==*it || 0==*jt)
216 unit_assert_equal(*it, *jt, 0.1);
217 else if (*it > *jt)
218 unit_assert((*jt)/(*it) > .999 );
219 else
220 unit_assert((*it)/(*jt) > .999 );
221 }
222 break;
223 default:
 // All other numpress settings: element-wise comparison within epsilon.
224 for (auto it = binary.begin(); it!=binary.end(); ++it, ++jt)
225 {
226 unit_assert_equal(*it, *jt, epsilon);
227 }
228 break;
229 }
230 if (os_) *os_ << "validated with epsilon: " << fixed << setprecision(1) << scientific << epsilon << "\n\n";
231}
232
233
// Drives testConfiguration() over a sequence of encoder configurations.
// NOTE(review): the declaration of `config` and the assignments that change
// it between calls are missing from this listing, so which precision /
// byte-order / compression / numpress setting each call exercises cannot be
// read from here.
234void test()
235{
237
240 testConfiguration(config);
241
244 testConfiguration(config);
245
248 testConfiguration(config);
249
252 testConfiguration(config);
253
257 testConfiguration(config);
258
262 testConfiguration(config);
263
267 testConfiguration(config);
268
272 testConfiguration(config);
273
274 // test the numpress stuff with and without zlib, and to see if it honors error limits
276 config.numpressLinearErrorTolerance = 0; // means don't do tolerance checks
277 config.numpressSlofErrorTolerance = 0; // means don't do tolerance checks
 // The loop body runs three times, seeing zloop == 2, 1, 0 (the decrement
 // happens in the condition). Settings changed at the end of one pass take
 // effect in the next:
 //   pass 1: compression as left by the code above, tolerances 0
 //   pass 2: zlib, tolerances 0
 //   pass 3: zlib, tolerances .01
278 for (int zloop=3;zloop--;)
279 {
281 testConfiguration(config);
282
284 testConfiguration(config);
285
287 testConfiguration(config);
288
289 config.compression = BinaryDataEncoder::Compression_Zlib; // and again with zlib
290 if (1==zloop) // and finally test numpress excessive error avoidance
291 {
292 config.numpressLinearErrorTolerance = .01;
293 config.numpressSlofErrorTolerance = .01;
294 }
295 }
296
297}
298
299
// Round-trips the raw contents of a file through the encoder and asserts
// that every decoded value compares equal (==) to the value read in.
//
// filename - path of a binary file whose size must be a multiple of
//            sizeof(double); its bytes are read directly into a
//            vector<double>.
//
// If the file size cannot be obtained, a message is written to cerr and the
// function returns without failing the test.
//
// NOTE(review): the declaration of `config` and the settings applied inside
// the filename check are missing from this listing.
300void testBadFile(const string& filename)
301{
302 if (os_) *os_ << "testBadFile: " << filename << flush;
303
304 size_t filesize = 0;
305
306 try
307 {
308 filesize = (size_t) bfs::file_size(filename);
309 }
310 catch (exception&)
311 {
 // Missing/unreadable file is reported but deliberately not treated as a failure.
312 cerr << "\nUnable to find file " << filename << endl;
313 return;
314 }
315
316 if (os_) *os_ << " (" << filesize << " bytes)\n";
317
318 unit_assert(filesize%sizeof(double) == 0);
319
320 // read data from file into memory
321
 // NOTE(review): neither the stream's open state nor the result of read()
 // is checked, and &data[0] is taken even when filesize is 0 (empty vector).
322 vector<double> data(filesize/sizeof(double));
323 ifstream is(filename.c_str(), ios::binary);
324 is.read((char*)&data[0], filesize);
325
326 // set configuration to produce the error
327
329
330 if (filename.find("BinaryDataEncoderTest.bad.bin")!=string::npos)
331 {
332 // zlib compression encoding error with this configuration
336 }
337
338 // encode and decode
339
340 BinaryDataEncoder encoder(config);
341 string encoded;
342 encoder.encode(data, encoded);
343
344 BinaryData<double> decoded;
345 encoder.decode(encoded, decoded);
346
347 // verify
348
 // Exact equality is required here, unlike the tolerance-based comparison
 // in testConfiguration().
349 unit_assert(decoded.size() == data.size());
350 for (size_t i=0; i<decoded.size(); i++)
351 unit_assert(decoded[i] == data[i]);
352}
353
354
// Test entry point.
//
// Command-line handling:
//   -v        enable verbose output (os_ is pointed at cout)
//   --<any>   skipped by this loop
//   other     collected as a filename and passed to testBadFile()
//
// Runs test() first, then testBadFile() once per collected filename.
// Exceptions derived from `exception` and unknown exceptions are both
// reported through TEST_FAILED.
// NOTE(review): the line following the catch handlers (original line 384)
// is missing from this listing.
355int main(int argc, char* argv[])
356{
357 TEST_PROLOG(argc, argv)
358
359 try
360 {
361 vector<string> filenames;
362
363 for (int i=1; i<argc; i++)
364 {
365 if (!strcmp(argv[i],"-v")) os_ = &cout;
366 else if (bal::starts_with(argv[i], "--")) continue;
367 else filenames.push_back(argv[i]);
368 }
369
370 if (os_) *os_ << "BinaryDataEncoderTest\n\n";
371 test();
372 for_each(filenames.begin(), filenames.end(), testBadFile);
373
374 }
375 catch (exception& e)
376 {
377 TEST_FAILED(e.what())
378 }
379 catch (...)
380 {
381 TEST_FAILED("Caught unknown exception.")
382 }
383
385}
386
387
const char * sampleEncodedNumpressPic_
int main(int argc, char *argv[])
const char * sampleEncoded64LittleZlib_
const char * regressionTest(const BinaryDataEncoder::Config &config, bool expectNumpressIgnored)
const char * sampleEncodedNumpressLinear_
const char * sampleEncoded64BigZlib_
const char * sampleEncoded32LittleZlib_
const char * sampleEncoded64Big_
const int sampleDataSize_
const char * sampleEncodedNumpressSlofZlib_
const char * sampleEncodedNumpressLinearZlib_
const char * sampleEncodedNumpressPicZlib_
double sampleData_[]
const char * sampleEncodedModified64BigZlib_
void testBadFile(const string &filename)
const char * sampleEncoded32Little_
const char * sampleEncoded32Big_
ostream * os_
void test()
const char * sampleEncoded32BigZlib_
const char * sampleEncodedNumpressSlof_
const char * sampleEncoded64Little_
const char * filenames[]
void decode(const char *encodedData, size_t len, pwiz::util::BinaryData< double > &result) const
decode text-encoded data as binary
void encode(const std::vector< double > &data, std::string &result, size_t *binaryByteCount=NULL) const
encode binary data as a text string
A custom vector class that can store its contents in either a std::vector or a cli::array (when compi...
const_iterator end() const
const double epsilon
Definition DiffTest.cpp:41
void testConfiguration()
Definition DiffTest.cpp:83
encoding/decoding configuration
#define unit_assert(x)
Definition unit.hpp:85
#define TEST_EPILOG
Definition unit.hpp:183
#define TEST_FAILED(x)
Definition unit.hpp:177
#define unit_assert_equal(x, y, epsilon)
Definition unit.hpp:99
#define TEST_PROLOG(argc, argv)
Definition unit.hpp:175