|
- // Copyright (C) 2011 Carl Rogers
- // Released under MIT License
- // license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php
-
- #include "cnpy.h"
-
- #include <stdint.h>
-
- #include <algorithm>
- #include <complex>
- #include <cstdlib>
- #include <cstring>
- #include <iomanip>
- #include <regex>
- #include <stdexcept>
-
- char cnpy::BigEndianTest() {
- int x = 1;
- return (((char*)&x)[0]) ? '<' : '>';
- }
-
- char cnpy::map_type(const std::type_info& t) {
- if (t == typeid(float)) return 'f';
- if (t == typeid(double)) return 'f';
- if (t == typeid(long double)) return 'f';
-
- if (t == typeid(int)) return 'i';
- if (t == typeid(char)) return 'i';
- if (t == typeid(short)) return 'i';
- if (t == typeid(long)) return 'i';
- if (t == typeid(long long)) return 'i';
-
- if (t == typeid(unsigned char)) return 'u';
- if (t == typeid(unsigned short)) return 'u';
- if (t == typeid(unsigned long)) return 'u';
- if (t == typeid(unsigned long long)) return 'u';
- if (t == typeid(unsigned int)) return 'u';
-
- if (t == typeid(bool)) return 'b';
-
- if (t == typeid(std::complex<float>)) return 'c';
- if (t == typeid(std::complex<double>)) return 'c';
- if (t == typeid(std::complex<long double>))
- return 'c';
-
- else
- return '?';
- }
-
- template <>
- std::vector<char>& cnpy::operator+=(std::vector<char>& lhs, const std::string rhs) {
- lhs.insert(lhs.end(), rhs.begin(), rhs.end());
- return lhs;
- }
-
- template <>
- std::vector<char>& cnpy::operator+=(std::vector<char>& lhs, const char* rhs) {
- // write in little endian
- size_t len = strlen(rhs);
- lhs.reserve(len);
- for (size_t byte = 0; byte < len; byte++) {
- lhs.push_back(rhs[byte]);
- }
- return lhs;
- }
-
- void cnpy::parse_npy_header(unsigned char* buffer, size_t& word_size, std::vector<size_t>& shape, bool& fortran_order) {
- // std::string magic_string(buffer,6);
- uint8_t major_version = *reinterpret_cast<uint8_t*>(buffer + 6);
- uint8_t minor_version = *reinterpret_cast<uint8_t*>(buffer + 7);
- uint16_t header_len = *reinterpret_cast<uint16_t*>(buffer + 8);
- std::string header(reinterpret_cast<char*>(buffer + 9), header_len);
-
- size_t loc1, loc2;
-
- // fortran order
- loc1 = header.find("fortran_order") + 16;
- fortran_order = (header.substr(loc1, 4) == "True" ? true : false);
-
- // shape
- loc1 = header.find("(");
- loc2 = header.find(")");
-
- std::regex num_regex("[0-9][0-9]*");
- std::smatch sm;
- shape.clear();
-
- std::string str_shape = header.substr(loc1 + 1, loc2 - loc1 - 1);
- while (std::regex_search(str_shape, sm, num_regex)) {
- shape.push_back(std::stoi(sm[0].str()));
- str_shape = sm.suffix().str();
- }
-
- // endian, word size, data type
- // byte order code | stands for not applicable.
- // not sure when this applies except for byte array
- loc1 = header.find("descr") + 9;
- bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false);
- assert(littleEndian);
-
- // char type = header[loc1+1];
- // assert(type == map_type(T));
-
- std::string str_ws = header.substr(loc1 + 2);
- loc2 = str_ws.find("'");
- word_size = atoi(str_ws.substr(0, loc2).c_str());
- }
-
- void cnpy::parse_npy_header(FILE* fp, size_t& word_size, std::vector<size_t>& shape, bool& fortran_order) {
- char buffer[256];
- size_t res = fread(buffer, sizeof(char), 11, fp);
- if (res != 11) throw std::runtime_error("parse_npy_header: failed fread");
- std::string header = fgets(buffer, 256, fp);
- assert(header[header.size() - 1] == '\n');
-
- size_t loc1, loc2;
-
- // fortran order
- loc1 = header.find("fortran_order");
- if (loc1 == std::string::npos)
- throw std::runtime_error("parse_npy_header: failed to find header keyword: 'fortran_order'");
- loc1 += 16;
- fortran_order = (header.substr(loc1, 4) == "True" ? true : false);
-
- // shape
- loc1 = header.find("(");
- loc2 = header.find(")");
- if (loc1 == std::string::npos || loc2 == std::string::npos)
- throw std::runtime_error("parse_npy_header: failed to find header keyword: '(' or ')'");
-
- std::regex num_regex("[0-9][0-9]*");
- std::smatch sm;
- shape.clear();
-
- std::string str_shape = header.substr(loc1 + 1, loc2 - loc1 - 1);
- while (std::regex_search(str_shape, sm, num_regex)) {
- shape.push_back(std::stoi(sm[0].str()));
- str_shape = sm.suffix().str();
- }
-
- // endian, word size, data type
- // byte order code | stands for not applicable.
- // not sure when this applies except for byte array
- loc1 = header.find("descr");
- if (loc1 == std::string::npos) throw std::runtime_error("parse_npy_header: failed to find header keyword: 'descr'");
- loc1 += 9;
- bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false);
- assert(littleEndian);
-
- // char type = header[loc1+1];
- // assert(type == map_type(T));
-
- std::string str_ws = header.substr(loc1 + 2);
- loc2 = str_ws.find("'");
- word_size = atoi(str_ws.substr(0, loc2).c_str());
- }
-
- void cnpy::parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_size, size_t& global_header_offset) {
- std::vector<char> footer(22);
- fseek(fp, -22, SEEK_END);
- size_t res = fread(&footer[0], sizeof(char), 22, fp);
- if (res != 22) throw std::runtime_error("parse_zip_footer: failed fread");
-
- uint16_t disk_no, disk_start, nrecs_on_disk, comment_len;
- disk_no = *(uint16_t*)&footer[4];
- disk_start = *(uint16_t*)&footer[6];
- nrecs_on_disk = *(uint16_t*)&footer[8];
- nrecs = *(uint16_t*)&footer[10];
- global_header_size = *(uint32_t*)&footer[12];
- global_header_offset = *(uint32_t*)&footer[16];
- comment_len = *(uint16_t*)&footer[20];
-
- assert(disk_no == 0);
- assert(disk_start == 0);
- assert(nrecs_on_disk == nrecs);
- assert(comment_len == 0);
- }
-
- cnpy::NpyArray load_the_npy_file(FILE* fp) {
- std::vector<size_t> shape;
- size_t word_size;
- bool fortran_order;
- cnpy::parse_npy_header(fp, word_size, shape, fortran_order);
-
- cnpy::NpyArray arr(shape, word_size, fortran_order);
- size_t nread = fread(arr.data<char>(), 1, arr.num_bytes(), fp);
- if (nread != arr.num_bytes()) throw std::runtime_error("load_the_npy_file: failed fread");
- return arr;
- }
-
- cnpy::NpyArray load_the_npz_array(FILE* fp, uint32_t compr_bytes, uint32_t uncompr_bytes) {
-
- std::vector<unsigned char> buffer_compr(compr_bytes);
- std::vector<unsigned char> buffer_uncompr(uncompr_bytes);
- size_t nread = fread(&buffer_compr[0], 1, compr_bytes, fp);
- if (nread != compr_bytes) throw std::runtime_error("load_the_npy_file: failed fread");
-
- int err;
- z_stream d_stream;
-
- d_stream.zalloc = Z_NULL;
- d_stream.zfree = Z_NULL;
- d_stream.opaque = Z_NULL;
- d_stream.avail_in = 0;
- d_stream.next_in = Z_NULL;
- err = inflateInit2(&d_stream, -MAX_WBITS);
-
- d_stream.avail_in = compr_bytes;
- d_stream.next_in = &buffer_compr[0];
- d_stream.avail_out = uncompr_bytes;
- d_stream.next_out = &buffer_uncompr[0];
-
- err = inflate(&d_stream, Z_FINISH);
- err = inflateEnd(&d_stream);
-
- std::vector<size_t> shape;
- size_t word_size;
- bool fortran_order;
- cnpy::parse_npy_header(&buffer_uncompr[0], word_size, shape, fortran_order);
-
- cnpy::NpyArray array(shape, word_size, fortran_order);
-
- size_t offset = uncompr_bytes - array.num_bytes();
- memcpy(array.data<unsigned char>(), &buffer_uncompr[0] + offset, array.num_bytes());
-
- return array;
- }
-
- cnpy::npz_t* cnpy::npz_load(std::string fname) {
- FILE* fp = fopen(fname.c_str(), "rb");
-
- if (!fp) {
- throw std::runtime_error("npz_load: Error! Unable to open file " + fname + "!");
- }
-
- auto* arrays = new cnpy::npz_t;
-
- while (1) {
- std::vector<char> local_header(30);
- size_t headerres = fread(&local_header[0], sizeof(char), 30, fp);
- if (headerres != 30) throw std::runtime_error("npz_load: failed fread");
-
- // if we've reached the global header, stop reading
- if (local_header[2] != 0x03 || local_header[3] != 0x04) break;
-
- // read in the variable name
- uint16_t name_len = *(uint16_t*)&local_header[26];
- std::string varname(name_len, ' ');
- size_t vname_res = fread(&varname[0], sizeof(char), name_len, fp);
- if (vname_res != name_len) throw std::runtime_error("npz_load: failed fread");
-
- // erase the lagging .npy
- varname.erase(varname.end() - 4, varname.end());
-
- // read in the extra field
- uint16_t extra_field_len = *(uint16_t*)&local_header[28];
- if (extra_field_len > 0) {
- std::vector<char> buff(extra_field_len);
- size_t efield_res = fread(&buff[0], sizeof(char), extra_field_len, fp);
- if (efield_res != extra_field_len) throw std::runtime_error("npz_load: failed fread");
- }
-
- uint16_t compr_method = *reinterpret_cast<uint16_t*>(&local_header[0] + 8);
- uint32_t compr_bytes = *reinterpret_cast<uint32_t*>(&local_header[0] + 18);
- uint32_t uncompr_bytes = *reinterpret_cast<uint32_t*>(&local_header[0] + 22);
-
- if (compr_method == 0) {
- (*arrays)[varname] = load_the_npy_file(fp);
- } else {
- (*arrays)[varname] = load_the_npz_array(fp, compr_bytes, uncompr_bytes);
- }
- }
-
- fclose(fp);
- return arrays;
- }
-
- cnpy::NpyArray cnpy::npz_load(std::string fname, std::string varname) {
- FILE* fp = fopen(fname.c_str(), "rb");
-
- if (!fp) throw std::runtime_error("npz_load: Unable to open file " + fname);
-
- while (1) {
- std::vector<char> local_header(30);
- size_t header_res = fread(&local_header[0], sizeof(char), 30, fp);
- if (header_res != 30) throw std::runtime_error("npz_load: failed fread");
-
- // if we've reached the global header, stop reading
- if (local_header[2] != 0x03 || local_header[3] != 0x04) break;
-
- // read in the variable name
- uint16_t name_len = *(uint16_t*)&local_header[26];
- std::string vname(name_len, ' ');
- size_t vname_res = fread(&vname[0], sizeof(char), name_len, fp);
- if (vname_res != name_len) throw std::runtime_error("npz_load: failed fread");
- vname.erase(vname.end() - 4, vname.end()); // erase the lagging .npy
-
- // read in the extra field
- uint16_t extra_field_len = *(uint16_t*)&local_header[28];
- fseek(fp, extra_field_len, SEEK_CUR); // skip past the extra field
-
- uint16_t compr_method = *reinterpret_cast<uint16_t*>(&local_header[0] + 8);
- uint32_t compr_bytes = *reinterpret_cast<uint32_t*>(&local_header[0] + 18);
- uint32_t uncompr_bytes = *reinterpret_cast<uint32_t*>(&local_header[0] + 22);
-
- if (vname == varname) {
- NpyArray array =
- (compr_method == 0) ? load_the_npy_file(fp) : load_the_npz_array(fp, compr_bytes, uncompr_bytes);
- fclose(fp);
- return array;
- } else {
- // skip past the data
- uint32_t size = *(uint32_t*)&local_header[22];
- fseek(fp, size, SEEK_CUR);
- }
- }
-
- fclose(fp);
-
- // if we get here, we haven't found the variable in the file
- throw std::runtime_error("npz_load: Variable name " + varname + " not found in " + fname);
- }
-
- cnpy::NpyArray cnpy::npy_load(std::string fname) {
-
- FILE* fp = fopen(fname.c_str(), "rb");
-
- if (!fp) throw std::runtime_error("npy_load: Unable to open file " + fname);
-
- NpyArray arr = load_the_npy_file(fp);
-
- fclose(fp);
- return arr;
- }
|